2005-02-01 13:09:12 +01:00
|
|
|
###
|
|
|
|
# Copyright (c) 2002-2004, Jeremiah Fincher
|
2010-09-09 04:20:23 +02:00
|
|
|
# Copyright (c) 2008-2010, James Vega
|
2005-02-01 13:09:12 +01:00
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
#
|
|
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer.
|
|
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer in the
|
|
|
|
# documentation and/or other materials provided with the distribution.
|
|
|
|
# * Neither the name of the author of this software nor the name of
|
|
|
|
# contributors to this software may be used to endorse or promote products
|
|
|
|
# derived from this software without specific prior written consent.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
###
|
|
|
|
|
|
|
|
import re
|
|
|
|
import cgi
|
|
|
|
import time
|
|
|
|
import socket
|
|
|
|
import urllib
|
|
|
|
|
|
|
|
import supybot.conf as conf
|
|
|
|
import supybot.utils as utils
|
|
|
|
import supybot.world as world
|
|
|
|
from supybot.commands import *
|
|
|
|
import supybot.ircmsgs as ircmsgs
|
|
|
|
import supybot.ircutils as ircutils
|
|
|
|
import supybot.callbacks as callbacks
|
2010-10-17 14:50:31 +02:00
|
|
|
from supybot.i18n import PluginInternationalization, internationalizeDocstring
|
|
|
|
# Translation helper for this plugin's user-visible strings.
_ = PluginInternationalization('Google')

# The 3rd party simplejson module was included in Python 2.6 and renamed to
# json. Unfortunately, this conflicts with the 3rd party json module.
# Luckily, the 3rd party json module has a different interface (it exposes
# a 'read' attribute) so we test to make sure we aren't using it.
try:
    simplejson = utils.python.universalImport('json')
except ImportError:
    simplejson = None

# Fall back to simplejson when 'json' is missing or is the unrelated module.
if simplejson is None or hasattr(simplejson, 'read'):
    try:
        simplejson = utils.python.universalImport('simplejson',
                                                  'local.simplejson')
    except ImportError:
        raise callbacks.Error(
            'You need Python2.6 or the simplejson module installed to use '
            'this plugin. Download the module at '
            '<http://undefined.org/python/#simplejson>.')
|
2009-03-16 00:55:14 +01:00
|
|
|
|
2005-02-09 08:04:04 +01:00
|
|
|
class Google(callbacks.PluginRegexp):
    # Plugin offering Google web search ("google", "lucky", "cache",
    # "fight"), calculator and phonebook scraping, an output filter that
    # colorizes the word "google", and a snarfer for lines starting with
    # "google ".
    #
    # NOTE: the docstrings of the command methods are deliberately kept
    # verbatim.  They are passed through @internationalizeDocstring, and the
    # docstring of googleSnarfer is a regular expression, so they are
    # treated as runtime strings; additional documentation lives in
    # comments instead.

    # Attributes read by the callbacks framework (their exact semantics are
    # defined by callbacks.PluginRegexp, which is not visible in this file).
    threaded = True
    callBefore = ['Web']
    regexps = ['googleSnarfer']

    # Cache of already colorized spellings, keyed by the text that matched
    # _googleRe (the match is case-insensitive, so spellings can differ).
    _colorGoogles = {}

    def _getColorGoogle(self, m):
        """Return the bold, multi-colored rendering of a matched "google".

        ``m`` is a match object from ``_googleRe``; group 1 holds the
        six-letter word exactly as it appeared in the message.
        """
        word = m.group(1)
        colored = self._colorGoogles.get(word)
        if not colored:
            colors = ('blue', 'red', 'yellow', 'blue', 'green', 'red')
            pieces = [ircutils.mircColor(letter, color)
                      for (letter, color) in zip(word, colors)]
            # Every piece but the last loses its final character
            # (presumably the color terminator emitted by mircColor --
            # confirm), so only one terminator ends the whole word.
            pieces[:-1] = [piece[:-1] for piece in pieces[:-1]]
            colored = ''.join(pieces)
            self._colorGoogles[word] = colored
        return ircutils.bold(colored)

    _googleRe = re.compile(r'\b(google)\b', re.I)

    def outFilter(self, irc, msg):
        """Colorize "google" in PRIVMSGs when 'colorfulFilter' is enabled.

        Returns ``msg`` unchanged for other commands or when the filter is
        disabled for the target in ``msg.args[0]``; otherwise returns a new
        PRIVMSG built from ``msg`` with the rewritten text.
        """
        if msg.command == 'PRIVMSG' and \
           self.registryValue('colorfulFilter', msg.args[0]):
            text = re.sub(self._googleRe, self._getColorGoogle, msg.args[1])
            msg = ircmsgs.privmsg(msg.args[0], text, msg=msg)
        return msg

    _gsearchUrl = 'http://ajax.googleapis.com/ajax/services/search/web'

    @internationalizeDocstring
    def search(self, query, channel, options=None):
        """Perform a search using Google's AJAX API.
        search("search phrase", options={})

        Valid options are:
            smallsearch - True/False (Default: False)
            filter - {active,moderate,off} (Default: "moderate")
            language - Restrict search to documents in the given language
                       (Default: "lang_en")
        """
        # Returns the decoded JSON response (a dict).  Raises
        # callbacks.Error when the API answers with a responseStatus other
        # than 200.  `channel` selects the channel-specific values of the
        # 'defaultLanguage' and 'searchFilter' settings.
        if options is None:
            options = {}
        ref = self.registryValue('referer')
        if not ref:
            ref = 'http://%s/%s' % (dynamic.irc.server,
                                    dynamic.irc.nick)
        # Work on a copy: utils.web.defaultHeaders is a shared module-level
        # object, and writing the Referer into it would leak the header into
        # every other request that relies on the defaults.
        headers = dict(utils.web.defaultHeaders)
        headers['Referer'] = ref
        opts = {'q': query, 'v': '1.0'}
        # Translate our option names into the API's query parameters;
        # unknown option names are ignored.
        for (k, v) in options.items():
            if k == 'smallsearch':
                opts['rsz'] = 'small' if v else 'large'
            elif k == 'filter':
                opts['safe'] = v
            elif k == 'language':
                opts['lr'] = v
        # Fill in whatever the caller did not specify.
        defLang = self.registryValue('defaultLanguage', channel)
        if 'lr' not in opts and defLang:
            opts['lr'] = defLang
        if 'safe' not in opts:
            opts['safe'] = self.registryValue('searchFilter', channel)
        if 'rsz' not in opts:
            opts['rsz'] = 'large'

        fd = utils.web.getUrlFd('%s?%s' % (self._gsearchUrl,
                                           urllib.urlencode(opts)),
                                headers)
        try:
            response = simplejson.load(fd)
        finally:
            # Close the connection even when the payload is not valid JSON.
            fd.close()
        if response['responseStatus'] != 200:
            raise callbacks.Error(_('We broke The Google!'))
        return response

    def formatData(self, data, bold=True, max=0):
        """Format a list of search results into a single reply string.

        ``data`` is either a ready-made string, returned as is, or a
        sequence of result dicts carrying 'titleNoFormatting' and
        'unescapedUrl'.  ``bold`` makes the titles bold; a non-zero ``max``
        limits the number of results used.  Returns the entries joined by
        '; ', or a "no matches" message when there are none.
        """
        if isinstance(data, basestring):
            return data
        if max:
            data = data[:max]
        entries = []
        for result in data:
            title = result['titleNoFormatting'].encode('utf-8')
            title = utils.web.htmlToText(title)
            url = result['unescapedUrl'].encode('utf-8')
            if not title:
                # No usable title: show the bare URL.
                entries.append(url)
                continue
            if bold:
                title = ircutils.bold(title)
            entries.append(format('%s: %u', title, url))
        if not entries:
            return format(_('No matches found.'))
        return format('; '.join(entries))

    @internationalizeDocstring
    def lucky(self, irc, msg, args, opts, text):
        """[--snippet] <search>

        Does a google search, but only returns the first result.
        If option --snippet is given, returns also the page text snippet.
        """
        opts = dict(opts)
        data = self.search(text, msg.args[0], {'smallsearch': True})
        results = data['responseData']['results']
        if not results:
            irc.reply(_('Google found nothing.'))
            return
        first = results[0]
        reply = first['unescapedUrl'].encode('utf-8')
        if 'snippet' in opts:
            snippet = first['content'].encode('utf-8')
            reply += " | " + utils.web.htmlToText(snippet, tagReplace='')
        irc.reply(reply)
    lucky = wrap(lucky, [getopts({'snippet':'',}), 'text'])

    @internationalizeDocstring
    def google(self, irc, msg, args, optlist, text):
        """<search> [--{filter,language} <value>]

        Searches google.com for the given string. As many results as can fit
        are included. --language accepts a language abbreviation; --filter
        accepts a filtering level ('active', 'moderate', 'off').
        """
        # NOTE(review): optlist is fed to dict() below, which suggests it is
        # a sequence of (name, value) pairs.  If so, this membership test
        # never matches and the validation is dead code; it is kept
        # unchanged pending confirmation of the intended check.
        if 'language' in optlist and optlist['language'].lower() not in \
           conf.supybot.plugins.Google.safesearch.validStrings:
            irc.errorInvalid('language')
        data = self.search(text, msg.args[0], dict(optlist))
        if data['responseStatus'] != 200:
            irc.reply(_('We broke The Google!'))
            return
        bold = self.registryValue('bold', msg.args[0])
        max = self.registryValue('maximumResults', msg.args[0])
        irc.reply(self.formatData(data['responseData']['results'],
                                  bold=bold, max=max))
    google = wrap(google, [getopts({'language':'something',
                                    'filter':''}),
                           'text'])

    @internationalizeDocstring
    def cache(self, irc, msg, args, url):
        """<url>

        Returns a link to the cached version of <url> if it is available.
        """
        data = self.search(url, msg.args[0], {'smallsearch': True})
        results = data['responseData']['results']
        if results:
            cacheUrl = results[0]['cacheUrl']
            if cacheUrl:
                irc.reply(cacheUrl.encode('utf-8'))
                return
        irc.error(_('Google seems to have no cache for that site.'))
    cache = wrap(cache, ['url'])

    @internationalizeDocstring
    def fight(self, irc, msg, args):
        """<search string> <search string> [<search string> ...]

        Returns the results of each search, in order, from greatest number
        of results to least.
        """
        channel = msg.args[0]
        results = []
        for arg in args:
            data = self.search(arg, channel, {'smallsearch': True})
            # The count may be absent from the cursor; treat that as zero.
            count = data['responseData']['cursor'].get('estimatedResultCount',
                                                       0)
            results.append((int(count), arg))
        results.sort(reverse=True)
        if self.registryValue('bold', channel):
            bold = ircutils.bold
        else:
            bold = repr
        irc.reply(', '.join([format('%s: %i', bold(term), count)
                             for (count, term) in results]))

    def googleSnarfer(self, irc, msg, match):
        r"^google\s+(.*)$"
        # Replies with the URL of the first result for the text following
        # "google", when the 'searchSnarfer' setting is enabled for the
        # target in msg.args[0].  Stays silent when nothing is found.
        if not self.registryValue('searchSnarfer', msg.args[0]):
            return
        searchString = match.group(1)
        data = self.search(searchString, msg.args[0], {'smallsearch': True})
        results = data['responseData']['results']
        if results:
            url = results[0]['unescapedUrl']
            irc.reply(url.encode('utf-8'), prefixNick=False)
    googleSnarfer = urlSnarfer(googleSnarfer)

    def _escapeQuery(self, s):
        """Escape literal '+' as %2B, then turn spaces into '+'."""
        # Order matters: escaping '+' first keeps the pluses that stand for
        # spaces from being escaped as well.
        return s.replace('+', '%2B').replace(' ', '+')

    def _googleUrl(self, s):
        """Return the google.com search URL for the query string ``s``."""
        return r'http://google.com/search?q=' + self._escapeQuery(s)

    def _googleUrlIG(self, s):
        """Return the /ig/calculator URL for the expression ``s``."""
        return (r'http://www.google.com/ig/calculator?hl=en&q=' +
                self._escapeQuery(s))

    # Patterns used to pull the calculator answer out of the result page and
    # to turn its markup into plain text.
    _calcRe1 = re.compile(r'<table.*class="?obcontainer"?[^>]*>(.*?)</table>', re.I)
    _calcRe2 = re.compile(r'<h\d class="?r"?[^>]*>(?:<b>)?(.*?)(?:</b>)?</h\d>', re.I | re.S)
    _calcSupRe = re.compile(r'<sup>(.*?)</sup>', re.I)
    _calcFontRe = re.compile(r'<font size=-2>(.*?)</font>')
    _calcTimesRe = re.compile(r'&(?:times|#215);')

    def _calcError(self, expr):
        """Return the 'error' field of the /ig/calculator answer for expr."""
        js = utils.web.getUrl(self._googleUrlIG(expr))
        # fix bad google json: quote the bare keys, then double backslashes
        for key in ('lhs', 'rhs', 'error', 'icc'):
            js = js.replace(key + ':', '"%s":' % key)
        js = js.replace('\\', '\\\\')
        return simplejson.loads(js)['error']

    @internationalizeDocstring
    def calc(self, irc, msg, args, expr):
        """<expression>

        Uses Google's calculator to calculate the value of <expression>.
        """
        html = utils.web.getUrl(self._googleUrl(expr))
        match = self._calcRe1.search(html)
        if match is None:
            match = self._calcRe2.search(html)
        if match is not None:
            s = match.group(1)
            s = self._calcSupRe.sub(r'^(\1)', s)
            s = self._calcFontRe.sub(r',', s)
            s = self._calcTimesRe.sub(r'*', s)
            s = utils.web.htmlToText(s)
            irc.reply(s)
        else:
            # The JSON endpoint is only needed for its error text, so it is
            # queried here rather than up front: a failure of that endpoint
            # can no longer break a calculation the scrape answered.
            # NOTE(review): both replies below are sent, as before --
            # confirm that two messages are intended.
            irc.reply(_('Google says: Error: %s.') % (self._calcError(expr),))
            irc.reply(_('Google\'s calculator didn\'t come up with anything.'))
    calc = wrap(calc, ['text'])

    _phoneRe = re.compile(r'Phonebook.*?<font size=-1>(.*?)<a href')

    @internationalizeDocstring
    def phonebook(self, irc, msg, args, phonenumber):
        """<phone number>

        Looks <phone number> up on Google.
        """
        html = utils.web.getUrl(self._googleUrl(phonenumber))
        m = self._phoneRe.search(html)
        if m is None:
            irc.reply(_('Google\'s phonebook didn\'t come up with anything.'))
            return
        s = m.group(1).replace('<b>', '').replace('</b>', '')
        irc.reply(utils.web.htmlToText(s))
    phonebook = wrap(phonebook, ['text'])
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level alias for the plugin class (presumably the name the plugin
# loader looks up -- confirm against the loader).
Class = Google
|
|
|
|
|
|
|
|
|
2006-02-11 16:52:51 +01:00
|
|
|
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|