mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-19 08:59:27 +01:00
Google: AJAX API was closed today, using HTML scraping instead.
This commit is contained in:
parent
fa5552ee5f
commit
b5268d63a3
@ -80,7 +80,19 @@ class Google(callbacks.PluginRegexp):
|
|||||||
msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
|
msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
_gsearchUrl = 'http://ajax.googleapis.com/ajax/services/search/web'
|
_decode_re = re.compile(r'<h3 class="r"><a href="/url\?q=(?P<url>[^"]+)&[^"]+">(?P<title>.*?)</a></h3>.*?<a class="[^"]+" href="/url\?q=(?P<cacheUrl>http://webcache[^"]+)">.*?<span class="st">(?P<content>.*?)</span>', re.DOTALL | re.MULTILINE)
|
||||||
|
@classmethod
|
||||||
|
def decode(cls, text):
|
||||||
|
matches = cls._decode_re.findall(text)
|
||||||
|
results = []
|
||||||
|
for match in matches:
|
||||||
|
r = dict(zip(('url', 'title', 'cacheUrl', 'content'), match))
|
||||||
|
r['url'] = utils.web.htmlToText(r['url'].split('&')[0])
|
||||||
|
results.append(r)
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
_gsearchUrl = 'https://www.google.fr/search?gbv=1'
|
||||||
@internationalizeDocstring
|
@internationalizeDocstring
|
||||||
def search(self, query, channel, options={}):
|
def search(self, query, channel, options={}):
|
||||||
"""Perform a search using Google's AJAX API.
|
"""Perform a search using Google's AJAX API.
|
||||||
@ -120,22 +132,17 @@ class Google(callbacks.PluginRegexp):
|
|||||||
text = utils.web.getUrl('%s?%s' % (self._gsearchUrl,
|
text = utils.web.getUrl('%s?%s' % (self._gsearchUrl,
|
||||||
utils.web.urlencode(opts)),
|
utils.web.urlencode(opts)),
|
||||||
headers=headers).decode('utf8')
|
headers=headers).decode('utf8')
|
||||||
data = json.loads(text)
|
return text
|
||||||
if data['responseStatus'] != 200:
|
|
||||||
self.log.info("Google: unhandled error message: ", text)
|
|
||||||
raise callbacks.Error(data['responseDetails'])
|
|
||||||
return data
|
|
||||||
|
|
||||||
def formatData(self, data, bold=True, max=0, onetoone=False):
|
def formatData(self, data, bold=True, max=0, onetoone=False):
|
||||||
if isinstance(data, minisix.string_types):
|
data = self.decode(data)
|
||||||
return data
|
|
||||||
results = []
|
results = []
|
||||||
if max:
|
if max:
|
||||||
data = data[:max]
|
data = data[:max]
|
||||||
for result in data:
|
for result in data:
|
||||||
title = utils.web.htmlToText(result['titleNoFormatting']\
|
title = utils.web.htmlToText(result['title']\
|
||||||
.encode('utf-8'))
|
.encode('utf-8'))
|
||||||
url = result['unescapedUrl']
|
url = result['url']
|
||||||
if minisix.PY2:
|
if minisix.PY2:
|
||||||
url = url.encode('utf-8')
|
url = url.encode('utf-8')
|
||||||
if title:
|
if title:
|
||||||
@ -163,10 +170,10 @@ class Google(callbacks.PluginRegexp):
|
|||||||
"""
|
"""
|
||||||
opts = dict(opts)
|
opts = dict(opts)
|
||||||
data = self.search(text, msg.args[0], {'smallsearch': True})
|
data = self.search(text, msg.args[0], {'smallsearch': True})
|
||||||
if data['responseData']['results']:
|
if data:
|
||||||
url = data['responseData']['results'][0]['unescapedUrl']
|
url = data['url']
|
||||||
if 'snippet' in opts:
|
if 'snippet' in opts:
|
||||||
snippet = data['responseData']['results'][0]['content']
|
snippet = data['content']
|
||||||
snippet = " | " + utils.web.htmlToText(snippet, tagReplace='')
|
snippet = " | " + utils.web.htmlToText(snippet, tagReplace='')
|
||||||
else:
|
else:
|
||||||
snippet = ""
|
snippet = ""
|
||||||
@ -194,7 +201,7 @@ class Google(callbacks.PluginRegexp):
|
|||||||
# do not want @google to echo ~20 lines of results, even if you
|
# do not want @google to echo ~20 lines of results, even if you
|
||||||
# have reply.oneToOne enabled.
|
# have reply.oneToOne enabled.
|
||||||
onetoone = self.registryValue('oneToOne', msg.args[0])
|
onetoone = self.registryValue('oneToOne', msg.args[0])
|
||||||
for result in self.formatData(data['responseData']['results'],
|
for result in self.formatData(data,
|
||||||
bold=bold, max=max, onetoone=onetoone):
|
bold=bold, max=max, onetoone=onetoone):
|
||||||
irc.reply(result)
|
irc.reply(result)
|
||||||
google = wrap(google, [getopts({'language':'something',
|
google = wrap(google, [getopts({'language':'something',
|
||||||
@ -208,8 +215,8 @@ class Google(callbacks.PluginRegexp):
|
|||||||
Returns a link to the cached version of <url> if it is available.
|
Returns a link to the cached version of <url> if it is available.
|
||||||
"""
|
"""
|
||||||
data = self.search(url, msg.args[0], {'smallsearch': True})
|
data = self.search(url, msg.args[0], {'smallsearch': True})
|
||||||
if data['responseData']['results']:
|
if data:
|
||||||
m = data['responseData']['results'][0]
|
m = data[0]
|
||||||
if m['cacheUrl']:
|
if m['cacheUrl']:
|
||||||
url = m['cacheUrl'].encode('utf-8')
|
url = m['cacheUrl'].encode('utf-8')
|
||||||
irc.reply(url)
|
irc.reply(url)
|
||||||
@ -217,6 +224,7 @@ class Google(callbacks.PluginRegexp):
|
|||||||
irc.error(_('Google seems to have no cache for that site.'))
|
irc.error(_('Google seems to have no cache for that site.'))
|
||||||
cache = wrap(cache, ['url'])
|
cache = wrap(cache, ['url'])
|
||||||
|
|
||||||
|
_fight_re = re.compile(r'id="resultStats"[^>]*>(?P<stats>[^<]*)')
|
||||||
@internationalizeDocstring
|
@internationalizeDocstring
|
||||||
def fight(self, irc, msg, args):
|
def fight(self, irc, msg, args):
|
||||||
"""<search string> <search string> [<search string> ...]
|
"""<search string> <search string> [<search string> ...]
|
||||||
@ -227,9 +235,13 @@ class Google(callbacks.PluginRegexp):
|
|||||||
channel = msg.args[0]
|
channel = msg.args[0]
|
||||||
results = []
|
results = []
|
||||||
for arg in args:
|
for arg in args:
|
||||||
data = self.search(arg, channel, {'smallsearch': True})
|
text = self.search(arg, channel, {'smallsearch': True})
|
||||||
count = data['responseData']['cursor'].get('estimatedResultCount',
|
i = text.find('id="resultStats"')
|
||||||
0)
|
stats = utils.web.htmlToText(self._fight_re.search(text).group('stats'))
|
||||||
|
if stats == '':
|
||||||
|
results.append((0, args))
|
||||||
|
continue
|
||||||
|
count = ''.join(filter('0123456789'.__contains__, stats))
|
||||||
results.append((int(count), arg))
|
results.append((int(count), arg))
|
||||||
results.sort()
|
results.sort()
|
||||||
results.reverse()
|
results.reverse()
|
||||||
|
@ -55,10 +55,10 @@ class GoogleTestCase(ChannelPluginTestCase):
|
|||||||
self.assertNotError('google ae')
|
self.assertNotError('google ae')
|
||||||
|
|
||||||
def testSearchFormat(self):
|
def testSearchFormat(self):
|
||||||
self.assertRegexp('google foo', '<http://.*>')
|
self.assertRegexp('google foo', '<https?://.*>')
|
||||||
self.assertNotError('config reply.format.url %s')
|
self.assertNotError('config reply.format.url %s')
|
||||||
self.assertRegexp('google foo', 'http://.*')
|
self.assertRegexp('google foo', 'https?://.*')
|
||||||
self.assertNotRegexp('google foo', '<http://.*>')
|
self.assertNotRegexp('google foo', '<https?://.*>')
|
||||||
|
|
||||||
def testSearchOneToOne(self):
|
def testSearchOneToOne(self):
|
||||||
self.assertRegexp('google dupa', ';')
|
self.assertRegexp('google dupa', ';')
|
||||||
|
Loading…
Reference in New Issue
Block a user