Google: use web scraping as a fallback to the IG API

The IG API doesn't cover everything (timezones, for one),
and scraping also serves as a backup in case the IG API ever dies.

Conflicts:

	plugins/Google/plugin.py
This commit is contained in:
Daniel Folkinshteyn 2011-12-12 14:57:10 -05:00 committed by Valentin Lorentz
parent e1ffe0f4e3
commit b991c8679b

View File

@ -322,27 +322,53 @@ class Google(callbacks.PluginRegexp):
googleSnarfer = urlSnarfer(googleSnarfer) googleSnarfer = urlSnarfer(googleSnarfer)
def _googleUrl(self, s): def _googleUrl(self, s):
s = s.replace('+', '%2B')
s = s.replace(' ', '+')
url = r'http://google.com/search?q=' + s
return url
def _googleUrlIG(self, s):
s = s.replace('+', '%2B') s = s.replace('+', '%2B')
s = s.replace(' ', '+') s = s.replace(' ', '+')
url = r'http://www.google.com/ig/calculator?hl=en&q=' + s url = r'http://www.google.com/ig/calculator?hl=en&q=' + s
return url return url
@internationalizeDocstring @internationalizeDocstring
_calcRe1 = re.compile(r'<table.*class="?obcontainer"?[^>]*>(.*?)</table>', re.I)
_calcRe2 = re.compile(r'<h\d class="?r"?[^>]*>(?:<b>)?(.*?)(?:</b>)?</h\d>', re.I | re.S)
_calcSupRe = re.compile(r'<sup>(.*?)</sup>', re.I)
_calcFontRe = re.compile(r'<font size=-2>(.*?)</font>')
_calcTimesRe = re.compile(r'&(?:times|#215);')
def calc(self, irc, msg, args, expr): def calc(self, irc, msg, args, expr):
"""<expression> """<expression>
Uses Google's calculator to calculate the value of <expression>. Uses Google's calculator to calculate the value of <expression>.
""" """
url = self._googleUrl(expr) urlig = self._googleUrlIG(expr)
js = utils.web.getUrl(url) js = utils.web.getUrl(urlig)
# fix bad google json # fix bad google json
js = js.replace('lhs:','"lhs":').replace('rhs:','"rhs":').replace('error:','"error":').replace('icc:','"icc":') js = js.replace('lhs:','"lhs":').replace('rhs:','"rhs":').replace('error:','"error":').replace('icc:','"icc":')
js = simplejson.loads(js) js = simplejson.loads(js)
if js['error'] == '': if js['error'] == '':
irc.reply("%s = %s" % (js['lhs'], js['rhs'],)) irc.reply("%s = %s" % (js['lhs'], js['rhs'],))
return
url = self._googleUrl(expr)
html = utils.web.getUrl(url)
match = self._calcRe1.search(html)
if match is None:
match = self._calcRe2.search(html)
if match is not None:
s = match.group(1)
s = self._calcSupRe.sub(r'^(\1)', s)
s = self._calcFontRe.sub(r',', s)
s = self._calcTimesRe.sub(r'*', s)
s = utils.web.htmlToText(s)
irc.reply(s)
else: else:
irc.reply(_('Google says: Error: %s.') % (js['error'],)) irc.reply(_('Google says: Error: %s.') % (js['error'],))
irc.reply('Google\'s calculator didn\'t come up with anything.')
calc = wrap(calc, ['text']) calc = wrap(calc, ['text'])
_phoneRe = re.compile(r'Phonebook.*?<font size=-1>(.*?)<a href') _phoneRe = re.compile(r'Phonebook.*?<font size=-1>(.*?)<a href')