Google: Update for Google's new HTML layout.

Closes GH-1365
This commit is contained in:
Valentin Lorentz 2019-05-28 19:37:47 +02:00
parent 76699ec063
commit 540b827213
1 changed file with 3 additions and 3 deletions

View File

@ -75,13 +75,13 @@ class Google(callbacks.PluginRegexp):
msg = ircmsgs.privmsg(msg.args[0], s, msg=msg) msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
return msg return msg
_decode_re = re.compile(r'<h3 class="r"><a href="/url\?q=(?P<url>[^"]+)&[^"]+">(?P<title>.*?)</a></h3>.*?<a class="[^"]+" href="/url\?q=(?P<cacheUrl>http://webcache[^"]+)">.*?<span class="st">(?P<content>.*?)</span>', re.DOTALL | re.MULTILINE) _decode_re = re.compile(r'<div><div class="\w+"><a href="/url\?q=(?P<url>[^"]+)&[^"]+"><div class="(\w| )+">(?P<title>.*?)</div><div class="(\w| )+">(?P<breadcrumbs>.*?)</div></a></div>(?P<content><div class="(\w| )+">.*?</div></div>)', re.DOTALL | re.MULTILINE)
@classmethod
def decode(cls, text):
    """Extract search results from *text* using ``cls._decode_re``.

    Returns a list with one dict per match of the pattern. Each dict
    holds the pattern's named groups as produced by
    ``match.groupdict()``. The ``'url'`` entry is rewritten: everything
    from the first ``'&amp;'`` onwards is dropped, then the remainder is
    passed through ``utils.web.htmlToText`` and
    ``utils.web.urlunquote``.
    """
    results = []
    for match in cls._decode_re.finditer(text):
        # groupdict() keys the result by group name, so the dict follows
        # whatever named groups the pattern defines.
        r = match.groupdict()
        r['url'] = utils.web.urlunquote(utils.web.htmlToText(r['url'].split('&amp;')[0]))
        results.append(r)
    return results