mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-19 08:59:27 +01:00
Google: update scraping regexp
This commit is contained in:
parent
540b827213
commit
dba0ae3771
@@ -75,7 +75,7 @@ class Google(callbacks.PluginRegexp):
|
|||||||
msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
|
msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
_decode_re = re.compile(r'<div><div class="\w+"><a href="/url\?q=(?P<url>[^"]+)&[^"]+"><div class="(\w| )+">(?P<title>.*?)</div><div class="(\w| )+">(?P<breadcrumbs>.*?)</div></a></div>(?P<content><div class="(\w| )+">.*?</div></div>)', re.DOTALL | re.MULTILINE)
|
_decode_re = re.compile(r'<div class="\w+"><a href="/url\?q=(?P<url>[^"]+)&[^"]+"><div class="(\w| )+">(?P<title>.*?)</div><div class="(\w| )+">(?P<breadcrumbs>.*?)</div></a></div>(?P<content><div class="(\w| )+">.*?</div></div>)', re.DOTALL | re.MULTILINE)
|
||||||
@classmethod
|
@classmethod
|
||||||
def decode(cls, text):
|
def decode(cls, text):
|
||||||
matches = cls._decode_re.finditer(text)
|
matches = cls._decode_re.finditer(text)
|
||||||
|
Loading…
Reference in New Issue
Block a user