Google: Fix false positives on embedded thumbnails (e.g. YouTube results).

This commit is contained in:
Valentin Lorentz 2020-05-28 19:07:08 +02:00
parent 847e51a7ec
commit aa2f9202ec

View File

@@ -75,8 +75,8 @@ class GoogleHTMLParser(HTMLParser):
if tag in STACKED_TAGS: if tag in STACKED_TAGS:
self.stack.append(tag) self.stack.append(tag)
if tag == 'a' and attrs['href'].startswith('/url?q='): if tag == 'a' and attrs['href'].startswith('/url?q=') \
assert self.state == ParserState.OUTSIDE, (self.state, self.current_title) and self.state == ParserState.OUTSIDE:
self.state = ParserState.IN_LINK self.state = ParserState.IN_LINK
href = attrs['href'][len('/url?q='):] href = attrs['href'][len('/url?q='):]
self.current_link = utils.web.urlunquote(utils.web.htmlToText(href.split('&sa')[0])) self.current_link = utils.web.urlunquote(utils.web.htmlToText(href.split('&sa')[0]))