Google: Fix false positives on embedded thumbnails (e.g. YouTube results).

This commit is contained in:
Valentin Lorentz 2020-05-28 19:07:08 +02:00
parent 847e51a7ec
commit aa2f9202ec
1 changed file with 2 additions and 2 deletions

View File

@@ -75,8 +75,8 @@ class GoogleHTMLParser(HTMLParser):
if tag in STACKED_TAGS:
self.stack.append(tag)
if tag == 'a' and attrs['href'].startswith('/url?q='):
assert self.state == ParserState.OUTSIDE, (self.state, self.current_title)
if tag == 'a' and attrs['href'].startswith('/url?q=') \
and self.state == ParserState.OUTSIDE:
self.state = ParserState.IN_LINK
href = attrs['href'][len('/url?q='):]
self.current_link = utils.web.urlunquote(utils.web.htmlToText(href.split('&sa')[0]))