mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-05 02:29:22 +01:00
DDG: expand redirect links in search results
Some links in DuckDuckGo's search results are now routed through a redirect service.
These links take the form "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" instead of simply being "https://duckduckgo.com", so we decode them here to recover the target URL from the "uddg" query parameter.
From: 8187d51cef
This commit is contained in:
parent
98600b26e8
commit
c92bc0ca0c
18
plugin.py
18
plugin.py
@ -43,9 +43,10 @@ except ImportError:
|
||||
|
||||
|
||||
try: # Python 3
|
||||
from urllib.parse import urlencode
|
||||
from urllib.parse import urlencode, parse_qs
|
||||
except ImportError: # Python 2
|
||||
from urllib import urlencode
|
||||
from urlparse import parse_qs
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
@ -99,7 +100,20 @@ class DDG(callbacks.Plugin):
|
||||
# 2) Fetch the link title.
|
||||
title = res.a.text.strip()
|
||||
# 3) Fetch the result link.
|
||||
link = res.a.get('href')
|
||||
origlink = link = res.a.get('href')
|
||||
|
||||
# As of 2017-01-20, some links on DuckDuckGo's site are shown going through
|
||||
# a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F"
|
||||
# instead of simply being "https://duckduckgo.com". So, we decode these links here.
|
||||
if link.startswith('/l/'):
|
||||
linkparse = utils.web.urlparse(link)
|
||||
try:
|
||||
link = parse_qs(linkparse.query)['uddg'][0]
|
||||
except (IndexError, KeyError):
|
||||
self.log.exception("DDG: failed to expand redirected result URL %s", origlink)
|
||||
else:
|
||||
self.log.debug("DDG: expanded result URL from %s to %s", origlink, link)
|
||||
|
||||
s = format("%s - %s %u", ircutils.bold(title), snippet,
|
||||
link)
|
||||
replies.append(s)
|
||||
|
Loading…
Reference in New Issue
Block a user