mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-12-25 12:12:54 +01:00
DDG: rewrite _ddgurl() to return new and original request URLs
This uses utils.web.getUrlTargetAndContent(), which is specific to Limnoria and requires commit ProgVal/Limnoria@57b77a6725 or later.
From: 2db371a9fa
This commit is contained in:
parent
53318d142c
commit
ba04480405
21
plugin.py
21
plugin.py
@ -65,11 +65,17 @@ class DDG(callbacks.Plugin):
|
|||||||
# DuckDuckGo has a 'lite' site free of unparseable JavaScript
|
# DuckDuckGo has a 'lite' site free of unparseable JavaScript
|
||||||
# elements, so we'll use that to our advantage!
|
# elements, so we'll use that to our advantage!
|
||||||
url = "https://duckduckgo.com/lite?" + urlencode({"q": text})
|
url = "https://duckduckgo.com/lite?" + urlencode({"q": text})
|
||||||
|
|
||||||
log.debug("DDG: Using URL %s for search %s", url, text)
|
log.debug("DDG: Using URL %s for search %s", url, text)
|
||||||
data = utils.web.getUrl(url).decode("utf-8")
|
|
||||||
|
real_url, data = utils.web.getUrlTargetAndContent(url)
|
||||||
|
data = data.decode("utf-8")
|
||||||
soup = BeautifulSoup(data)
|
soup = BeautifulSoup(data)
|
||||||
|
|
||||||
# Remove "sponsored link" results
|
# Remove "sponsored link" results
|
||||||
return [td for td in soup.find_all('td') if 'result-sponsored' not in str(td.parent.get('class'))]
|
return (url, real_url, [td for td in soup.find_all('td') if 'result-sponsored' not in
|
||||||
|
str(td.parent.get('class'))])
|
||||||
|
|
||||||
|
|
||||||
def search_core(self, text, channel_context=None, max_results=None, show_snippet=None):
|
def search_core(self, text, channel_context=None, max_results=None, show_snippet=None):
|
||||||
"""
|
"""
|
||||||
@ -84,12 +90,13 @@ class DDG(callbacks.Plugin):
|
|||||||
self.log.debug('DDG: got %s for max results', maxr)
|
self.log.debug('DDG: got %s for max results', maxr)
|
||||||
|
|
||||||
# In a nutshell, the 'lite' site puts all of its usable content
|
# In a nutshell, the 'lite' site puts all of its usable content
|
||||||
# into tables. This means that headings, result snippets and
|
# into tables. This does mean that headings, result snippets and
|
||||||
# everything else are all using the same tag (<td>), which still makes
|
# everything else are all using the same tag (<td>), so parsing is
|
||||||
# parsing somewhat tricky.
|
# still somewhat tricky.
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
raw_results = self._ddgurl(text)
|
url, real_url, raw_results = self._ddgurl(text)
|
||||||
|
|
||||||
for t in raw_results:
|
for t in raw_results:
|
||||||
res = ''
|
res = ''
|
||||||
# Each valid result has a preceding heading in the format
|
# Each valid result has a preceding heading in the format
|
||||||
@ -158,7 +165,7 @@ class DDG(callbacks.Plugin):
|
|||||||
# Zero-click info: 8 (number)
|
# Zero-click info: 8 (number)
|
||||||
# Zero-click info: 8
|
# Zero-click info: 8
|
||||||
replies = {}
|
replies = {}
|
||||||
for td in self._ddgurl(text):
|
for td in self._ddgurl(text)[-1]:
|
||||||
if td.text.startswith("Zero-click info:"):
|
if td.text.startswith("Zero-click info:"):
|
||||||
# Make a dictionary of things
|
# Make a dictionary of things
|
||||||
item = td.text.split("Zero-click info:", 1)[1].strip()
|
item = td.text.split("Zero-click info:", 1)[1].strip()
|
||||||
|
Loading…
Reference in New Issue
Block a user