DDG: rewrite to fix "max results" not working

From: 7a6e2f9f97
This commit is contained in:
James Lu 2017-01-20 18:50:16 -08:00
parent 30615a7284
commit 48b5016f41

View File

@@ -81,6 +81,7 @@ class DDG(callbacks.Plugin):
# settings given to the function directly. # settings given to the function directly.
show_snippet = self.registryValue("showSnippet", channel_context) show_snippet = self.registryValue("showSnippet", channel_context)
maxr = max_results or self.registryValue("maxResults", channel_context) maxr = max_results or self.registryValue("maxResults", channel_context)
self.log.debug('DDG: got %s for max results', maxr)
# In a nutshell, the 'lite' site puts all of its usable content # In a nutshell, the 'lite' site puts all of its usable content
# into tables. This means that headings, result snippets and # into tables. This means that headings, result snippets and
@@ -88,50 +89,49 @@ class DDG(callbacks.Plugin):
# parsing somewhat tricky. # parsing somewhat tricky.
results = [] results = []
for t in self._ddgurl(text): raw_results = self._ddgurl(text)
# We run a for loop here to extract meaningful content: for t in raw_results:
for n in range(1, maxr): res = ''
res = '' # Each valid result has a preceding heading in the format
# Each valid result has a preceding heading in the format # '<td valign="top">1.&nbsp;</td>', etc.
# '<td valign="top">1.&nbsp;</td>', etc. if t.text[0].isdigit():
if ("%s." % n) in t.text: res = t.next_sibling.next_sibling
res = t.next_sibling.next_sibling if not res:
if not res: continue
continue try:
try: snippet = ''
snippet = '' # 1) Get a result snippet.
# 1) Get a result snippet.
if self.registryValue("showsnippet", channel_context): if self.registryValue("showsnippet", channel_context):
snippet = res.parent.next_sibling.next_sibling.\ snippet = res.parent.next_sibling.next_sibling.\
find_all("td")[-1] find_all("td")[-1]
snippet = snippet.text.strip() snippet = snippet.text.strip()
# 2) Fetch the link title. # 2) Fetch the link title.
title = res.a.text.strip() title = res.a.text.strip()
# 3) Fetch the result link. # 3) Fetch the result link.
origlink = link = res.a.get('href') origlink = link = res.a.get('href')
# As of 2017-01-20, some links on DuckDuckGo's site are shown going through # As of 2017-01-20, some links on DuckDuckGo's site are shown going through
# a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" # a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F"
# instead of simply being "https://duckduckgo.com". So, we decode these links here. # instead of simply being "https://duckduckgo.com". So, we decode these links here.
if link.startswith('/l/'): if link.startswith('/l/'):
linkparse = utils.web.urlparse(link) linkparse = utils.web.urlparse(link)
try: try:
link = parse_qs(linkparse.query)['uddg'][0] link = parse_qs(linkparse.query)['uddg'][0]
except KeyError: except KeyError:
# No link was given here, skip. # No link was given here, skip.
continue continue
except IndexError: except IndexError:
self.log.exception("DDG: failed to expand redirected result URL %s", origlink) self.log.exception("DDG: failed to expand redirected result URL %s", origlink)
continue continue
else: else:
self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) self.log.debug("DDG: expanded result URL from %s to %s", origlink, link)
s = format("%s - %s %u", ircutils.bold(title), snippet, link) s = format("%s - %s %u", ircutils.bold(title), snippet, link)
results.append(s) results.append(s)
except AttributeError: except AttributeError:
continue continue
return results return results[:maxr]
@wrap(['text']) @wrap(['text'])
def search(self, irc, msg, args, text): def search(self, irc, msg, args, text):