From 1a6e0127bdb9fe9d78fc533fb3c6017eb9d2aa3b Mon Sep 17 00:00:00 2001 From: James Lu Date: Mon, 15 Dec 2014 17:28:06 -0800 Subject: [PATCH] DDG: fix code handling, remove regex parsing From: https://github.com/jlu5/SupyPlugins/commit/64c29496dfbe4918a76fa71b5efbfd6b67c9bbfd --- plugin.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/plugin.py b/plugin.py index 93d533f0d..cbaccaf22 100644 --- a/plugin.py +++ b/plugin.py @@ -41,7 +41,6 @@ except ImportError: # without the i18n module _ = lambda x:x -import re try: # Python 3 from urllib.parse import urlencode except ImportError: # Python 2 @@ -59,7 +58,7 @@ class DDG(callbacks.Plugin): def search(self, irc, msg, args, text): """<text> - + Searches for <text> on DuckDuckGo (web search).""" url = "https://duckduckgo.com/lite?" + urlencode({"q":text}) try: @@ -67,26 +66,24 @@ class DDG(callbacks.Plugin): except utils.web.Error as e: self.log.info(url) irc.error(str(e), Raise=True) - # GRR, having to clean up our HTML for the results... - data = re.sub('\t|\r|\n', '', data) - data = re.sub('\s{2,}', ' ', data) soup = BeautifulSoup(data) - tds = soup.find_all('td') - for t in tds: + for t in soup.find_all('td'): if "1." in t.text: res = t.next_sibling.next_sibling - break - try: - # 1) Fetch the result link. - link = res.a.get('href') - # 2) Get a result snippet. - snippet = res.parent.next_sibling.next_sibling.find("td", - class_="result-snippet") - snippet = snippet.text.strip() - - s = format("%s - %u", snippet, link) - irc.reply(s) - except AttributeError: + try: + # 1) Get a result snippet. + snippet = res.parent.next_sibling.next_sibling.find("td", + class_="result-snippet") + # 2) Fetch the result link. + link = res.a.get('href') + snippet = snippet.text.strip() + + s = format("%s - %u", snippet, link) + irc.reply(s) + return + except (AttributeError, UnboundLocalError): + continue + else: irc.error("No results found.") search = wrap(search, ['text'])