DDG: fix code handling, remove regex parsing

From: 64c29496df
This commit is contained in:
James Lu 2014-12-15 17:28:06 -08:00
parent 9298b71732
commit 1a6e0127bd
1 changed file with 16 additions and 19 deletions

View File

@@ -41,7 +41,6 @@ except ImportError:
# without the i18n module # without the i18n module
_ = lambda x:x _ = lambda x:x
import re
try: # Python 3 try: # Python 3
from urllib.parse import urlencode from urllib.parse import urlencode
except ImportError: # Python 2 except ImportError: # Python 2
@@ -59,7 +58,7 @@ class DDG(callbacks.Plugin):
def search(self, irc, msg, args, text): def search(self, irc, msg, args, text):
"""<query> """<query>
Searches for <query> on DuckDuckGo (web search).""" Searches for <query> on DuckDuckGo (web search)."""
url = "https://duckduckgo.com/lite?" + urlencode({"q":text}) url = "https://duckduckgo.com/lite?" + urlencode({"q":text})
try: try:
@@ -67,26 +66,24 @@ class DDG(callbacks.Plugin):
except utils.web.Error as e: except utils.web.Error as e:
self.log.info(url) self.log.info(url)
irc.error(str(e), Raise=True) irc.error(str(e), Raise=True)
# GRR, having to clean up our HTML for the results...
data = re.sub('\t|\r|\n', '', data)
data = re.sub('\s{2,}', ' ', data)
soup = BeautifulSoup(data) soup = BeautifulSoup(data)
tds = soup.find_all('td') for t in soup.find_all('td'):
for t in tds:
if "1." in t.text: if "1." in t.text:
res = t.next_sibling.next_sibling res = t.next_sibling.next_sibling
break try:
try: # 1) Get a result snippet.
# 1) Fetch the result link. snippet = res.parent.next_sibling.next_sibling.find("td",
link = res.a.get('href') class_="result-snippet")
# 2) Get a result snippet. # 2) Fetch the result link.
snippet = res.parent.next_sibling.next_sibling.find("td", link = res.a.get('href')
class_="result-snippet") snippet = snippet.text.strip()
snippet = snippet.text.strip()
s = format("%s - %u", snippet, link)
s = format("%s - %u", snippet, link) irc.reply(s)
irc.reply(s) return
except AttributeError: except (AttributeError, UnboundLocalError):
continue
else:
irc.error("No results found.") irc.error("No results found.")
search = wrap(search, ['text']) search = wrap(search, ['text'])