DDG: fix code handling, remove regex parsing

From: 64c29496df
This commit is contained in:
James Lu 2014-12-15 17:28:06 -08:00
parent 9298b71732
commit 1a6e0127bd

View File

@@ -41,7 +41,6 @@ except ImportError:
# without the i18n module # without the i18n module
_ = lambda x:x _ = lambda x:x
import re
try: # Python 3 try: # Python 3
from urllib.parse import urlencode from urllib.parse import urlencode
except ImportError: # Python 2 except ImportError: # Python 2
@@ -67,26 +66,24 @@ class DDG(callbacks.Plugin):
except utils.web.Error as e: except utils.web.Error as e:
self.log.info(url) self.log.info(url)
irc.error(str(e), Raise=True) irc.error(str(e), Raise=True)
# GRR, having to clean up our HTML for the results...
data = re.sub('\t|\r|\n', '', data)
data = re.sub('\s{2,}', ' ', data)
soup = BeautifulSoup(data) soup = BeautifulSoup(data)
tds = soup.find_all('td') for t in soup.find_all('td'):
for t in tds:
if "1." in t.text: if "1." in t.text:
res = t.next_sibling.next_sibling res = t.next_sibling.next_sibling
break
try: try:
# 1) Fetch the result link. # 1) Get a result snippet.
link = res.a.get('href')
# 2) Get a result snippet.
snippet = res.parent.next_sibling.next_sibling.find("td", snippet = res.parent.next_sibling.next_sibling.find("td",
class_="result-snippet") class_="result-snippet")
# 2) Fetch the result link.
link = res.a.get('href')
snippet = snippet.text.strip() snippet = snippet.text.strip()
s = format("%s - %u", snippet, link) s = format("%s - %u", snippet, link)
irc.reply(s) irc.reply(s)
except AttributeError: return
except (AttributeError, UnboundLocalError):
continue
else:
irc.error("No results found.") irc.error("No results found.")
search = wrap(search, ['text']) search = wrap(search, ['text'])