mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-02 17:29:22 +01:00
950aec696e
From: 52ec05a041
155 lines
6.3 KiB
Python
155 lines
6.3 KiB
Python
###
|
|
# Copyright (c) 2014-2015, James Lu
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions, and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions, and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# * Neither the name of the author of this software nor the name of
|
|
# contributors to this software may be used to endorse or promote products
|
|
# derived from this software without specific prior written consent.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
###
|
|
|
|
import supybot.utils as utils
|
|
from supybot.commands import *
|
|
import supybot.plugins as plugins
|
|
import supybot.ircutils as ircutils
|
|
import supybot.callbacks as callbacks
|
|
try:
|
|
from supybot.i18n import PluginInternationalization
|
|
_ = PluginInternationalization('DDG')
|
|
except ImportError:
|
|
# Placeholder that allows running the plugin on a bot
|
|
# without the i18n module
|
|
_ = lambda x: x
|
|
|
|
|
|
try: # Python 3
|
|
from urllib.parse import urlencode
|
|
except ImportError: # Python 2
|
|
from urllib import urlencode
|
|
try:
|
|
from bs4 import BeautifulSoup
|
|
except ImportError:
|
|
raise ImportError("Beautiful Soup 4 is required for this plugin: get it"
|
|
" at http://www.crummy.com/software/BeautifulSoup/bs4"
|
|
"/doc/#installing-beautiful-soup")
|
|
|
|
|
|
class DDG(callbacks.Plugin):
    """Searches for results on DuckDuckGo."""
    threaded = True

    def _ddgurl(self, text):
        """Fetch DuckDuckGo's 'lite' page for <text> and return its cells.

        Returns a list of every <td> element on the page, leaving out the
        ones whose parent element carries the 'result-sponsored' class.
        """
        # DuckDuckGo has a 'lite' site free of unparseable JavaScript
        # elements, so we'll use that to our advantage!
        url = "https://duckduckgo.com/lite?" + urlencode({"q": text})
        data = utils.web.getUrl(url).decode("utf-8")
        # NOTE(review): no parser is named, so bs4 chooses one from whatever
        # is installed. Pinning one (e.g. "html.parser") would make parsing
        # deterministic -- confirm the sibling navigation used by the
        # commands below still holds before changing it.
        soup = BeautifulSoup(data)
        # Remove "sponsored link" results
        return [td for td in soup.find_all('td')
                if 'result-sponsored' not in str(td.parent.get('class'))]

    def _result_number(self, cell):
        """Return the result number shown in a heading cell, or None.

        A heading cell's text is just the result number followed by a dot
        (and trailing whitespace), e.g. '1. '. Any other cell yields None.
        """
        label = cell.text.strip()
        if not label.endswith('.'):
            return None
        digits = label[:-1]
        if not digits.isdigit():
            return None
        try:
            return int(digits)
        except ValueError:
            # isdigit() accepts some characters that int() rejects.
            return None

    @wrap(['text'])
    def search(self, irc, msg, args, text):
        """<text>

        Searches for <text> on DuckDuckGo's web search."""
        replies = []
        channel = msg.args[0]
        # These settings do not change while we walk the page, so read
        # them once instead of once per table cell.
        maxr = self.registryValue("maxResults", channel)
        showsnippet = self.registryValue("showsnippet", channel)
        # In a nutshell, the 'lite' site puts all of its usable content
        # into tables. This means that headings, result snippets and
        # everything else are all using the same tag (<td>), which makes
        # parsing somewhat difficult.
        for cell in self._ddgurl(text):
            # Each valid result has a preceding heading in the format
            # '<td valign="top">1. </td>', etc. Match the heading exactly
            # (a substring test would also accept '11.' or '21.' for 1)
            # and honour maxResults inclusively.
            number = self._result_number(cell)
            if number is None or not 1 <= number <= maxr:
                continue
            try:
                # The cell holding the title link follows the heading,
                # separated from it by one intermediate sibling node.
                res = cell.next_sibling.next_sibling
                if not res:
                    continue
                snippet = ''
                # 1) Get a result snippet.
                if showsnippet:
                    snippet = res.parent.next_sibling.next_sibling.\
                        find_all("td")[-1]
                    snippet = snippet.text.strip()
                # 2) Fetch the link title.
                title = res.a.text.strip()
                # 3) Fetch the result link.
                link = res.a.get('href')
                s = format("%s - %s %u", ircutils.bold(title), snippet,
                           link)
                replies.append(s)
            except AttributeError:
                # The markup around this heading is not what we expect;
                # skip this result rather than failing the whole command.
                continue
        if not replies:
            irc.error("No results found.")
        else:
            irc.reply(', '.join(replies))

    @wrap(['text'])
    def zeroclick(self, irc, msg, args, text):
        """<text>

        Looks up <text> on DuckDuckGo's zero-click engine."""
        # Zero-click can give multiple replies for things if the
        # query is ambiguous, sort of like an encyclopedia.

        # For example, looking up "2^3" will give both:
        # Zero-click info: 8 (number)
        # Zero-click info: 8
        replies = {}
        for cell in self._ddgurl(text):
            if not cell.text.startswith("Zero-click info:"):
                continue
            # Make a dictionary of things
            item = cell.text.split("Zero-click info:", 1)[1].strip()
            try:
                body = cell.parent.next_sibling.next_sibling.find("td")
                # Condense newlines (<br> tags)
                for br in body.find_all('br'):
                    br.replace_with(' - ')
            except AttributeError:
                # The row that should hold the answer is missing or has
                # an unexpected shape; ignore this entry.
                continue
            res = body.text.strip().split("\n")[0]
            # Some zero-click results have an attached link to them.
            anchor = body.a
            link = anchor.get('href') if anchor is not None else None
            # Others have a piece of meaningless JavaScript (or no href
            # at all), which is not worth showing.
            if link and link != "javascript:;":
                res += format(" %u", link)
            replies[item] = res
        if not replies:
            irc.error("No zero-click info could be found for '%s'." %
                      text, Raise=True)
        s = ["%s - %s" % (ircutils.bold(k), v) for k, v in replies.items()]
        irc.reply("; ".join(s))


# Expose the plugin class under the generic module-level name `Class`.
Class = DDG
|
|
|
|
|
|
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|