Merge DDG plugin from my repository

Add 'plugins/DDG/' from commit '410b51657aeb93b4836d62168a90b319677a45f1'

git-subtree-dir: plugins/DDG
git-subtree-mainline: 91381aec0f
git-subtree-split: 410b51657a
This commit is contained in:
James Lu 2020-05-15 11:28:48 -07:00
commit db184f962b
6 changed files with 349 additions and 0 deletions

7
plugins/DDG/README.md Normal file
View File

@ -0,0 +1,7 @@
Provides an interface to DuckDuckGo's web search. This plugin requires the [Beautiful Soup 4](http://www.crummy.com/software/BeautifulSoup/bs4/doc/) Python module.
#### Example
> <+GLolol> %ddg search eiffel tower
> <@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - <https://en.wikipedia.org/wiki/Eiffel_Tower>

68
plugins/DDG/__init__.py Normal file
View File

@ -0,0 +1,68 @@
###
# Copyright (c) 2014-2015, James Lu
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
DDG: Searches for results on DuckDuckGo.
"""
import supybot
import supybot.world as world
# Use this for the version of this plugin. You may wish to put a CVS keyword
# in here if you're keeping the plugin in CVS or some similar system.
__version__ = ""
__author__ = getattr(supybot.authors, 'jlu',
supybot.Author('James Lu', 'GLolol', 'james@overdrivenetworks.com'))
# This is a dictionary mapping supybot.Author instances to lists of
# contributions.
__contributors__ = {}
# This is a url where the most recent plugin package can be downloaded.
__url__ = 'https://github.com/jlu5/SupyPlugins/'
from . import config
from . import plugin
from imp import reload
# In case we're being reloaded.
reload(config)
reload(plugin)
# Add more reloads here if you add third-party modules and want them to be
# reloaded when this plugin is reloaded. Don't forget to import them as well!
if world.testing:
from . import test
Class = plugin.Class
configure = config.configure
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

61
plugins/DDG/config.py Normal file
View File

@ -0,0 +1,61 @@
###
# Copyright (c) 2014-2015, James Lu
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import supybot.conf as conf
import supybot.registry as registry
try:
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('DDG')
except:
# Placeholder that allows to run the plugin on a bot
# without the i18n module
_ = lambda x: x
def configure(advanced):
# This will be called by supybot to configure this module. advanced is
# a bool that specifies whether the user identified themself as an advanced
# user or not. You should effect your configuration by manipulating the
# registry as appropriate.
from supybot.questions import expect, anything, something, yn
conf.registerPlugin('DDG', True)
DDG = conf.registerPlugin('DDG')
conf.registerChannelValue(DDG, 'maxResults',
registry.PositiveInteger(4, _("""Determines the maximum number of
results the bot will respond with.""")))
conf.registerChannelValue(DDG, 'showSnippet',
registry.Boolean(True, _("""Determines whether the bot will show a
snippet of each resulting link. If False, it will show the title
of the link instead.""")))
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -0,0 +1 @@
# Stub so local is a module, used for third-party modules

172
plugins/DDG/plugin.py Normal file
View File

@ -0,0 +1,172 @@
###
# Copyright (c) 2014-2017, James Lu <james@overdrivenetworks.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
import supybot.log as log
try:
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('DDG')
except ImportError:
# Placeholder that allows to run the plugin on a bot
# without the i18n module
_ = lambda x: x
try: # Python 3
from urllib.parse import urlencode, parse_qs
except ImportError: # Python 2
from urllib import urlencode
from urlparse import parse_qs
try:
from bs4 import BeautifulSoup
except ImportError:
raise ImportError("Beautiful Soup 4 is required for this plugin: get it"
" at http://www.crummy.com/software/BeautifulSoup/bs4"
"/doc/#installing-beautiful-soup")
class DDG(callbacks.Plugin):
"""Searches for results on DuckDuckGo."""
threaded = True
@staticmethod
def _ddgurl(text):
# DuckDuckGo has a 'lite' site free of unparseable JavaScript
# elements, so we'll use that to our advantage!
url = "https://duckduckgo.com/lite?" + urlencode({"q": text})
log.debug("DDG: Using URL %s for search %s", url, text)
real_url, data = utils.web.getUrlTargetAndContent(url)
data = data.decode("utf-8")
soup = BeautifulSoup(data)
# Remove "sponsored link" results
return (url, real_url, [td for td in soup.find_all('td') if 'result-sponsored' not in
str(td.parent.get('class'))])
def search_core(self, text, channel_context=None, max_results=None, show_snippet=None):
"""
Core results fetcher for the DDG plugin. Other plugins can call this as well via
irc.getCallback('DDG').search_core(...)
"""
if show_snippet is None:
# Note: don't use ternary here, or the registry value will override any False
# settings given to the function directly.
show_snippet = self.registryValue("showSnippet", channel_context)
maxr = max_results or self.registryValue("maxResults", channel_context)
self.log.debug('DDG: got %s for max results', maxr)
# In a nutshell, the 'lite' site puts all of its usable content
# into tables. This does mean that headings, result snippets and
# everything else are all using the same tag (<td>), so parsing is
# still somewhat tricky.
results = []
url, real_url, raw_results = self._ddgurl(text)
if real_url != url:
# We received a redirect, likely from something like a !bang request.
# Don't bother parsing the target page, as it probably won't work anyways.
return [('', '', real_url)]
for t in raw_results:
res = ''
# Each valid result has a preceding heading in the format
# '<td valign="top">1.&nbsp;</td>', etc.
if t.text[0].isdigit():
res = t.next_sibling.next_sibling
if not res:
continue
try:
snippet = ''
# 1) Get a result snippet.
if self.registryValue("showsnippet", channel_context):
snippet = res.parent.next_sibling.next_sibling.\
find_all("td")[-1]
snippet = snippet.text.strip()
# 2) Fetch the link title.
title = res.a.text.strip()
# 3) Fetch the result link.
origlink = link = res.a.get('href')
# As of 2017-01-20, some links on DuckDuckGo's site are shown going through
# a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F"
# instead of simply being "https://duckduckgo.com". So, we decode these links here.
if link.startswith('/l/'):
linkparse = utils.web.urlparse(link)
try:
link = parse_qs(linkparse.query)['uddg'][0]
except KeyError:
# No link was given here, skip.
continue
except IndexError:
self.log.exception("DDG: failed to expand redirected result URL %s", origlink)
continue
else:
self.log.debug("DDG: expanded result URL from %s to %s", origlink, link)
# Return a list of tuples in the form (link title, snippet text, link)
results.append((title, snippet, link))
except AttributeError:
continue
return results[:maxr]
@wrap(['text'])
def search(self, irc, msg, args, text):
"""<text>
Searches for <text> on DuckDuckGo's web search."""
results = self.search_core(text, msg.args[0])
if not results:
irc.error("No results found.")
else:
strings = []
for r in results:
if not r[0]:
# This result has no title, so it's likely a redirect from !bang.
strings.append(format("See %u", r[2]))
else:
strings.append(format("%s - %s %u", ircutils.bold(r[0]), r[1], r[2]))
irc.reply(', '.join(strings))
Class = DDG
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

40
plugins/DDG/test.py Normal file
View File

@ -0,0 +1,40 @@
###
# Copyright (c) 2014-2017, James Lu <james@overdrivenetworks.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
from supybot.test import *
class DDGTestCase(PluginTestCase):
plugins = ('DDG',)
def testSearch(self):
self.assertRegexp(
'ddg search wikipedia', 'Wikipedia.*? - .*?https?\:\/\/')
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: