diff --git a/plugins/DDG/README.md b/plugins/DDG/README.md new file mode 100644 index 000000000..b18634bee --- /dev/null +++ b/plugins/DDG/README.md @@ -0,0 +1,7 @@ +Provides an interface to DuckDuckGo's web search. This plugin requires the [Beautiful Soup 4](http://www.crummy.com/software/BeautifulSoup/bs4/doc/) Python module. + +#### Example + +> <+GLolol> %ddg search eiffel tower + +> <@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - diff --git a/plugins/DDG/__init__.py b/plugins/DDG/__init__.py new file mode 100644 index 000000000..56d6993ef --- /dev/null +++ b/plugins/DDG/__init__.py @@ -0,0 +1,68 @@ +### +# Copyright (c) 2014-2015, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +""" +DDG: Searches for results on DuckDuckGo. +""" + +import supybot +import supybot.world as world + +# Use this for the version of this plugin. You may wish to put a CVS keyword +# in here if you're keeping the plugin in CVS or some similar system. +__version__ = "" + +__author__ = getattr(supybot.authors, 'jlu', + supybot.Author('James Lu', 'GLolol', 'james@overdrivenetworks.com')) + +# This is a dictionary mapping supybot.Author instances to lists of +# contributions. +__contributors__ = {} + +# This is a url where the most recent plugin package can be downloaded. +__url__ = 'https://github.com/jlu5/SupyPlugins/' + +from . import config +from . import plugin +from imp import reload +# In case we're being reloaded. +reload(config) +reload(plugin) +# Add more reloads here if you add third-party modules and want them to be +# reloaded when this plugin is reloaded. Don't forget to import them as well! + +if world.testing: + from . import test + +Class = plugin.Class +configure = config.configure + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/DDG/config.py b/plugins/DDG/config.py new file mode 100644 index 000000000..a70294e0c --- /dev/null +++ b/plugins/DDG/config.py @@ -0,0 +1,61 @@ +### +# Copyright (c) 2014-2015, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +import supybot.conf as conf +import supybot.registry as registry +try: + from supybot.i18n import PluginInternationalization + _ = PluginInternationalization('DDG') +except: + # Placeholder that allows to run the plugin on a bot + # without the i18n module + _ = lambda x: x + + +def configure(advanced): + # This will be called by supybot to configure this module. advanced is + # a bool that specifies whether the user identified themself as an advanced + # user or not. You should effect your configuration by manipulating the + # registry as appropriate. + from supybot.questions import expect, anything, something, yn + conf.registerPlugin('DDG', True) + + +DDG = conf.registerPlugin('DDG') +conf.registerChannelValue(DDG, 'maxResults', + registry.PositiveInteger(4, _("""Determines the maximum number of + results the bot will respond with."""))) +conf.registerChannelValue(DDG, 'showSnippet', + registry.Boolean(True, _("""Determines whether the bot will show a + snippet of each resulting link. If False, it will show the title + of the link instead."""))) + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/DDG/local/__init__.py b/plugins/DDG/local/__init__.py new file mode 100644 index 000000000..e86e97b86 --- /dev/null +++ b/plugins/DDG/local/__init__.py @@ -0,0 +1 @@ +# Stub so local is a module, used for third-party modules diff --git a/plugins/DDG/plugin.py b/plugins/DDG/plugin.py new file mode 100644 index 000000000..004136efd --- /dev/null +++ b/plugins/DDG/plugin.py @@ -0,0 +1,172 @@ +### +# Copyright (c) 2014-2017, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +import supybot.utils as utils +from supybot.commands import * +import supybot.plugins as plugins +import supybot.ircutils as ircutils +import supybot.callbacks as callbacks +import supybot.log as log +try: + from supybot.i18n import PluginInternationalization + _ = PluginInternationalization('DDG') +except ImportError: + # Placeholder that allows to run the plugin on a bot + # without the i18n module + _ = lambda x: x + + +try: # Python 3 + from urllib.parse import urlencode, parse_qs +except ImportError: # Python 2 + from urllib import urlencode + from urlparse import parse_qs +try: + from bs4 import BeautifulSoup +except ImportError: + raise ImportError("Beautiful Soup 4 is required for this plugin: get it" + " at http://www.crummy.com/software/BeautifulSoup/bs4" + "/doc/#installing-beautiful-soup") + + +class DDG(callbacks.Plugin): + """Searches for results on DuckDuckGo.""" + threaded = True + + @staticmethod + def _ddgurl(text): + # DuckDuckGo has a 'lite' site free of unparseable JavaScript + # elements, so we'll use that to our advantage! + url = "https://duckduckgo.com/lite?" + urlencode({"q": text}) + + log.debug("DDG: Using URL %s for search %s", url, text) + + real_url, data = utils.web.getUrlTargetAndContent(url) + data = data.decode("utf-8") + soup = BeautifulSoup(data) + + # Remove "sponsored link" results + return (url, real_url, [td for td in soup.find_all('td') if 'result-sponsored' not in + str(td.parent.get('class'))]) + + + def search_core(self, text, channel_context=None, max_results=None, show_snippet=None): + """ + Core results fetcher for the DDG plugin. Other plugins can call this as well via + irc.getCallback('DDG').search_core(...) + """ + if show_snippet is None: + # Note: don't use ternary here, or the registry value will override any False + # settings given to the function directly. + show_snippet = self.registryValue("showSnippet", channel_context) + maxr = max_results or self.registryValue("maxResults", channel_context) + self.log.debug('DDG: got %s for max results', maxr) + + # In a nutshell, the 'lite' site puts all of its usable content + # into tables. This does mean that headings, result snippets and + # everything else are all using the same tag (), so parsing is + # still somewhat tricky. + results = [] + + url, real_url, raw_results = self._ddgurl(text) + + if real_url != url: + # We received a redirect, likely from something like a !bang request. + # Don't bother parsing the target page, as it probably won't work anyways. + return [('', '', real_url)] + + for t in raw_results: + res = '' + # Each valid result has a preceding heading in the format + # '1. ', etc. + if t.text[0].isdigit(): + res = t.next_sibling.next_sibling + if not res: + continue + try: + snippet = '' + # 1) Get a result snippet. + + if self.registryValue("showsnippet", channel_context): + snippet = res.parent.next_sibling.next_sibling.\ + find_all("td")[-1] + snippet = snippet.text.strip() + # 2) Fetch the link title. + title = res.a.text.strip() + # 3) Fetch the result link. + origlink = link = res.a.get('href') + + # As of 2017-01-20, some links on DuckDuckGo's site are shown going through + # a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" + # instead of simply being "https://duckduckgo.com". So, we decode these links here. + if link.startswith('/l/'): + linkparse = utils.web.urlparse(link) + try: + link = parse_qs(linkparse.query)['uddg'][0] + except KeyError: + # No link was given here, skip. + continue + except IndexError: + self.log.exception("DDG: failed to expand redirected result URL %s", origlink) + continue + else: + self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) + + # Return a list of tuples in the form (link title, snippet text, link) + results.append((title, snippet, link)) + + except AttributeError: + continue + return results[:maxr] + + @wrap(['text']) + def search(self, irc, msg, args, text): + """ + + Searches for on DuckDuckGo's web search.""" + results = self.search_core(text, msg.args[0]) + if not results: + irc.error("No results found.") + else: + strings = [] + + for r in results: + if not r[0]: + # This result has no title, so it's likely a redirect from !bang. + strings.append(format("See %u", r[2])) + else: + strings.append(format("%s - %s %u", ircutils.bold(r[0]), r[1], r[2])) + + irc.reply(', '.join(strings)) + +Class = DDG + + +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: diff --git a/plugins/DDG/test.py b/plugins/DDG/test.py new file mode 100644 index 000000000..6829aa00f --- /dev/null +++ b/plugins/DDG/test.py @@ -0,0 +1,40 @@ +### +# Copyright (c) 2014-2017, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +from supybot.test import * + +class DDGTestCase(PluginTestCase): + plugins = ('DDG',) + + def testSearch(self): + self.assertRegexp( + 'ddg search wikipedia', 'Wikipedia.*? - .*?https?\:\/\/') + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: