From 3459d23a4d8d50f8542f581b6e74d078f2101585 Mon Sep 17 00:00:00 2001 From: James Lu Date: Wed, 10 Dec 2014 18:59:11 -0800 Subject: [PATCH 01/36] DDG: First commit From: https://github.com/jlu5/SupyPlugins/commit/f16f666ebd3233d83bfc2f2ba4bed9427b0749f1 --- README.md | 1 + __init__.py | 68 +++++++++++++++++++++++++++++ config.py | 56 ++++++++++++++++++++++++ local/__init__.py | 1 + plugin.py | 108 ++++++++++++++++++++++++++++++++++++++++++++++ test.py | 37 ++++++++++++++++ 6 files changed, 271 insertions(+) create mode 100644 README.md create mode 100644 __init__.py create mode 100644 config.py create mode 100644 local/__init__.py create mode 100644 plugin.py create mode 100644 test.py diff --git a/README.md b/README.md new file mode 100644 index 000000000..ae64ff077 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +Searches results on DuckDuckGo. diff --git a/__init__.py b/__init__.py new file mode 100644 index 000000000..674eb6385 --- /dev/null +++ b/__init__.py @@ -0,0 +1,68 @@ +### +# Copyright (c) 2014, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +""" +DDG: Searches results on DuckDuckGo. +""" + +import supybot +import supybot.world as world + +# Use this for the version of this plugin. You may wish to put a CVS keyword +# in here if you're keeping the plugin in CVS or some similar system. +__version__ = "" + +# XXX Replace this with an appropriate author or supybot.Author instance. +__author__ = supybot.authors.unknown + +# This is a dictionary mapping supybot.Author instances to lists of +# contributions. +__contributors__ = {} + +# This is a url where the most recent plugin package can be downloaded. +__url__ = '' + +from . import config +from . import plugin +from imp import reload +# In case we're being reloaded. +reload(config) +reload(plugin) +# Add more reloads here if you add third-party modules and want them to be +# reloaded when this plugin is reloaded. Don't forget to import them as well! + +if world.testing: + from . 
import test + +Class = plugin.Class +configure = config.configure + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/config.py b/config.py new file mode 100644 index 000000000..2e8b3da9d --- /dev/null +++ b/config.py @@ -0,0 +1,56 @@ +### +# Copyright (c) 2014, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +### + +import supybot.conf as conf +import supybot.registry as registry +try: + from supybot.i18n import PluginInternationalization + _ = PluginInternationalization('DDG') +except: + # Placeholder that allows to run the plugin on a bot + # without the i18n module + _ = lambda x:x + +def configure(advanced): + # This will be called by supybot to configure this module. advanced is + # a bool that specifies whether the user identified themself as an advanced + # user or not. You should effect your configuration by manipulating the + # registry as appropriate. + from supybot.questions import expect, anything, something, yn + conf.registerPlugin('DDG', True) + + +DDG = conf.registerPlugin('DDG') +# This is where your configuration variables (if any) should go. For example: +# conf.registerGlobalValue(DDG, 'someConfigVariableName', +# registry.Boolean(False, _("""Help for someConfigVariableName."""))) + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/local/__init__.py b/local/__init__.py new file mode 100644 index 000000000..e86e97b86 --- /dev/null +++ b/local/__init__.py @@ -0,0 +1 @@ +# Stub so local is a module, used for third-party modules diff --git a/plugin.py b/plugin.py new file mode 100644 index 000000000..3c42f8776 --- /dev/null +++ b/plugin.py @@ -0,0 +1,108 @@ +### +# Copyright (c) 2014, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +import supybot.utils as utils +from supybot.commands import * +import supybot.plugins as plugins +import supybot.ircutils as ircutils +import supybot.callbacks as callbacks +try: + from supybot.i18n import PluginInternationalization + _ = PluginInternationalization('DDG') +except ImportError: + # Placeholder that allows to run the plugin on a bot + # without the i18n module + _ = lambda x:x + +import re +try: # Python 3 + from urllib.parse import urlencode +except ImportError: # Python 2 + from urllib import urlencode +try: + from bs4 import BeautifulSoup +except ImportError: + raise ImportError("Beautiful Soup 4 is required for this plugin: get it" + " at http://www.crummy.com/software/BeautifulSoup/bs4/doc/" + "#installing-beautiful-soup") + +class DDG(callbacks.Plugin): + """Searches results on DuckDuckGo.""" + threaded = True + + def search(self, irc, msg, args, text): + """ + + Searches for on DuckDuckGo (web search).""" + url = "https://duckduckgo.com/lite?" 
+ urlencode({"q":text}) + try: + data = utils.web.getUrl(url).decode("utf-8") + except utils.web.Error as e: + self.log.info(url) + irc.error(str(e), Raise=True) + # GRR, having to clean up our HTML for the results... + data = re.sub('\t|\r|\n', '', data) + data = re.sub('\s{2,}', ' ', data) + soup = BeautifulSoup(data) + # DuckDuckGo lite uses tables for everything. Each WEB result is made + # up of 3 tags: + tables = soup.find_all('table') + regex = re.compile('.*!(sponsored).*') + + webresults = tables[1].find_all('tr') + if not webresults: + # Sometimes there will be another table for page navigation. + webresults = tables[2].find_all('tr') + if webresults: + try: + if 'result-sponsored' in webresults[0]["class"]: + webresults = webresults[4:] + except KeyError: pass + # 1) The link and title. + link = webresults[0].find('a').get('href') + # 2) A result snippet. + snippet = webresults[1].find("td", class_="result-snippet") + try: + snippet = snippet.text.strip() + except AttributeError: + snippet = webresults[1].td.text.strip() + # 3) The link-text; essentially the same as the link in 1), but with the + # URI (http(s)://) removed. We do not need this section. + + s = format("%s - %u", snippet, link) + irc.reply(s) + else: + irc.error("No results found.") + search = wrap(search, ['text']) + +Class = DDG + + +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: diff --git a/test.py b/test.py new file mode 100644 index 000000000..32cfe1a30 --- /dev/null +++ b/test.py @@ -0,0 +1,37 @@ +### +# Copyright (c) 2014, James Lu +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +### + +from supybot.test import * + +class DDGTestCase(PluginTestCase): + plugins = ('DDG',) + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From 5d0de2d44e31bc1d11ab86ce852c79cfd31a4842 Mon Sep 17 00:00:00 2001 From: James Lu Date: Wed, 10 Dec 2014 19:29:53 -0800 Subject: [PATCH 02/36] DDG: remove unused variable From: https://github.com/jlu5/SupyPlugins/commit/3c6126fbc46f73e80b388401ff013eb2f1cdb857 --- plugin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugin.py b/plugin.py index 3c42f8776..1e70dea04 100644 --- a/plugin.py +++ b/plugin.py @@ -54,7 +54,7 @@ except ImportError: "#installing-beautiful-soup") class DDG(callbacks.Plugin): - """Searches results on DuckDuckGo.""" + """Searches for results on DuckDuckGo.""" threaded = True def search(self, irc, msg, args, text): @@ -74,7 +74,6 @@ class DDG(callbacks.Plugin): # DuckDuckGo lite uses tables for everything. Each WEB result is made # up of 3 tags: tables = soup.find_all('table') - regex = re.compile('.*!(sponsored).*') webresults = tables[1].find_all('tr') if not webresults: From f8d5afbb465dd63057e97fd5a7ed1158c16808dc Mon Sep 17 00:00:00 2001 From: James Lu Date: Thu, 11 Dec 2014 07:47:58 -0800 Subject: [PATCH 03/36] DDG: strip ads better + skip zeroclick info From: https://github.com/jlu5/SupyPlugins/commit/625ef777cdc292ca7b8562e697c44b748a84edf8 --- plugin.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/plugin.py b/plugin.py index 1e70dea04..d44718294 100644 --- a/plugin.py +++ b/plugin.py @@ -56,7 +56,7 @@ except ImportError: class DDG(callbacks.Plugin): """Searches for results on DuckDuckGo.""" threaded = True - + def search(self, irc, msg, args, text): """ @@ -74,14 +74,15 @@ class DDG(callbacks.Plugin): # DuckDuckGo lite uses tables for everything. 
Each WEB result is made # up of 3 tags: tables = soup.find_all('table') - - webresults = tables[1].find_all('tr') + + # Sometimes there is an extra table for page navigation + webresults = tables[2].find_all('tr') if not webresults: - # Sometimes there will be another table for page navigation. - webresults = tables[2].find_all('tr') + webresults = tables[1].find_all('tr') if webresults: try: - if 'result-sponsored' in webresults[0]["class"]: + while 'result-sponsored' in webresults[0]["class"]: + self.log.debug("DDG: stripping 1 sponsored/ad result.") webresults = webresults[4:] except KeyError: pass # 1) The link and title. From f627837496cc5c4cce89968e9b28802b92512aa4 Mon Sep 17 00:00:00 2001 From: James Lu Date: Thu, 11 Dec 2014 18:39:59 -0800 Subject: [PATCH 04/36] DDG/PkgInfo: Update tests From: https://github.com/jlu5/SupyPlugins/commit/f73028a0ed9ba5c4261069294347b8c0e7f9fe41 --- test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test.py b/test.py index 32cfe1a30..3eea5c218 100644 --- a/test.py +++ b/test.py @@ -33,5 +33,8 @@ from supybot.test import * class DDGTestCase(PluginTestCase): plugins = ('DDG',) + def testSearch(self): + self.assertRegexp('ddg search wikipedia', 'Wikipedia.*? 
- .*?https?\:\/\/') + # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From be3112197b801b718abff03877dfa5c112c1c6b0 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sun, 14 Dec 2014 20:00:43 -0800 Subject: [PATCH 05/36] DDG: bugfix From: https://github.com/jlu5/SupyPlugins/commit/ad8931ad73d7ebe9f8c7a9460faaf00dba2ea1d4 --- plugin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugin.py b/plugin.py index d44718294..48acd4e25 100644 --- a/plugin.py +++ b/plugin.py @@ -76,8 +76,9 @@ class DDG(callbacks.Plugin): tables = soup.find_all('table') # Sometimes there is an extra table for page navigation - webresults = tables[2].find_all('tr') - if not webresults: + try: + webresults = tables[2].find_all('tr') + except IndexError: webresults = tables[1].find_all('tr') if webresults: try: From 9298b717323f889646f583ba44c2baad7b32732a Mon Sep 17 00:00:00 2001 From: James Lu Date: Sun, 14 Dec 2014 21:55:15 -0800 Subject: [PATCH 06/36] DDG: attempt at simplify code From: https://github.com/jlu5/SupyPlugins/commit/b62249c557e1a667cc414745ce837aa3b151497f --- plugin.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/plugin.py b/plugin.py index 48acd4e25..93d533f0d 100644 --- a/plugin.py +++ b/plugin.py @@ -71,35 +71,22 @@ class DDG(callbacks.Plugin): data = re.sub('\t|\r|\n', '', data) data = re.sub('\s{2,}', ' ', data) soup = BeautifulSoup(data) - # DuckDuckGo lite uses tables for everything. Each WEB result is made - # up of 3 tags: - tables = soup.find_all('table') - - # Sometimes there is an extra table for page navigation + tds = soup.find_all('td') + for t in tds: + if "1." 
in t.text: + res = t.next_sibling.next_sibling + break try: - webresults = tables[2].find_all('tr') - except IndexError: - webresults = tables[1].find_all('tr') - if webresults: - try: - while 'result-sponsored' in webresults[0]["class"]: - self.log.debug("DDG: stripping 1 sponsored/ad result.") - webresults = webresults[4:] - except KeyError: pass - # 1) The link and title. - link = webresults[0].find('a').get('href') - # 2) A result snippet. - snippet = webresults[1].find("td", class_="result-snippet") - try: - snippet = snippet.text.strip() - except AttributeError: - snippet = webresults[1].td.text.strip() - # 3) The link-text; essentially the same as the link in 1), but with the - # URI (http(s)://) removed. We do not need this section. + # 1) Fetch the result link. + link = res.a.get('href') + # 2) Get a result snippet. + snippet = res.parent.next_sibling.next_sibling.find("td", + class_="result-snippet") + snippet = snippet.text.strip() s = format("%s - %u", snippet, link) irc.reply(s) - else: + except AttributeError: irc.error("No results found.") search = wrap(search, ['text']) From 1a6e0127bdb9fe9d78fc533fb3c6017eb9d2aa3b Mon Sep 17 00:00:00 2001 From: James Lu Date: Mon, 15 Dec 2014 17:28:06 -0800 Subject: [PATCH 07/36] DDG: fix code handling, remove regex parsing From: https://github.com/jlu5/SupyPlugins/commit/64c29496dfbe4918a76fa71b5efbfd6b67c9bbfd --- plugin.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/plugin.py b/plugin.py index 93d533f0d..cbaccaf22 100644 --- a/plugin.py +++ b/plugin.py @@ -41,7 +41,6 @@ except ImportError: # without the i18n module _ = lambda x:x -import re try: # Python 3 from urllib.parse import urlencode except ImportError: # Python 2 @@ -59,7 +58,7 @@ class DDG(callbacks.Plugin): def search(self, irc, msg, args, text): """ - + Searches for on DuckDuckGo (web search).""" url = "https://duckduckgo.com/lite?" 
+ urlencode({"q":text}) try: @@ -67,26 +66,24 @@ class DDG(callbacks.Plugin): except utils.web.Error as e: self.log.info(url) irc.error(str(e), Raise=True) - # GRR, having to clean up our HTML for the results... - data = re.sub('\t|\r|\n', '', data) - data = re.sub('\s{2,}', ' ', data) soup = BeautifulSoup(data) - tds = soup.find_all('td') - for t in tds: + for t in soup.find_all('td'): if "1." in t.text: res = t.next_sibling.next_sibling - break - try: - # 1) Fetch the result link. - link = res.a.get('href') - # 2) Get a result snippet. - snippet = res.parent.next_sibling.next_sibling.find("td", - class_="result-snippet") - snippet = snippet.text.strip() - - s = format("%s - %u", snippet, link) - irc.reply(s) - except AttributeError: + try: + # 1) Get a result snippet. + snippet = res.parent.next_sibling.next_sibling.find("td", + class_="result-snippet") + # 2) Fetch the result link. + link = res.a.get('href') + snippet = snippet.text.strip() + + s = format("%s - %u", snippet, link) + irc.reply(s) + return + except (AttributeError, UnboundLocalError): + continue + else: irc.error("No results found.") search = wrap(search, ['text']) From d462b09dd725a0b5662a6a9ab34e2316de75fbff Mon Sep 17 00:00:00 2001 From: James Lu Date: Thu, 18 Dec 2014 16:25:12 -0800 Subject: [PATCH 08/36] DDG: better exception handling From: https://github.com/jlu5/SupyPlugins/commit/d2f1f32896fa8138908b78b66bfa43999c9de13d --- plugin.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugin.py b/plugin.py index cbaccaf22..1e1f77b90 100644 --- a/plugin.py +++ b/plugin.py @@ -67,9 +67,12 @@ class DDG(callbacks.Plugin): self.log.info(url) irc.error(str(e), Raise=True) soup = BeautifulSoup(data) + res = '' for t in soup.find_all('td'): if "1." in t.text: res = t.next_sibling.next_sibling + if not res: + continue try: # 1) Get a result snippet. 
snippet = res.parent.next_sibling.next_sibling.find("td", @@ -81,7 +84,7 @@ class DDG(callbacks.Plugin): s = format("%s - %u", snippet, link) irc.reply(s) return - except (AttributeError, UnboundLocalError): + except AttributeError: continue else: irc.error("No results found.") From 41db069581d30d6a88173f6a35ff755e80ad25a6 Mon Sep 17 00:00:00 2001 From: James Lu Date: Thu, 18 Dec 2014 16:36:22 -0800 Subject: [PATCH 09/36] README updates [skip ci] From: https://github.com/jlu5/SupyPlugins/commit/826bcf9eb13ad8d11f21dc2d9a67ea45a883bae6 --- README.md | 7 ++++++- __init__.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ae64ff077..65c422be9 100644 --- a/README.md +++ b/README.md @@ -1 +1,6 @@ -Searches results on DuckDuckGo. +Provides an interface to DuckDuckGo's web search. + +``` +<+sGLolol> %ddg eiffel tower +<@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - +``` diff --git a/__init__.py b/__init__.py index 674eb6385..c2ab28451 100644 --- a/__init__.py +++ b/__init__.py @@ -29,7 +29,7 @@ ### """ -DDG: Searches results on DuckDuckGo. +DDG: Searches for results on DuckDuckGo. """ import supybot From 7b40bd4fdb9faf0fe2fdd1a91e4993e8d415e749 Mon Sep 17 00:00:00 2001 From: James Lu Date: Thu, 18 Dec 2014 17:21:08 -0800 Subject: [PATCH 10/36] DDG: bugfix for older versions of beautifulsoup4 From: https://github.com/jlu5/SupyPlugins/commit/ad0a5dd6652501a7493d29bea858657a6caf4996 --- plugin.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugin.py b/plugin.py index 1e1f77b90..4522acbcd 100644 --- a/plugin.py +++ b/plugin.py @@ -70,13 +70,12 @@ class DDG(callbacks.Plugin): res = '' for t in soup.find_all('td'): if "1." in t.text: - res = t.next_sibling.next_sibling + res = t.next_sibling.next_sibling if not res: continue try: # 1) Get a result snippet. 
- snippet = res.parent.next_sibling.next_sibling.find("td", - class_="result-snippet") + snippet = res.parent.next_sibling.next_sibling.find_all("td")[-1] # 2) Fetch the result link. link = res.a.get('href') snippet = snippet.text.strip() From d3c77bbc726f7115a9febfea95e1882921212787 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 19 Dec 2014 22:44:45 -0800 Subject: [PATCH 11/36] DDG: fill in __init__.py From: https://github.com/jlu5/SupyPlugins/commit/e9d916765015f6e3993f0708e53ae2fdd729a89d --- __init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/__init__.py b/__init__.py index c2ab28451..dc8e5ffe6 100644 --- a/__init__.py +++ b/__init__.py @@ -32,7 +32,7 @@ DDG: Searches for results on DuckDuckGo. """ -import supybot +iimport supybot import supybot.world as world # Use this for the version of this plugin. You may wish to put a CVS keyword @@ -40,14 +40,15 @@ import supybot.world as world __version__ = "" # XXX Replace this with an appropriate author or supybot.Author instance. -__author__ = supybot.authors.unknown +__author__ = supybot.Author('James Lu', 'GLolol', + 'GLolol@overdrive.pw') # This is a dictionary mapping supybot.Author instances to lists of # contributions. __contributors__ = {} # This is a url where the most recent plugin package can be downloaded. -__url__ = '' +__url__ = 'https://github.com/GLolol/SupyPlugins/' from . import config from . import plugin From 7ecd98f29c637a285d4085f5e6a6b99bb51c5e80 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 19 Dec 2014 22:51:56 -0800 Subject: [PATCH 12/36] . From: https://github.com/jlu5/SupyPlugins/commit/6c166d1555c1c4dc3fbdf22af6c5855c56117ef4 --- __init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__init__.py b/__init__.py index dc8e5ffe6..c6d775649 100644 --- a/__init__.py +++ b/__init__.py @@ -32,7 +32,7 @@ DDG: Searches for results on DuckDuckGo. 
""" -iimport supybot +import supybot import supybot.world as world # Use this for the version of this plugin. You may wish to put a CVS keyword From 94c582a4a0dcc313f6df3f46508586b6ce8e7b38 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sat, 27 Dec 2014 11:37:16 -0800 Subject: [PATCH 13/36] DDG/FML/Isup: work towards PEP8 compliancy Skipping the config options in config.py since the indented version looks just as bad (not enough space to write the text without making it use 5 lines). From: https://github.com/jlu5/SupyPlugins/commit/4a62b4ad13c3d6f40f914f0bf8037e097d675c80 --- config.py | 3 ++- plugin.py | 17 ++++++++++------- test.py | 4 +++- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/config.py b/config.py index 2e8b3da9d..8f642f610 100644 --- a/config.py +++ b/config.py @@ -36,7 +36,8 @@ try: except: # Placeholder that allows to run the plugin on a bot # without the i18n module - _ = lambda x:x + _ = lambda x: x + def configure(advanced): # This will be called by supybot to configure this module. advanced is diff --git a/plugin.py b/plugin.py index 4522acbcd..c0d7886e6 100644 --- a/plugin.py +++ b/plugin.py @@ -39,18 +39,20 @@ try: except ImportError: # Placeholder that allows to run the plugin on a bot # without the i18n module - _ = lambda x:x + _ = lambda x: x -try: # Python 3 + +try: # Python 3 from urllib.parse import urlencode -except ImportError: # Python 2 +except ImportError: # Python 2 from urllib import urlencode try: from bs4 import BeautifulSoup except ImportError: raise ImportError("Beautiful Soup 4 is required for this plugin: get it" - " at http://www.crummy.com/software/BeautifulSoup/bs4/doc/" - "#installing-beautiful-soup") + " at http://www.crummy.com/software/BeautifulSoup/bs4" + "/doc/#installing-beautiful-soup") + class DDG(callbacks.Plugin): """Searches for results on DuckDuckGo.""" @@ -60,7 +62,7 @@ class DDG(callbacks.Plugin): """ Searches for on DuckDuckGo (web search).""" - url = "https://duckduckgo.com/lite?" 
+ urlencode({"q":text}) + url = "https://duckduckgo.com/lite?" + urlencode({"q": text}) try: data = utils.web.getUrl(url).decode("utf-8") except utils.web.Error as e: @@ -75,7 +77,8 @@ class DDG(callbacks.Plugin): continue try: # 1) Get a result snippet. - snippet = res.parent.next_sibling.next_sibling.find_all("td")[-1] + snippet = res.parent.next_sibling.next_sibling.\ + find_all("td")[-1] # 2) Fetch the result link. link = res.a.get('href') snippet = snippet.text.strip() diff --git a/test.py b/test.py index 3eea5c218..abfed70b8 100644 --- a/test.py +++ b/test.py @@ -30,11 +30,13 @@ from supybot.test import * + class DDGTestCase(PluginTestCase): plugins = ('DDG',) def testSearch(self): - self.assertRegexp('ddg search wikipedia', 'Wikipedia.*? - .*?https?\:\/\/') + self.assertRegexp( + 'ddg search wikipedia', 'Wikipedia.*? - .*?https?\:\/\/') # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From d26e8ff93d8f8e54daf87c58e5ff6cdd275f56b7 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sat, 17 Jan 2015 02:36:08 -0500 Subject: [PATCH 14/36] DDG: reintroduce support for multiple results From: https://github.com/jlu5/SupyPlugins/commit/3c5cc19ba737e266017a46594571f49f76d878b8 --- config.py | 10 +++++++--- plugin.py | 48 +++++++++++++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/config.py b/config.py index 8f642f610..508fea699 100644 --- a/config.py +++ b/config.py @@ -49,9 +49,13 @@ def configure(advanced): DDG = conf.registerPlugin('DDG') -# This is where your configuration variables (if any) should go. 
For example: -# conf.registerGlobalValue(DDG, 'someConfigVariableName', -# registry.Boolean(False, _("""Help for someConfigVariableName."""))) +conf.registerChannelValue(DDG, 'maxResults', + registry.PositiveInteger(4, _("""Determines the maximum number of + results the bot will respond with."""))) +conf.registerChannelValue(DDG, 'showSnippet', + registry.Boolean(True, _("""Determines whether the bot will show a + snippet of each resulting link. If False, it will show the title + of the link instead."""))) # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugin.py b/plugin.py index c0d7886e6..c02ec2bae 100644 --- a/plugin.py +++ b/plugin.py @@ -69,27 +69,37 @@ class DDG(callbacks.Plugin): self.log.info(url) irc.error(str(e), Raise=True) soup = BeautifulSoup(data) - res = '' + replies = [] + channel = msg.args[0] for t in soup.find_all('td'): - if "1." in t.text: - res = t.next_sibling.next_sibling - if not res: - continue - try: - # 1) Get a result snippet. - snippet = res.parent.next_sibling.next_sibling.\ - find_all("td")[-1] - # 2) Fetch the result link. - link = res.a.get('href') - snippet = snippet.text.strip() - - s = format("%s - %u", snippet, link) - irc.reply(s) - return - except AttributeError: - continue + maxr = self.registryValue("maxResults", channel) + for n in range(1, maxr): + res = '' + if ("%s." % n) in t.text: + res = t.next_sibling.next_sibling + if not res: + continue + try: + snippet = '' + # 1) Get a result snippet. + if self.registryValue("showsnippet", channel): + snippet = res.parent.next_sibling.next_sibling.\ + find_all("td")[-1] + snippet = snippet.text.strip() + # 2) Fetch the link title. + title = res.a.text.strip() + # 3) Fetch the result link. 
+ link = res.a.get('href') + s = format("%s - %s %u", ircutils.bold(title), snippet, + link) + replies.append(s) + except AttributeError: + continue else: - irc.error("No results found.") + if not replies: + irc.error("No results found.") + else: + irc.reply(', '.join(replies)) search = wrap(search, ['text']) Class = DDG From 4a46c1b88bee543945cb942960a681e93442666c Mon Sep 17 00:00:00 2001 From: James Lu Date: Sun, 8 Feb 2015 22:15:46 -0500 Subject: [PATCH 15/36] DDG: Initial addition of 'zeroclick' command (Closes #24) From: https://github.com/jlu5/SupyPlugins/commit/285948a47d2159250ff26abe945e499115323263 --- plugin.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++------ test.py | 4 +++- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/plugin.py b/plugin.py index c02ec2bae..2ad01888e 100644 --- a/plugin.py +++ b/plugin.py @@ -58,23 +58,34 @@ class DDG(callbacks.Plugin): """Searches for results on DuckDuckGo.""" threaded = True - def search(self, irc, msg, args, text): - """ - - Searches for on DuckDuckGo (web search).""" + def _ddgurl(self, text): + # DuckDuckGo has a 'lite' site free of unparseable JavaScript + # elements, so we'll use that to our advantage! url = "https://duckduckgo.com/lite?" + urlencode({"q": text}) try: data = utils.web.getUrl(url).decode("utf-8") except utils.web.Error as e: - self.log.info(url) irc.error(str(e), Raise=True) soup = BeautifulSoup(data) + return soup.find_all('td') + + def search(self, irc, msg, args, text): + """ + + Searches for on DuckDuckGo's web search.""" replies = [] channel = msg.args[0] - for t in soup.find_all('td'): + # In a nutshell, the 'lite' site puts all of its usable content + # into tables. This means that headings, result snippets and + # everything else are all using the same tag (), which makes + # parsing somewhat difficult. 
+ for t in self._ddgurl(text): maxr = self.registryValue("maxResults", channel) + # Hence we run a for loop to extract meaningful content: for n in range(1, maxr): res = '' + # Each valid result has a preceding heading in the format + # '1. ', etc. if ("%s." % n) in t.text: res = t.next_sibling.next_sibling if not res: @@ -102,6 +113,43 @@ class DDG(callbacks.Plugin): irc.reply(', '.join(replies)) search = wrap(search, ['text']) + @wrap(['text']) + def zeroclick(self, irc, msg, args, text): + """ + + Looks up on DuckDuckGo's zero-click engine.""" + # Zero-click can give multiple replies for things if the + # query is ambiguous, sort of like an encyclopedia. + + # For example, looking up "2^3" will give both: + # Zero-click info: 8 (number) + # Zero-click info: 8 + replies = {} + for td in self._ddgurl(text): + if td.text.startswith("Zero-click info:"): + # Make a dictionary of things + item = td.text.split("Zero-click info:", 1)[1].strip() + td = td.parent.next_sibling.next_sibling.\ + find("td") + # Condense newlines (
tags) + for br in td.find_all('br'): + br.replace_with(' - ') + res = td.text.strip().split("\n")[0] + try: + # Some zero-click results have an attached link to them. + link = td.a.get('href') + # Others have a piece of meaningless JavaScript... + if link != "javascript:;": + res += format(" %u", link) + except AttributeError: + pass + replies[item] = res + else: + if not replies: + irc.error("No zero-click info could be found for '%s'." % + text, Raise=True) + s = ["%s - %s" % (ircutils.bold(k), v) for k, v in replies.items()] + irc.reply("; ".join(s)) Class = DDG diff --git a/test.py b/test.py index abfed70b8..554b3f929 100644 --- a/test.py +++ b/test.py @@ -37,6 +37,8 @@ class DDGTestCase(PluginTestCase): def testSearch(self): self.assertRegexp( 'ddg search wikipedia', 'Wikipedia.*? - .*?https?\:\/\/') - + def testZeroclick(self): + self.assertRegexp( + 'zeroclick 2^3', '8') # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From 704d5a6fc2abcc4211abd9d13b68e4053e8d8566 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sat, 25 Apr 2015 23:21:21 -0700 Subject: [PATCH 16/36] copyright year updates From: https://github.com/jlu5/SupyPlugins/commit/ee6278260350c50f2151bac467ebe4109f42d52c --- __init__.py | 2 +- config.py | 2 +- plugin.py | 2 +- test.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/__init__.py b/__init__.py index c6d775649..f501f6d1c 100644 --- a/__init__.py +++ b/__init__.py @@ -1,5 +1,5 @@ ### -# Copyright (c) 2014, James Lu +# Copyright (c) 2014-2015, James Lu # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/config.py b/config.py index 508fea699..a70294e0c 100644 --- a/config.py +++ b/config.py @@ -1,5 +1,5 @@ ### -# Copyright (c) 2014, James Lu +# Copyright (c) 2014-2015, James Lu # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/plugin.py b/plugin.py index 2ad01888e..ccd004144 100644 --- a/plugin.py +++ b/plugin.py @@ -1,5 +1,5 @@ ### -# Copyright (c) 2014, James Lu +# Copyright (c) 2014-2015, James Lu # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/test.py b/test.py index 554b3f929..e4613b0bd 100644 --- a/test.py +++ b/test.py @@ -1,5 +1,5 @@ ### -# Copyright (c) 2014, James Lu +# Copyright (c) 2014-2015, James Lu # All rights reserved. # # Redistribution and use in source and binary forms, with or without From 0079333c80b06b74e1448cd4f813b02532cdcca0 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 22 May 2015 21:57:36 -0700 Subject: [PATCH 17/36] Various plugins: README tweaks [skip ci] From: https://github.com/jlu5/SupyPlugins/commit/9324d78dee1c8d4db09efd6cb1a78e140a6bd4db --- README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 65c422be9..b6cee6816 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,17 @@ -Provides an interface to DuckDuckGo's web search. +Provides an interface to DuckDuckGo's web search and Zero-click engine. + +### Web Search ``` -<+sGLolol> %ddg eiffel tower +<+GLolol> %ddg eiffel tower <@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - ``` + +### Zero-Click Results + +``` +<+GLolol> %zeroclick 2015 +<@Atlas> 2015 - 2015 has been designated the International Year of Light and the International Year of Soils by the sixty-eighth session of the United Nations General Assembly. 
+<+GLolol> %zeroclick 57^2 +<@Atlas> 57^2 - 3,249 +``` From 35677548df1bb61227879ea48627cb7d8dc2eb4b Mon Sep 17 00:00:00 2001 From: James Lu Date: Sat, 22 Aug 2015 18:32:04 -0700 Subject: [PATCH 18/36] DDG: strip Sponsored links from search results From: https://github.com/jlu5/SupyPlugins/commit/e9edceb73551c738dc38fc5c1556269c2f8ad83d --- plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugin.py b/plugin.py index ccd004144..fdf197c4d 100644 --- a/plugin.py +++ b/plugin.py @@ -67,7 +67,8 @@ class DDG(callbacks.Plugin): except utils.web.Error as e: irc.error(str(e), Raise=True) soup = BeautifulSoup(data) - return soup.find_all('td') + # Remove "sponsored link" results + return [td for td in soup.find_all('td') if 'result-sponsored' not in str(td.parent.get('class'))] def search(self, irc, msg, args, text): """ From 950aec696ecbc55cece30291aeef66f44a929bd9 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sun, 1 Nov 2015 10:24:03 -0800 Subject: [PATCH 19/36] DDG: remove broken error catching (irc isn't in the _ddgurl namespace) From: https://github.com/jlu5/SupyPlugins/commit/52ec05a0411270543a36fdd0dbc3d0b930989399 --- plugin.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/plugin.py b/plugin.py index fdf197c4d..9ec029d4a 100644 --- a/plugin.py +++ b/plugin.py @@ -62,10 +62,7 @@ class DDG(callbacks.Plugin): # DuckDuckGo has a 'lite' site free of unparseable JavaScript # elements, so we'll use that to our advantage! url = "https://duckduckgo.com/lite?" 
+ urlencode({"q": text}) - try: - data = utils.web.getUrl(url).decode("utf-8") - except utils.web.Error as e: - irc.error(str(e), Raise=True) + data = utils.web.getUrl(url).decode("utf-8") soup = BeautifulSoup(data) # Remove "sponsored link" results return [td for td in soup.find_all('td') if 'result-sponsored' not in str(td.parent.get('class'))] From f9b83ac2deae2f115996c6b34a038cc665bf6894 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sun, 1 Nov 2015 10:44:05 -0800 Subject: [PATCH 20/36] DDG: log search URL to DEBUG From: https://github.com/jlu5/SupyPlugins/commit/5431f25e0a60bfb9f8dd1dd9325a1e021339979f --- plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/plugin.py b/plugin.py index 9ec029d4a..5efbd7e9e 100644 --- a/plugin.py +++ b/plugin.py @@ -62,6 +62,7 @@ class DDG(callbacks.Plugin): # DuckDuckGo has a 'lite' site free of unparseable JavaScript # elements, so we'll use that to our advantage! url = "https://duckduckgo.com/lite?" + urlencode({"q": text}) + self.log.debug("DDG: Using URL %s for search %s", url, text) data = utils.web.getUrl(url).decode("utf-8") soup = BeautifulSoup(data) # Remove "sponsored link" results From 1806646be20402e64660fbbbd9b80314ad76d281 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 4 Mar 2016 16:36:16 -0800 Subject: [PATCH 21/36] plugins: update my email From: https://github.com/jlu5/SupyPlugins/commit/177eefa0a3bca7fd8c6b52fcb7e75c1d3afccc9f --- __init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__init__.py b/__init__.py index f501f6d1c..6964d5992 100644 --- a/__init__.py +++ b/__init__.py @@ -41,7 +41,7 @@ __version__ = "" # XXX Replace this with an appropriate author or supybot.Author instance. __author__ = supybot.Author('James Lu', 'GLolol', - 'GLolol@overdrive.pw') + 'GLolol@overdrivenetworks.com') # This is a dictionary mapping supybot.Author instances to lists of # contributions. 
From 98600b26e8cf0c9810e09702c7810ddff794ee29 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sun, 29 May 2016 16:23:10 -0700 Subject: [PATCH 22/36] DDG: fix wrong command name in README [skip ci] From: https://github.com/jlu5/SupyPlugins/commit/22a4fb457da8b834bd665555938622c881e02643 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b6cee6816..be0721350 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Provides an interface to DuckDuckGo's web search and Zero-click engine. ### Web Search ``` -<+GLolol> %ddg eiffel tower +<+GLolol> %ddg search eiffel tower <@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - ``` From c92bc0ca0ccde0284e1dd75a3b7665f943a395bb Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 18:14:07 -0800 Subject: [PATCH 23/36] DDG: expand redirect links in search results Some links on DuckDuckGo's site are now shown going through a redirect service. These links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" instead of simply being "https://duckduckgo.com", and we should decode these here. From: https://github.com/jlu5/SupyPlugins/commit/8187d51cef8a716f7b58d8e4521ebf37444f8fe0 --- plugin.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/plugin.py b/plugin.py index 5efbd7e9e..c4780a168 100644 --- a/plugin.py +++ b/plugin.py @@ -43,9 +43,10 @@ except ImportError: try: # Python 3 - from urllib.parse import urlencode + from urllib.parse import urlencode, parse_qs except ImportError: # Python 2 from urllib import urlencode + from urlparse import parse_qs try: from bs4 import BeautifulSoup except ImportError: @@ -99,7 +100,20 @@ class DDG(callbacks.Plugin): # 2) Fetch the link title. title = res.a.text.strip() # 3) Fetch the result link. 
- link = res.a.get('href') + origlink = link = res.a.get('href') + + # As of 2017-01-20, some links on DuckDuckGo's site are shown going through + # a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" + # instead of simply being "https://duckduckgo.com". So, we decode these links here. + if link.startswith('/l/'): + linkparse = utils.web.urlparse(link) + try: + link = parse_qs(linkparse.query)['uddg'][0] + except (IndexError, KeyError): + self.log.exception("DDG: failed to expand redirected result URL %s", origlink) + else: + self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) + s = format("%s - %s %u", ircutils.bold(title), snippet, link) replies.append(s) From 44b626df641f97863370521cdbefebd4c81ec64a Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 18:18:06 -0800 Subject: [PATCH 24/36] DDG: make _ddgurl() a static method From: https://github.com/jlu5/SupyPlugins/commit/d332e73e48dfa679f3a08786a9dae9da5149b0cc --- plugin.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugin.py b/plugin.py index c4780a168..9ffdfd4e8 100644 --- a/plugin.py +++ b/plugin.py @@ -33,6 +33,7 @@ from supybot.commands import * import supybot.plugins as plugins import supybot.ircutils as ircutils import supybot.callbacks as callbacks +import supybot.log as log try: from supybot.i18n import PluginInternationalization _ = PluginInternationalization('DDG') @@ -59,11 +60,12 @@ class DDG(callbacks.Plugin): """Searches for results on DuckDuckGo.""" threaded = True - def _ddgurl(self, text): + @staticmethod + def _ddgurl(text): # DuckDuckGo has a 'lite' site free of unparseable JavaScript # elements, so we'll use that to our advantage! url = "https://duckduckgo.com/lite?" 
+ urlencode({"q": text}) - self.log.debug("DDG: Using URL %s for search %s", url, text) + log.debug("DDG: Using URL %s for search %s", url, text) data = utils.web.getUrl(url).decode("utf-8") soup = BeautifulSoup(data) # Remove "sponsored link" results From 30615a728486777757cbc77714ce3fcb2fd38a24 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 18:39:42 -0800 Subject: [PATCH 25/36] DDG: abstract out a search_core(), as the LastFM plugin will use this later From: https://github.com/jlu5/SupyPlugins/commit/47bb74d7d95b67a8e2fd8a5e5f2210a41f1b11a1 --- plugin.py | 56 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/plugin.py b/plugin.py index 9ffdfd4e8..afdf81ef9 100644 --- a/plugin.py +++ b/plugin.py @@ -1,5 +1,5 @@ ### -# Copyright (c) 2014-2015, James Lu +# Copyright (c) 2014-2017, James Lu # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -71,19 +71,25 @@ class DDG(callbacks.Plugin): # Remove "sponsored link" results return [td for td in soup.find_all('td') if 'result-sponsored' not in str(td.parent.get('class'))] - def search(self, irc, msg, args, text): - """ + def search_core(self, text, channel_context=None, max_results=None, show_snippet=None): + """ + Core results fetcher for the DDG plugin. Other plugins can call this as well via + irc.getCallback('DDG').search_core(...) + """ + if show_snippet is None: + # Note: don't use ternary there, or the registry value will override any False + # settings given to the function directly. + show_snippet = self.registryValue("showSnippet", channel_context) + maxr = max_results or self.registryValue("maxResults", channel_context) - Searches for on DuckDuckGo's web search.""" - replies = [] - channel = msg.args[0] # In a nutshell, the 'lite' site puts all of its usable content # into tables. 
This means that headings, result snippets and - # everything else are all using the same tag (), which makes - # parsing somewhat difficult. + # everything else are all using the same tag (), which still makes + # parsing somewhat tricky. + results = [] + for t in self._ddgurl(text): - maxr = self.registryValue("maxResults", channel) - # Hence we run a for loop to extract meaningful content: + # We run a for loop here to extract meaningful content: for n in range(1, maxr): res = '' # Each valid result has a preceding heading in the format @@ -95,7 +101,8 @@ class DDG(callbacks.Plugin): try: snippet = '' # 1) Get a result snippet. - if self.registryValue("showsnippet", channel): + + if self.registryValue("showsnippet", channel_context): snippet = res.parent.next_sibling.next_sibling.\ find_all("td")[-1] snippet = snippet.text.strip() @@ -111,22 +118,31 @@ class DDG(callbacks.Plugin): linkparse = utils.web.urlparse(link) try: link = parse_qs(linkparse.query)['uddg'][0] - except (IndexError, KeyError): + except KeyError: + # No link was given here, skip. 
+ continue + except IndexError: self.log.exception("DDG: failed to expand redirected result URL %s", origlink) + continue else: self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) - s = format("%s - %s %u", ircutils.bold(title), snippet, - link) - replies.append(s) + s = format("%s - %s %u", ircutils.bold(title), snippet, link) + results.append(s) except AttributeError: continue + return results + + @wrap(['text']) + def search(self, irc, msg, args, text): + """ + + Searches for on DuckDuckGo's web search.""" + results = self.search_core(text, msg.args[0]) + if not results: + irc.error("No results found.") else: - if not replies: - irc.error("No results found.") - else: - irc.reply(', '.join(replies)) - search = wrap(search, ['text']) + irc.reply(', '.join(results)) @wrap(['text']) def zeroclick(self, irc, msg, args, text): From 48b5016f418eb7eba6b8a248180d133217304f49 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 18:50:16 -0800 Subject: [PATCH 26/36] DDG: rewrite to fix "max results" not working From: https://github.com/jlu5/SupyPlugins/commit/7a6e2f9f972cf3d08e3a3aefbbfd405aab525d5f --- plugin.py | 82 +++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/plugin.py b/plugin.py index afdf81ef9..c1f889fce 100644 --- a/plugin.py +++ b/plugin.py @@ -81,6 +81,7 @@ class DDG(callbacks.Plugin): # settings given to the function directly. show_snippet = self.registryValue("showSnippet", channel_context) maxr = max_results or self.registryValue("maxResults", channel_context) + self.log.debug('DDG: got %s for max results', maxr) # In a nutshell, the 'lite' site puts all of its usable content # into tables. This means that headings, result snippets and @@ -88,50 +89,49 @@ class DDG(callbacks.Plugin): # parsing somewhat tricky. 
results = [] - for t in self._ddgurl(text): - # We run a for loop here to extract meaningful content: - for n in range(1, maxr): - res = '' - # Each valid result has a preceding heading in the format - # '1. ', etc. - if ("%s." % n) in t.text: - res = t.next_sibling.next_sibling - if not res: - continue - try: - snippet = '' - # 1) Get a result snippet. + raw_results = self._ddgurl(text) + for t in raw_results: + res = '' + # Each valid result has a preceding heading in the format + # '1. ', etc. + if t.text[0].isdigit(): + res = t.next_sibling.next_sibling + if not res: + continue + try: + snippet = '' + # 1) Get a result snippet. - if self.registryValue("showsnippet", channel_context): - snippet = res.parent.next_sibling.next_sibling.\ - find_all("td")[-1] - snippet = snippet.text.strip() - # 2) Fetch the link title. - title = res.a.text.strip() - # 3) Fetch the result link. - origlink = link = res.a.get('href') + if self.registryValue("showsnippet", channel_context): + snippet = res.parent.next_sibling.next_sibling.\ + find_all("td")[-1] + snippet = snippet.text.strip() + # 2) Fetch the link title. + title = res.a.text.strip() + # 3) Fetch the result link. + origlink = link = res.a.get('href') - # As of 2017-01-20, some links on DuckDuckGo's site are shown going through - # a redirect service. The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" - # instead of simply being "https://duckduckgo.com". So, we decode these links here. - if link.startswith('/l/'): - linkparse = utils.web.urlparse(link) - try: - link = parse_qs(linkparse.query)['uddg'][0] - except KeyError: - # No link was given here, skip. - continue - except IndexError: - self.log.exception("DDG: failed to expand redirected result URL %s", origlink) - continue - else: - self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) + # As of 2017-01-20, some links on DuckDuckGo's site are shown going through + # a redirect service. 
The links are in the format "/l/?kh=-1&uddg=https%3A%2F%2Fduckduckgo.com%2F" + # instead of simply being "https://duckduckgo.com". So, we decode these links here. + if link.startswith('/l/'): + linkparse = utils.web.urlparse(link) + try: + link = parse_qs(linkparse.query)['uddg'][0] + except KeyError: + # No link was given here, skip. + continue + except IndexError: + self.log.exception("DDG: failed to expand redirected result URL %s", origlink) + continue + else: + self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) - s = format("%s - %s %u", ircutils.bold(title), snippet, link) - results.append(s) - except AttributeError: - continue - return results + s = format("%s - %s %u", ircutils.bold(title), snippet, link) + results.append(s) + except AttributeError: + continue + return results[:maxr] @wrap(['text']) def search(self, irc, msg, args, text): From 828112eaa1d7db33b0d3a665522ceac22ddc78af Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 18:51:00 -0800 Subject: [PATCH 27/36] DDG: fix a typo in comments From: https://github.com/jlu5/SupyPlugins/commit/37290686334d4253f4f1c501cdc97167ccb4c5e1 --- plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin.py b/plugin.py index c1f889fce..9d09824cc 100644 --- a/plugin.py +++ b/plugin.py @@ -77,7 +77,7 @@ class DDG(callbacks.Plugin): irc.getCallback('DDG').search_core(...) """ if show_snippet is None: - # Note: don't use ternary there, or the registry value will override any False + # Note: don't use ternary here, or the registry value will override any False # settings given to the function directly. 
show_snippet = self.registryValue("showSnippet", channel_context) maxr = max_results or self.registryValue("maxResults", channel_context) From f5a53a1965db9826529558dfbe64d2a72d110e92 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 18:57:55 -0800 Subject: [PATCH 28/36] DDG: actually, return a list of tuples in search_core() This abstraction makes it easier for depending plugins to filter out the data they actually need. From: https://github.com/jlu5/SupyPlugins/commit/4915b7713b062c8a7661159c81b99bfa94280e1e --- plugin.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/plugin.py b/plugin.py index 9d09824cc..677f4e30d 100644 --- a/plugin.py +++ b/plugin.py @@ -127,8 +127,9 @@ class DDG(callbacks.Plugin): else: self.log.debug("DDG: expanded result URL from %s to %s", origlink, link) - s = format("%s - %s %u", ircutils.bold(title), snippet, link) - results.append(s) + # Return a list of tuples in the form (link title, snippet text, link) + results.append((title, snippet, link)) + except AttributeError: continue return results[:maxr] @@ -142,7 +143,8 @@ class DDG(callbacks.Plugin): if not results: irc.error("No results found.") else: - irc.reply(', '.join(results)) + strings = [format("%s - %s %u", ircutils.bold(res[0]), res[1], res[2]) for res in results] + irc.reply(', '.join(strings)) @wrap(['text']) def zeroclick(self, irc, msg, args, text): From 53318d142cd5cb873247ebe5850b2b7179a12c55 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 21:51:05 -0800 Subject: [PATCH 29/36] DDG: show all lines of multiline zeroclick replies Thanks to @nathan0 for pointing this out. 
From: https://github.com/jlu5/SupyPlugins/commit/0b0c1b59ab71ea22e34e31f22d9874c47ded183a --- plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin.py b/plugin.py index 677f4e30d..d63d0f21f 100644 --- a/plugin.py +++ b/plugin.py @@ -164,10 +164,10 @@ class DDG(callbacks.Plugin): item = td.text.split("Zero-click info:", 1)[1].strip() td = td.parent.next_sibling.next_sibling.\ find("td") - # Condense newlines (
<br> tags) + # Condense newlines (<br>
tags). XXX: make these separators configurable. for br in td.find_all('br'): br.replace_with(' - ') - res = td.text.strip().split("\n")[0] + res = ' | '.join(td.text.strip().split("\n")) try: # Some zero-click results have an attached link to them. link = td.a.get('href') From ba044804052cf33262dcdb8bbbae6da4662b4954 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 22:11:49 -0800 Subject: [PATCH 30/36] DDG: rewrite _ddgurl() to return new and original request URLs This uses utils.web.getUrlTargetAndContent(), which is specific to Limnoria and requires commit ProgVal/Limnoria@57b77a6725d2e6f2f417419d2a0459982898b877 or later From: https://github.com/jlu5/SupyPlugins/commit/2db371a9fad73f15e13c36d2d45d4f8baeaa9938 --- plugin.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/plugin.py b/plugin.py index d63d0f21f..34bd094d1 100644 --- a/plugin.py +++ b/plugin.py @@ -65,11 +65,17 @@ class DDG(callbacks.Plugin): # DuckDuckGo has a 'lite' site free of unparseable JavaScript # elements, so we'll use that to our advantage! url = "https://duckduckgo.com/lite?" + urlencode({"q": text}) + log.debug("DDG: Using URL %s for search %s", url, text) - data = utils.web.getUrl(url).decode("utf-8") + + real_url, data = utils.web.getUrlTargetAndContent(url) + data = data.decode("utf-8") soup = BeautifulSoup(data) + # Remove "sponsored link" results - return [td for td in soup.find_all('td') if 'result-sponsored' not in str(td.parent.get('class'))] + return (url, real_url, [td for td in soup.find_all('td') if 'result-sponsored' not in + str(td.parent.get('class'))]) + def search_core(self, text, channel_context=None, max_results=None, show_snippet=None): """ @@ -84,12 +90,13 @@ class DDG(callbacks.Plugin): self.log.debug('DDG: got %s for max results', maxr) # In a nutshell, the 'lite' site puts all of its usable content - # into tables. 
This means that headings, result snippets and - # everything else are all using the same tag (), which still makes - # parsing somewhat tricky. + # into tables. This does mean that headings, result snippets and + # everything else are all using the same tag (), so parsing is + # still somewhat tricky. results = [] - raw_results = self._ddgurl(text) + url, real_url, raw_results = self._ddgurl(text) + for t in raw_results: res = '' # Each valid result has a preceding heading in the format @@ -158,7 +165,7 @@ class DDG(callbacks.Plugin): # Zero-click info: 8 (number) # Zero-click info: 8 replies = {} - for td in self._ddgurl(text): + for td in self._ddgurl(text)[-1]: if td.text.startswith("Zero-click info:"): # Make a dictionary of things item = td.text.split("Zero-click info:", 1)[1].strip() From e4d16257b6f497e7c5a1f40f98f265f58701fab8 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 20 Jan 2017 22:16:05 -0800 Subject: [PATCH 31/36] DDG: support !bang links (redirects) From: https://github.com/jlu5/SupyPlugins/commit/1e7bfed72e04de31f88598ec0f333459b4417fdf --- plugin.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/plugin.py b/plugin.py index 34bd094d1..ee7d2f480 100644 --- a/plugin.py +++ b/plugin.py @@ -97,6 +97,11 @@ class DDG(callbacks.Plugin): url, real_url, raw_results = self._ddgurl(text) + if real_url != url: + # We received a redirect, likely from something like a !bang request. + # Don't bother parsing the target page, as it probably won't work anyways. + return [('', '', real_url)] + for t in raw_results: res = '' # Each valid result has a preceding heading in the format @@ -150,7 +155,15 @@ class DDG(callbacks.Plugin): if not results: irc.error("No results found.") else: - strings = [format("%s - %s %u", ircutils.bold(res[0]), res[1], res[2]) for res in results] + strings = [] + + for r in results: + if not r[0]: + # This result has no title, so it's likely a redirect from !bang. 
+ strings.append(format("See %u", r[2])) + else: + strings.append(format("%s - %s %u", ircutils.bold(r[0]), r[1], r[2])) + irc.reply(', '.join(strings)) @wrap(['text']) From c73542fb3f960e50fbfb5181e7889ccb16c9814c Mon Sep 17 00:00:00 2001 From: James Lu Date: Sat, 3 Jun 2017 17:28:43 -0700 Subject: [PATCH 32/36] DDG: remove the 'zeroclick' command The IRC output is poorly formatted, and doesn't yield useful results most of the time anyways. From: https://github.com/jlu5/SupyPlugins/commit/cdf6c7eb3ec7d0c20bdde367454ce16ccab71d3d --- plugin.py | 37 ------------------------------------- test.py | 6 +----- 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/plugin.py b/plugin.py index ee7d2f480..004136efd 100644 --- a/plugin.py +++ b/plugin.py @@ -166,43 +166,6 @@ class DDG(callbacks.Plugin): irc.reply(', '.join(strings)) - @wrap(['text']) - def zeroclick(self, irc, msg, args, text): - """ - - Looks up on DuckDuckGo's zero-click engine.""" - # Zero-click can give multiple replies for things if the - # query is ambiguous, sort of like an encyclopedia. - - # For example, looking up "2^3" will give both: - # Zero-click info: 8 (number) - # Zero-click info: 8 - replies = {} - for td in self._ddgurl(text)[-1]: - if td.text.startswith("Zero-click info:"): - # Make a dictionary of things - item = td.text.split("Zero-click info:", 1)[1].strip() - td = td.parent.next_sibling.next_sibling.\ - find("td") - # Condense newlines (
tags). XXX: make these separators configurable. - for br in td.find_all('br'): - br.replace_with(' - ') - res = ' | '.join(td.text.strip().split("\n")) - try: - # Some zero-click results have an attached link to them. - link = td.a.get('href') - # Others have a piece of meaningless JavaScript... - if link != "javascript:;": - res += format(" %u", link) - except AttributeError: - pass - replies[item] = res - else: - if not replies: - irc.error("No zero-click info could be found for '%s'." % - text, Raise=True) - s = ["%s - %s" % (ircutils.bold(k), v) for k, v in replies.items()] - irc.reply("; ".join(s)) Class = DDG diff --git a/test.py b/test.py index e4613b0bd..6829aa00f 100644 --- a/test.py +++ b/test.py @@ -1,5 +1,5 @@ ### -# Copyright (c) 2014-2015, James Lu +# Copyright (c) 2014-2017, James Lu # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -30,15 +30,11 @@ from supybot.test import * - class DDGTestCase(PluginTestCase): plugins = ('DDG',) def testSearch(self): self.assertRegexp( 'ddg search wikipedia', 'Wikipedia.*? - .*?https?\:\/\/') - def testZeroclick(self): - self.assertRegexp( - 'zeroclick 2^3', '8') # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From 8689345f74be3fa0fe444e0e3d099f019d84fbc4 Mon Sep 17 00:00:00 2001 From: James Lu Date: Thu, 28 Dec 2017 00:24:07 -0800 Subject: [PATCH 33/36] DDG: remove zeroclick from plugin readme [skip ci] From: https://github.com/jlu5/SupyPlugins/commit/3ab4ba16dc09755424b7e5da4e0ffd2e4a087c9a --- README.md | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index be0721350..c41f6696c 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,7 @@ -Provides an interface to DuckDuckGo's web search and Zero-click engine. +Provides an interface to DuckDuckGo's web search. 
-### Web Search +#### Example -``` -<+GLolol> %ddg search eiffel tower -<@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - -``` +> <+GLolol> %ddg search eiffel tower -### Zero-Click Results - -``` -<+GLolol> %zeroclick 2015 -<@Atlas> 2015 - 2015 has been designated the International Year of Light and the International Year of Soils by the sixty-eighth session of the United Nations General Assembly. -<+GLolol> %zeroclick 57^2 -<@Atlas> 57^2 - 3,249 -``` +> <@Atlas> The Eiffel Tower is an iron lattice tower located on the Champ de Mars in Paris. It was named after the engineer Gustave Eiffel, whose company designed and built the tower. - From c99353106e75b73a60740953fe3f1d89637920db Mon Sep 17 00:00:00 2001 From: James Lu Date: Mon, 3 Dec 2018 17:33:52 -0800 Subject: [PATCH 34/36] Annotate READMEs with BeautifulSoup dependencies From: https://github.com/jlu5/SupyPlugins/commit/a80a19564c32093da5e303f3016763b3c25456d1 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c41f6696c..b18634bee 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Provides an interface to DuckDuckGo's web search. +Provides an interface to DuckDuckGo's web search. This plugin requires the [Beautiful Soup 4](http://www.crummy.com/software/BeautifulSoup/bs4/doc/) Python module. 
#### Example From e44083f9a55eb9d0ac38eb15bfc47027ba10dc1f Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 11 Oct 2019 09:58:50 -0700 Subject: [PATCH 35/36] Update my email & repo link references From: https://github.com/jlu5/SupyPlugins/commit/2ae51939b3258d7dd28f8f5dc825575e7638b243 --- __init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/__init__.py b/__init__.py index 6964d5992..7a4b6b352 100644 --- a/__init__.py +++ b/__init__.py @@ -41,14 +41,14 @@ __version__ = "" # XXX Replace this with an appropriate author or supybot.Author instance. __author__ = supybot.Author('James Lu', 'GLolol', - 'GLolol@overdrivenetworks.com') + 'james@overdrivenetworks.com') # This is a dictionary mapping supybot.Author instances to lists of # contributions. __contributors__ = {} # This is a url where the most recent plugin package can be downloaded. -__url__ = 'https://github.com/GLolol/SupyPlugins/' +__url__ = 'https://github.com/jlu5/SupyPlugins/' from . import config from . import plugin From 410b51657aeb93b4836d62168a90b319677a45f1 Mon Sep 17 00:00:00 2001 From: James Lu Date: Sat, 16 Nov 2019 12:26:32 -0800 Subject: [PATCH 36/36] Update plugin author/maintainer data From: https://github.com/jlu5/SupyPlugins/commit/cbd953b32c3da5f5d2bb5b9e711a467050a9dcb9 --- __init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/__init__.py b/__init__.py index 7a4b6b352..56d6993ef 100644 --- a/__init__.py +++ b/__init__.py @@ -39,9 +39,8 @@ import supybot.world as world # in here if you're keeping the plugin in CVS or some similar system. __version__ = "" -# XXX Replace this with an appropriate author or supybot.Author instance. -__author__ = supybot.Author('James Lu', 'GLolol', - 'james@overdrivenetworks.com') +__author__ = getattr(supybot.authors, 'jlu', + supybot.Author('James Lu', 'GLolol', 'james@overdrivenetworks.com')) # This is a dictionary mapping supybot.Author instances to lists of # contributions.