diff --git a/plugins/Web/README.txt b/plugins/Web/README.txt new file mode 100644 index 000000000..d60b47a97 --- /dev/null +++ b/plugins/Web/README.txt @@ -0,0 +1 @@ +Insert a description of your plugin here, with any notes, etc. about using it. diff --git a/plugins/Web/__init__.py b/plugins/Web/__init__.py new file mode 100644 index 000000000..e2c2875e5 --- /dev/null +++ b/plugins/Web/__init__.py @@ -0,0 +1,58 @@ +### +# Copyright (c) 2005, Jeremiah Fincher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
"""
Includes various web-related commands.
"""

import supybot
import supybot.world as world

# Plugin metadata.  "%%VERSION%%" is a literal placeholder in this file;
# presumably it is substituted when a release is built -- TODO confirm.
__author__ = supybot.authors.jemfinch
__version__ = "%%VERSION%%"

# Dictionary mapping supybot.Author instances to lists of contributions;
# currently empty.
__contributors__ = {}

import config
import plugin
# Re-execute plugin.py in case this package itself is being reloaded.
reload(plugin)
# Any third-party module that should be refreshed together with the plugin
# needs its own import and reload() call here.

# The test module is only imported when world.testing is set.
if world.testing:
    import test

# Re-export the plugin class and the configuration hook at package level.
Class, configure = plugin.Class, config.configure


# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
import supybot.conf as conf
import supybot.registry as registry

def configure(advanced):
    """Interactively configure the Web plugin.

    Called by supybot to configure this module.  `advanced` is a bool that
    specifies whether the user identified himself as an advanced user; it is
    not consulted here because the single question below is asked of
    everybody.  Configuration is effected by manipulating the registry.
    """
    # Only yn() is used, so only yn() is imported.
    from supybot.questions import yn
    Web = conf.registerPlugin('Web', True)
    if yn("""This plugin also offers a snarfer that will try to fetch the
             title of URLs that it sees in the channel.  Would you like this
             snarfer to be enabled?""", default=False):
        Web.titleSnarfer.setValue(True)


# Module-level registration: runs at import time, so the values below exist
# before configure() touches Web.titleSnarfer.
Web = conf.registerPlugin('Web')
# Channel-specific switch for the title snarfer; off by default.
conf.registerChannelValue(Web, 'titleSnarfer',
    registry.Boolean(False, """Determines whether the bot will output the HTML
    title of URLs it sees in the channel."""))
# Channel-specific exclusion pattern; None means nothing is excluded.
conf.registerChannelValue(Web, 'nonSnarfingRegexp',
    registry.Regexp(None, """Determines what URLs are to be snarfed in the
    channel; URLs matching the regexp given will not be snarfed.  Give the
    empty string if you have no URLs that you'd like to exclude from being
    snarfed."""))

# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
+### + +import re + +import supybot.conf as conf +import supybot.utils as utils +from supybot.commands import * +import supybot.plugins as plugins +import supybot.ircutils as ircutils +import supybot.privmsgs as privmsgs +import supybot.callbacks as callbacks + +class Web(callbacks.PrivmsgCommandAndRegexp): + """Add the help for "@help Web" here.""" + regexps = ['titleSnarfer'] + threaded = True + _titleRe = re.compile(r'(.*?)', re.I | re.S) + def titleSnarfer(self, irc, msg, match): + r"https?://[^\])>\s]+" + channel = msg.args[0] + if not irc.isChannel(channel): + return + if callbacks.addressed(irc.nick, msg): + return + if self.registryValue('titleSnarfer', channel): + url = match.group(0) + r = self.registryValue('nonSnarfingRegexp', channel) + if r and r.search(url): + self.log.debug('Not titleSnarfing %q.', url) + return + try: + size = conf.supybot.protocols.http.peekSize() + text = utils.web.getUrl(url, size=size) + except utils.web.Error, e: + self.log.info('Couldn\'t snarf title of %u: %s.', url, e) + return + m = self._titleRe.search(text) + if m is not None: + domain = utils.web.getDomain(url) + title = utils.web.htmlToText(m.group(1).strip()) + s = format('Title: %s (at %s)', title, domain) + irc.reply(s, prefixName=False) + titleSnarfer = urlSnarfer(titleSnarfer) + + def headers(self, irc, msg, args, url): + """ + + Returns the HTTP headers of . Only HTTP urls are valid, of + course. + """ + fd = utils.web.getUrlFd(url) + try: + s = ', '.join([format('%s: %s', k, v) + for (k, v) in fd.headers.items()]) + irc.reply(s) + finally: + fd.close() + headers = wrap(headers, ['httpUrl']) + + _doctypeRe = re.compile(r'(]+>)', re.M) + def doctype(self, irc, msg, args, url): + """ + + Returns the DOCTYPE string of . Only HTTP urls are valid, of + course. 
+ """ + size = conf.supybot.protocols.http.peekSize() + s = utils.web.getUrl(url, size=size) + m = self._doctypeRe.search(s) + if m: + s = utils.str.normalizeWhitespace(m.group(0)) + irc.reply(s) + else: + irc.reply('That URL has no specified doctype.') + doctype = wrap(doctype, ['httpUrl']) + + def size(self, irc, msg, args, url): + """ + + Returns the Content-Length header of . Only HTTP urls are valid, + of course. + """ + fd = utils.web.getUrlFd(url) + try: + try: + size = fd.headers['Content-Length'] + irc.reply(format('%u is %i bytes long.', url, size)) + except KeyError: + size = conf.supybot.protocols.http.peekSize() + s = fd.read(size) + if len(s) != size: + irc.reply(format('%u is %i bytes long.', url, len(s))) + else: + irc.reply(format('The server didn\'t tell me how long %u ' + 'is but it\'s longer than %i bytes.', + url, size)) + finally: + fd.close() + size = wrap(size, ['httpUrl']) + + def title(self, irc, msg, args, url): + """ + + Returns the HTML ... of a URL. + """ + size = conf.supybot.protocols.http.peekSize() + text = utils.web.getUrl(url, size=size) + m = self._titleRe.search(text) + if m is not None: + irc.reply(utils.web.htmlToText(m.group(1).strip())) + else: + irc.reply(format('That URL appears to have no HTML title ' + 'within the first %i bytes.', size)) + title = wrap(title, ['httpUrl']) + + _netcraftre = re.compile(r'td align="left">\s+]+>(.*?) + + Returns Netcraft.com's determination of what operating system and + webserver is running on the host given. + """ + url = 'http://uptime.netcraft.com/up/graph/?host=' + hostname + html = utils.web.getUrl(url) + m = self._netcraftre.search(html) + if m: + html = m.group(1) + s = utils.web.htmlToText(html, tagReplace='').strip() + s = s.rstrip('-').strip() + irc.reply(s) # Snip off "the site" + elif 'We could not get any results' in html: + irc.reply('No results found for %s.' 
% hostname) + else: + irc.error('The format of page the was odd.') + netcraft = wrap(netcraft, ['text']) + + def urlquote(self, irc, msg, args, text): + """ + + Returns the URL quoted form of the text. + """ + irc.reply(utils.web.urlquote(text)) + urlquote = wrap(urlquote, ['text']) + + def urlunquote(self, irc, msg, args, text): + """ + + Returns the text un-URL quoted. + """ + s = utils.web.urlunquote(text) + irc.reply(s) + urlunquote = wrap(urlunquote, ['text']) + + + +Class = Web + + +# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: diff --git a/plugins/Web/test.py b/plugins/Web/test.py new file mode 100644 index 000000000..38161afc3 --- /dev/null +++ b/plugins/Web/test.py @@ -0,0 +1,111 @@ +### +# Copyright (c) 2005, Jeremiah Fincher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
from supybot.test import *

class WebTestCase(PluginTestCase):
    plugins = ('Web',)
    # All tests below contact live web sites, so they are only defined when
    # `network` is true.
    if network:
        def testHeaders(self):
            # Non-HTTP URLs must be rejected; HTTP ones must work.
            self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('headers http://www.slashdot.org/')

        def testDoctype(self):
            self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('doctype http://www.slashdot.org/')
            m = self.getMsg('doctype http://moobot.sf.net/')
            # The reply is expected to end with the '>' that closes the
            # declaration.
            self.failUnless(m.args[1].endswith('>'))

        def testSize(self):
            self.assertError('size ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('size http://supybot.sf.net/')
            self.assertNotError('size http://www.slashdot.org/')

        def testTitle(self):
            self.assertResponse('title http://www.slashdot.org/',
                                'Slashdot: News for nerds, stuff that matters')
            # Amazon add a bunch of scripting stuff to the top of their page,
            # so we need to allow for a larger peekSize
            # Save the setting before entering the try block so the finally
            # clause can never hit an unbound name.
            orig = conf.supybot.protocols.http.peekSize()
            try:
                conf.supybot.protocols.http.peekSize.setValue(8192)
                self.assertNotRegexp('title '
                    'http://www.amazon.com/exec/obidos/tg/detail/-/'
                    '1884822312/qid=1063140754/sr=8-1/ref=sr_8_1/'
                    '002-9802970-2308826?v=glance&s=books&n=507846',
                    'no HTML title')
            finally:
                conf.supybot.protocols.http.peekSize.setValue(orig)
            # Checks the non-greediness of the regexp
            self.assertResponse('title '
                                'http://www.space.com/scienceastronomy/'
                                'jupiter_dark_spot_031023.html',
                                'Mystery Spot on Jupiter Baffles Astronomers')
            # Checks for @title not-working correctly
            self.assertResponse('title '
                'http://www.catb.org/~esr/jargon/html/F/foo.html',
                'foo')

        def testNetcraft(self):
            self.assertNotError('netcraft slashdot.org')

        def testTitleSnarfer(self):
            # Restore whatever value was configured before the test instead
            # of assuming it was False.
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                self.assertSnarfResponse('http://microsoft.com/',
                                         'Title: Microsoft Corporation'
                                         ' (at microsoft.com)')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(title)

        def testNonSnarfing(self):
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/sf/')
                try:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                    # A URL matching the exclusion regexp stays silent...
                    self.assertSnarfNoResponse('http://sf.net/', 2)
                    # ...while one that does not match is still snarfed.
                    self.assertSnarfRegexp('http://www.sourceforge.net/',
                                           r'Sourceforge\.net')
                finally:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(title)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)

        def testNonSnarfingRegexpConfigurable(self):
            # NOTE(review): titleSnarfer is not enabled in this test, so the
            # "no response" assertions may pass regardless of the regexp --
            # confirm whether that is intended.
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            self.assertSnarfNoResponse('http://foo.bar.baz/', 2)
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')
                self.assertSnarfNoResponse('http://biff.bar.baz/', 2)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)


# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: