diff --git a/plugins/Web/README.txt b/plugins/Web/README.txt
new file mode 100644
index 000000000..d60b47a97
--- /dev/null
+++ b/plugins/Web/README.txt
@@ -0,0 +1 @@
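+The Web plugin provides web-related commands: headers, doctype, size and title (HTTP URLs only), netcraft, urlquote and urlunquote. It can also announce the HTML title of URLs seen in a channel; enable that with the titleSnarfer setting and exclude URLs with the nonSnarfingRegexp setting.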
diff --git a/plugins/Web/__init__.py b/plugins/Web/__init__.py
new file mode 100644
index 000000000..e2c2875e5
--- /dev/null
+++ b/plugins/Web/__init__.py
@@ -0,0 +1,58 @@
+###
+# Copyright (c) 2005, Jeremiah Fincher
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions, and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions, and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the author of this software nor the name of
+# contributors to this software may be used to endorse or promote products
+# derived from this software without specific prior written consent.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+###
+
+"""
+Includes various web-related commands.
+"""
+
+import supybot
+import supybot.world as world
+
+__version__ = "%%VERSION%%" # NOTE(review): looks like a placeholder - confirm
+
+__author__ = supybot.authors.jemfinch
+
+# This is a dictionary mapping supybot.Author instances to lists of
+# contributions.
+__contributors__ = {}
+
+import config
+import plugin
+reload(plugin) # Refresh plugin.py in case this package is being reloaded.
+# Add more reloads here if you add third-party modules and want them to be
+# reloaded when this plugin is reloaded. Don't forget to import them as well!
+
+if world.testing:
+ import test # Only imported when world.testing is set.
+
+Class = plugin.Class # The plugin class exported by plugin.py.
+configure = config.configure # The configuration hook defined in config.py.
+
+
+# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
diff --git a/plugins/Web/config.py b/plugins/Web/config.py
new file mode 100644
index 000000000..a2723e53d
--- /dev/null
+++ b/plugins/Web/config.py
@@ -0,0 +1,56 @@
+###
+# Copyright (c) 2005, Jeremiah Fincher
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions, and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions, and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the author of this software nor the name of
+# contributors to this software may be used to endorse or promote products
+# derived from this software without specific prior written consent.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+###
+
+import supybot.conf as conf
+import supybot.registry as registry
+
+def configure(advanced):
+    # Called by supybot to configure this module interactively. advanced is
+    # a bool that specifies whether the user identified himself as an advanced
+    # user or not. The only question asked is whether to turn on the title
+    # snarfer; answering yes sets the Web.titleSnarfer registry value to True.
+    from supybot.questions import yn
+    Web = conf.registerPlugin('Web', True)
+    if yn("""This plugin also offers a snarfer that will try to fetch the
+             title of URLs that it sees in the channel. Would you like this
+             snarfer to be enabled?""", default=False):
+        Web.titleSnarfer.setValue(True)
+
+# Channel-specific settings for the title snarfer, registered at import time.
+Web = conf.registerPlugin('Web')
+conf.registerChannelValue(Web, 'titleSnarfer',
+    registry.Boolean(False, """Determines whether the bot will output the HTML
+    title of URLs it sees in the channel."""))
+conf.registerChannelValue(Web, 'nonSnarfingRegexp',
+    registry.Regexp(None, """Determines which URLs the title snarfer will
+    skip in the channel; URLs matching the regexp given will not be
+    snarfed. Give the empty string if you have no URLs that you'd like to
+    exclude from being snarfed."""))
+
+# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py
new file mode 100644
index 000000000..9e9cf50a6
--- /dev/null
+++ b/plugins/Web/plugin.py
@@ -0,0 +1,187 @@
+###
+# Copyright (c) 2005, Jeremiah Fincher
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions, and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions, and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the author of this software nor the name of
+# contributors to this software may be used to endorse or promote products
+# derived from this software without specific prior written consent.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+###
+
+import re
+
+import supybot.conf as conf
+import supybot.utils as utils
+from supybot.commands import *
+import supybot.plugins as plugins
+import supybot.ircutils as ircutils
+import supybot.privmsgs as privmsgs
+import supybot.callbacks as callbacks
+
+class Web(callbacks.PrivmsgCommandAndRegexp):
+    """Web-related commands plus an optional channel URL title snarfer."""
+    regexps = ['titleSnarfer'] # Regexp-triggered methods.
+    threaded = True
+    _titleRe = re.compile(r'<title>(.*?)</title>', re.I | re.S)
+    def titleSnarfer(self, irc, msg, match):
+        r"https?://[^\])>\s]+"
+        channel = msg.args[0]
+        if not irc.isChannel(channel):
+            return
+        if callbacks.addressed(irc.nick, msg):
+            return # Addressed to the bot itself; nothing to snarf.
+        if self.registryValue('titleSnarfer', channel):
+            url = match.group(0)
+            r = self.registryValue('nonSnarfingRegexp', channel)
+            if r and r.search(url):
+                self.log.debug('Not titleSnarfing %q.', url)
+                return
+            try:
+                size = conf.supybot.protocols.http.peekSize()
+                text = utils.web.getUrl(url, size=size) # First size bytes.
+            except utils.web.Error, e:
+                self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
+                return
+            m = self._titleRe.search(text)
+            if m is not None:
+                domain = utils.web.getDomain(url)
+                title = utils.web.htmlToText(m.group(1).strip())
+                s = format('Title: %s (at %s)', title, domain)
+                irc.reply(s, prefixName=False)
+    titleSnarfer = urlSnarfer(titleSnarfer)
+
+    def headers(self, irc, msg, args, url):
+        """<url>
+
+        Returns the HTTP headers of <url>. Only HTTP urls are valid, of
+        course.
+        """
+        fd = utils.web.getUrlFd(url)
+        try:
+            s = ', '.join([format('%s: %s', k, v)
+                           for (k, v) in fd.headers.items()])
+            irc.reply(s)
+        finally:
+            fd.close() # Always release the connection, even on error.
+    headers = wrap(headers, ['httpUrl'])
+
+    _doctypeRe = re.compile(r'(<!DOCTYPE[^>]+>)', re.M)
+    def doctype(self, irc, msg, args, url):
+        """<url>
+
+        Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
+        course.
+        """
+        size = conf.supybot.protocols.http.peekSize()
+        s = utils.web.getUrl(url, size=size)
+        m = self._doctypeRe.search(s)
+        if m:
+            s = utils.str.normalizeWhitespace(m.group(0))
+            irc.reply(s)
+        else:
+            irc.reply('That URL has no specified doctype.')
+    doctype = wrap(doctype, ['httpUrl'])
+
+    def size(self, irc, msg, args, url):
+        """<url>
+
+        Returns the Content-Length header of <url>. Only HTTP urls are valid,
+        of course.
+        """
+        fd = utils.web.getUrlFd(url)
+        try:
+            try:
+                size = fd.headers['Content-Length']
+                irc.reply(format('%u is %i bytes long.', url, size))
+            except KeyError: # No Content-Length header; read up to peekSize.
+                size = conf.supybot.protocols.http.peekSize()
+                s = fd.read(size)
+                if len(s) != size:
+                    irc.reply(format('%u is %i bytes long.', url, len(s)))
+                else:
+                    irc.reply(format('The server didn\'t tell me how long %u '
+                                     'is but it\'s longer than %i bytes.',
+                                     url, size))
+        finally:
+            fd.close()
+    size = wrap(size, ['httpUrl'])
+
+    def title(self, irc, msg, args, url):
+        """<url>
+
+        Returns the HTML <title>...</title> of a URL.
+        """
+        size = conf.supybot.protocols.http.peekSize()
+        text = utils.web.getUrl(url, size=size)
+        m = self._titleRe.search(text)
+        if m is not None:
+            irc.reply(utils.web.htmlToText(m.group(1).strip()))
+        else:
+            irc.reply(format('That URL appears to have no HTML title '
+                             'within the first %i bytes.', size))
+    title = wrap(title, ['httpUrl'])
+
+    _netcraftre = re.compile(r'td align="left">\s+<a[^>]+>(.*?)<a href',
+                             re.S | re.I)
+    def netcraft(self, irc, msg, args, hostname):
+        """<hostname|ip>
+
+        Returns Netcraft.com's determination of what operating system and
+        webserver is running on the host given.
+        """
+        url = 'http://uptime.netcraft.com/up/graph/?host=' + hostname
+        html = utils.web.getUrl(url)
+        m = self._netcraftre.search(html)
+        if m:
+            html = m.group(1)
+            s = utils.web.htmlToText(html, tagReplace='').strip()
+            s = s.rstrip('-').strip()
+            irc.reply(s) # Snip off "the site"
+        elif 'We could not get any results' in html:
+            irc.reply('No results found for %s.' % hostname)
+        else:
+            irc.error('The format of the page was odd.')
+    netcraft = wrap(netcraft, ['text'])
+
+    def urlquote(self, irc, msg, args, text):
+        """<text>
+
+        Returns the URL quoted form of the text.
+        """
+        irc.reply(utils.web.urlquote(text))
+    urlquote = wrap(urlquote, ['text'])
+
+    def urlunquote(self, irc, msg, args, text):
+        """<text>
+
+        Returns the text un-URL quoted.
+        """
+        s = utils.web.urlunquote(text)
+        irc.reply(s)
+    urlunquote = wrap(urlunquote, ['text'])
+
+
+
+Class = Web # The name __init__.py looks up (Class = plugin.Class).
+
+
+# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
diff --git a/plugins/Web/test.py b/plugins/Web/test.py
new file mode 100644
index 000000000..38161afc3
--- /dev/null
+++ b/plugins/Web/test.py
@@ -0,0 +1,111 @@
+###
+# Copyright (c) 2005, Jeremiah Fincher
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions, and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions, and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the author of this software nor the name of
+# contributors to this software may be used to endorse or promote products
+# derived from this software without specific prior written consent.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+###
+
+from supybot.test import *
+
+class WebTestCase(PluginTestCase):
+    """Web plugin tests; they are only defined when `network` is true."""
+    plugins = ('Web',)
+    if network:
+        def testHeaders(self):
+            self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
+            self.assertNotError('headers http://www.slashdot.org/')
+
+        def testDoctype(self):
+            self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
+            self.assertNotError('doctype http://www.slashdot.org/')
+            m = self.getMsg('doctype http://moobot.sf.net/')
+            self.failUnless(m.args[1].endswith('>'))
+
+        def testSize(self):
+            self.assertError('size ftp://ftp.cdrom.com/pub/linux')
+            self.assertNotError('size http://supybot.sf.net/')
+            self.assertNotError('size http://www.slashdot.org/')
+
+        def testTitle(self):
+            self.assertResponse('title http://www.slashdot.org/',
+                                'Slashdot: News for nerds, stuff that matters')
+            # Amazon add a bunch of scripting stuff to the top of their page,
+            # so we need to allow for a larger peekSize
+            try:
+                orig = conf.supybot.protocols.http.peekSize()
+                conf.supybot.protocols.http.peekSize.setValue(8192)
+                self.assertNotRegexp('title '
+                    'http://www.amazon.com/exec/obidos/tg/detail/-/'
+                    '1884822312/qid=1063140754/sr=8-1/ref=sr_8_1/'
+                    '002-9802970-2308826?v=glance&s=books&n=507846',
+                    'no HTML title')
+            finally:
+                conf.supybot.protocols.http.peekSize.setValue(orig)
+            # Checks the non-greediness of the regexp
+            self.assertResponse('title '
+                                'http://www.space.com/scienceastronomy/'
+                                'jupiter_dark_spot_031023.html',
+                                'Mystery Spot on Jupiter Baffles Astronomers')
+            # Checks for @title not-working correctly
+            self.assertResponse('title '
+                'http://www.catb.org/~esr/jargon/html/F/foo.html',
+                'foo')
+
+        def testNetcraft(self):
+            self.assertNotError('netcraft slashdot.org')
+
+        def testTitleSnarfer(self):
+            try:
+                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
+                self.assertSnarfResponse('http://microsoft.com/',
+                    'Title: Microsoft Corporation (at microsoft.com)')
+            finally:
+                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
+
+        def testNonSnarfing(self):
+            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
+            title = conf.supybot.plugins.Web.titleSnarfer()
+            try:
+                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/sf/')
+                try:
+                    conf.supybot.plugins.Web.titleSnarfer.setValue(True)
+                    self.assertSnarfNoResponse('http://sf.net/', 2)
+                    self.assertSnarfRegexp('http://www.sourceforge.net/',
+                                           r'Sourceforge\.net')
+                finally:
+                    conf.supybot.plugins.Web.titleSnarfer.setValue(title)
+            finally:
+                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)
+
+        def testNonSnarfingRegexpConfigurable(self):
+            self.assertSnarfNoResponse('http://foo.bar.baz/', 2)
+            try:
+                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')
+                self.assertSnarfNoResponse('http://biff.bar.baz/', 2)
+            finally:
+                conf.supybot.plugins.Web.nonSnarfingRegexp.set('')
+
+
+# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
diff --git a/setup.py b/setup.py
index 489f3cb0c..48175164d 100644
--- a/setup.py
+++ b/setup.py
@@ -48,6 +48,7 @@ plugins = [
'Status',
'User',
'Utilities',
+ 'Web',
]
import sys