Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format.

This commit is contained in:
Jeremy Fincher 2005-02-01 09:41:54 +00:00
parent 18f8044621
commit 0c2da03a67
6 changed files with 414 additions and 0 deletions

1
plugins/Web/README.txt Normal file
View File

@ -0,0 +1 @@
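The Web plugin provides web-related commands: "headers", "doctype", "size", and "title" report information about an HTTP URL; "netcraft" reports Netcraft's findings for a host; "urlquote" and "urlunquote" URL-quote and unquote text. It also offers an optional title snarfer (the per-channel "titleSnarfer" setting, off by default) that replies with the HTML title of URLs seen in a channel; URLs matching the "nonSnarfingRegexp" setting are not snarfed.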

58
plugins/Web/__init__.py Normal file
View File

@ -0,0 +1,58 @@
###
# Copyright (c) 2005, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Includes various web-related commands.
"""
import supybot
import supybot.world as world

# Plugin metadata.
__version__ = "%%VERSION%%"
__author__ = supybot.authors.jemfinch
# Maps supybot.Author instances to lists of their contributions.
__contributors__ = {}

import config
import plugin
# Re-execute plugin.py in case this package itself is being reloaded.
reload(plugin)
# If you add third-party modules that should be refreshed together with this
# plugin, import them and reload() them here as well.

# The test module is only pulled in when running under the test suite.
if world.testing:
    import test

# Names exported from the package: the configuration wizard and the plugin
# class.
configure = config.configure
Class = plugin.Class
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:

56
plugins/Web/config.py Normal file
View File

@ -0,0 +1,56 @@
###
# Copyright (c) 2005, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import supybot.conf as conf
import supybot.registry as registry
def configure(advanced):
    """Interactively configure the Web plugin.

    This will be called by supybot to configure this module.  `advanced` is
    a bool that specifies whether the user identified himself as an advanced
    user or not (it is not consulted here).  Configuration is effected by
    manipulating the registry: the only question asked is whether the title
    snarfer should be switched on.
    """
    # Only the yes/no helper is needed by this wizard.
    from supybot.questions import yn
    Web = conf.registerPlugin('Web', True)
    if yn("""This plugin also offers a snarfer that will try to fetch the
             title of URLs that it sees in the channel.  Would you like this
             snarfer to be enabled?""", default=False):
        Web.titleSnarfer.setValue(True)
# Registry group for this plugin; the values below are registered per channel.
Web = conf.registerPlugin('Web')
# Off by default: the title snarfer must be enabled explicitly.
conf.registerChannelValue(Web, 'titleSnarfer',
    registry.Boolean(False, """Determines whether the bot will output the HTML
    title of URLs it sees in the channel."""))
# No exclusion regexp by default (None), so every URL is eligible for
# snarfing once the snarfer is enabled.
conf.registerChannelValue(Web, 'nonSnarfingRegexp',
    registry.Regexp(None, """Determines what URLs matching the given regexp
    will not be snarfed.  Give the empty string if you have no URLs that you'd
    like to exclude from being snarfed."""))
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:

187
plugins/Web/plugin.py Normal file
View File

@ -0,0 +1,187 @@
###
# Copyright (c) 2005, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import re
import supybot.conf as conf
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.privmsgs as privmsgs
import supybot.callbacks as callbacks
class Web(callbacks.PrivmsgCommandAndRegexp):
    """Web-related commands (headers, doctype, size, title, netcraft,
    urlquote, urlunquote) plus an optional snarfer that replies with the HTML
    title of URLs seen in a channel."""
    # Names of the methods that are triggered by a regexp match rather than
    # invoked as commands.
    regexps = ['titleSnarfer']
    # NOTE(review): presumably tells the framework to run handlers in their
    # own thread, since most of them do blocking HTTP requests -- confirm
    # against supybot.callbacks.
    threaded = True

    # Non-greedy so that only the first <title>...</title> pair is captured;
    # re.S lets a title span several lines.
    _titleRe = re.compile(r'<title>(.*?)</title>', re.I | re.S)

    def titleSnarfer(self, irc, msg, match):
        r"https?://[^\])>\s]+"
        # NOTE(review): the raw-string docstring above is not prose; it looks
        # like the regexp whose match is handed to this method (the method is
        # listed in `regexps`).  Do not reword it as documentation.
        channel = msg.args[0]
        # Only snarf in channels, and never from messages addressed to the
        # bot itself.
        if not irc.isChannel(channel):
            return
        if callbacks.addressed(irc.nick, msg):
            return
        if not self.registryValue('titleSnarfer', channel):
            return
        url = match.group(0)
        # URLs matching the channel's exclusion regexp are skipped.
        r = self.registryValue('nonSnarfingRegexp', channel)
        if r and r.search(url):
            self.log.debug('Not titleSnarfing %q.', url)
            return
        try:
            # Only the first peekSize bytes of the page are fetched.
            size = conf.supybot.protocols.http.peekSize()
            text = utils.web.getUrl(url, size=size)
        except utils.web.Error as e:
            # A snarfer should stay quiet in the channel on failure; just
            # log it.
            self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
            return
        m = self._titleRe.search(text)
        if m is not None:
            domain = utils.web.getDomain(url)
            title = utils.web.htmlToText(m.group(1).strip())
            s = format('Title: %s (at %s)', title, domain)
            irc.reply(s, prefixName=False)
    titleSnarfer = urlSnarfer(titleSnarfer)

    def headers(self, irc, msg, args, url):
        """<url>

        Returns the HTTP headers of <url>. Only HTTP urls are valid, of
        course.
        """
        fd = utils.web.getUrlFd(url)
        try:
            s = ', '.join([format('%s: %s', k, v)
                           for (k, v) in fd.headers.items()])
            irc.reply(s)
        finally:
            # Always release the connection, even if replying fails.
            fd.close()
    headers = wrap(headers, ['httpUrl'])

    _doctypeRe = re.compile(r'(<!DOCTYPE[^>]+>)', re.M)

    def doctype(self, irc, msg, args, url):
        """<url>

        Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
        course.
        """
        # Only the first peekSize bytes of the page are searched.
        size = conf.supybot.protocols.http.peekSize()
        s = utils.web.getUrl(url, size=size)
        m = self._doctypeRe.search(s)
        if m:
            s = utils.str.normalizeWhitespace(m.group(0))
            irc.reply(s)
        else:
            irc.reply('That URL has no specified doctype.')
    doctype = wrap(doctype, ['httpUrl'])

    def size(self, irc, msg, args, url):
        """<url>

        Returns the Content-Length header of <url>. Only HTTP urls are valid,
        of course.
        """
        fd = utils.web.getUrlFd(url)
        try:
            # Keep the try body to the header lookup alone so that a KeyError
            # raised while replying is not mistaken for a missing header.
            try:
                size = fd.headers['Content-Length']
            except KeyError:
                # No Content-Length: read up to peekSize bytes.  A short read
                # means we saw the whole body; a full read means the body is
                # at least that long.
                size = conf.supybot.protocols.http.peekSize()
                s = fd.read(size)
                if len(s) != size:
                    irc.reply(format('%u is %i bytes long.', url, len(s)))
                else:
                    irc.reply(format('The server didn\'t tell me how long %u '
                                     'is but it\'s longer than %i bytes.',
                                     url, size))
            else:
                irc.reply(format('%u is %i bytes long.', url, size))
        finally:
            fd.close()
    size = wrap(size, ['httpUrl'])

    def title(self, irc, msg, args, url):
        """<url>

        Returns the HTML <title>...</title> of a URL.
        """
        # Only the first peekSize bytes of the page are searched.
        size = conf.supybot.protocols.http.peekSize()
        text = utils.web.getUrl(url, size=size)
        m = self._titleRe.search(text)
        if m is not None:
            irc.reply(utils.web.htmlToText(m.group(1).strip()))
        else:
            irc.reply(format('That URL appears to have no HTML title '
                             'within the first %i bytes.', size))
    title = wrap(title, ['httpUrl'])

    _netcraftre = re.compile(r'td align="left">\s+<a[^>]+>(.*?)<a href',
                             re.S | re.I)

    def netcraft(self, irc, msg, args, hostname):
        """<hostname|ip>

        Returns Netcraft.com's determination of what operating system and
        webserver is running on the host given.
        """
        # The argument is free-form user text, so quote it before placing it
        # in the query string.
        url = ('http://uptime.netcraft.com/up/graph/?host=' +
               utils.web.urlquote(hostname))
        html = utils.web.getUrl(url)
        m = self._netcraftre.search(html)
        if m:
            html = m.group(1)
            s = utils.web.htmlToText(html, tagReplace='').strip()
            s = s.rstrip('-').strip()
            irc.reply(s) # Snip off "the site"
        elif 'We could not get any results' in html:
            irc.reply('No results found for %s.' % hostname)
        else:
            irc.error('The format of the page was odd.')
    netcraft = wrap(netcraft, ['text'])

    def urlquote(self, irc, msg, args, text):
        """<text>

        Returns the URL quoted form of the text.
        """
        irc.reply(utils.web.urlquote(text))
    urlquote = wrap(urlquote, ['text'])

    def urlunquote(self, irc, msg, args, text):
        """<text>

        Returns the text un-URL quoted.
        """
        s = utils.web.urlunquote(text)
        irc.reply(s)
    urlunquote = wrap(urlunquote, ['text'])


# Exported under the name the package's __init__ reads (plugin.Class).
Class = Web
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:

111
plugins/Web/test.py Normal file
View File

@ -0,0 +1,111 @@
###
# Copyright (c) 2005, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
from supybot.test import *
class WebTestCase(PluginTestCase):
    """Exercises the Web plugin's commands and its URL title snarfer.

    The tests fetch live pages, so they are only defined when `network` is
    true.  Tests that change configuration save the previous value first and
    restore it in a `finally` clause.
    """
    plugins = ('Web',)
    if network:
        def testHeaders(self):
            # Non-HTTP URLs must be rejected.
            self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('headers http://www.slashdot.org/')

        def testDoctype(self):
            self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('doctype http://www.slashdot.org/')
            m = self.getMsg('doctype http://moobot.sf.net/')
            self.failUnless(m.args[1].endswith('>'))

        def testSize(self):
            self.assertError('size ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('size http://supybot.sf.net/')
            self.assertNotError('size http://www.slashdot.org/')

        def testTitle(self):
            self.assertResponse('title http://www.slashdot.org/',
                                'Slashdot: News for nerds, stuff that matters')
            # Amazon add a bunch of scripting stuff to the top of their page,
            # so we need to allow for a larger peekSize
            # Read the original value before entering the try block so that
            # the finally clause can never see it unbound.
            orig = conf.supybot.protocols.http.peekSize()
            try:
                conf.supybot.protocols.http.peekSize.setValue(8192)
                self.assertNotRegexp('title '
                                     'http://www.amazon.com/exec/obidos/tg/detail/-/'
                                     '1884822312/qid=1063140754/sr=8-1/ref=sr_8_1/'
                                     '002-9802970-2308826?v=glance&s=books&n=507846',
                                     'no HTML title')
            finally:
                conf.supybot.protocols.http.peekSize.setValue(orig)
            # Checks the non-greediness of the regexp
            self.assertResponse('title '
                                'http://www.space.com/scienceastronomy/'
                                'jupiter_dark_spot_031023.html',
                                'Mystery Spot on Jupiter Baffles Astronomers')
            # Checks for @title not-working correctly
            self.assertResponse('title '
                                'http://www.catb.org/~esr/jargon/html/F/foo.html',
                                'foo')

        def testNetcraft(self):
            self.assertNotError('netcraft slashdot.org')

        def testTitleSnarfer(self):
            # Restore whatever was configured before, not a hard-coded False.
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                self.assertSnarfResponse('http://microsoft.com/',
                                         'Title: Microsoft Corporation'
                                         ' (at microsoft.com)')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(title)

        def testNonSnarfing(self):
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/sf/')
                try:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                    # sf.net matches the exclusion regexp; sourceforge.net
                    # does not.
                    self.assertSnarfNoResponse('http://sf.net/', 2)
                    self.assertSnarfRegexp('http://www.sourceforge.net/',
                                           r'Sourceforge\.net')
                finally:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(title)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)

        def testNonSnarfingRegexpConfigurable(self):
            self.assertSnarfNoResponse('http://foo.bar.baz/', 2)
            # Restore the previous regexp afterwards, as testNonSnarfing does.
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')
                self.assertSnarfNoResponse('http://biff.bar.baz/', 2)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:

View File

@ -48,6 +48,7 @@ plugins = [
'Status',
'User',
'Utilities',
'Web',
]
import sys