mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-26 20:59:27 +01:00
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format.
This commit is contained in:
parent
18f8044621
commit
0c2da03a67
1
plugins/Web/README.txt
Normal file
1
plugins/Web/README.txt
Normal file
@ -0,0 +1 @@
|
||||
Insert a description of your plugin here, with any notes, etc. about using it.
|
58
plugins/Web/__init__.py
Normal file
58
plugins/Web/__init__.py
Normal file
@ -0,0 +1,58 @@
|
||||
###
|
||||
# Copyright (c) 2005, Jeremiah Fincher
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the author of this software nor the name of
|
||||
# contributors to this software may be used to endorse or promote products
|
||||
# derived from this software without specific prior written consent.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###
|
||||
|
||||
"""
|
||||
Includes various web-related commands.
|
||||
"""
|
||||
|
||||
import supybot
|
||||
import supybot.world as world
|
||||
|
||||
__version__ = "%%VERSION%%"

__author__ = supybot.authors.jemfinch

# Keys are supybot.Author instances; each value is the list of things that
# author contributed to this plugin.
__contributors__ = {}

import config
import plugin
# When the plugin itself is being reloaded, make sure the freshly edited
# plugin module is the one that gets used.
reload(plugin)
# Third-party modules that should be reloaded together with this plugin need
# both an import and a matching reload() call here.

if world.testing:
    import test

configure = config.configure
Class = plugin.Class
|
||||
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
56
plugins/Web/config.py
Normal file
56
plugins/Web/config.py
Normal file
@ -0,0 +1,56 @@
|
||||
###
|
||||
# Copyright (c) 2005, Jeremiah Fincher
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the author of this software nor the name of
|
||||
# contributors to this software may be used to endorse or promote products
|
||||
# derived from this software without specific prior written consent.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###
|
||||
|
||||
import supybot.conf as conf
|
||||
import supybot.registry as registry
|
||||
|
||||
def configure(advanced):
    """Interactively configure the Web plugin.

    This will be called by supybot to configure this module.  It registers
    the plugin and asks whether the title snarfer should be switched on.

    advanced is a bool that specifies whether the user identified himself as
    an advanced user or not; the same question is asked either way.
    """
    # Configuration is effected by manipulating the registry as appropriate.
    from supybot.questions import expect, anything, something, yn
    Web = conf.registerPlugin('Web', True)
    if yn("""This plugin also offers a snarfer that will try to fetch the
             title of URLs that it sees in the channel.  Would you like this
             snarfer to be enabled?""", default=False):
        Web.titleSnarfer.setValue(True)
|
||||
|
||||
|
||||
# Registry group under which every Web plugin setting is registered.
Web = conf.registerPlugin('Web')

# Channel-specific switch for the title snarfer; disabled by default.
conf.registerChannelValue(Web, 'titleSnarfer',
    registry.Boolean(False, """Determines whether the bot will output the HTML
    title of URLs it sees in the channel."""))

# Channel-specific exclusion pattern; no pattern is set by default.  The
# plugin only fetches titles, so the help text no longer mentions storing
# URLs in a database.
conf.registerChannelValue(Web, 'nonSnarfingRegexp',
    registry.Regexp(None, """Determines what URLs matching the given regexp
    will not be snarfed.  Give the empty string if you have no URLs that
    you'd like to exclude from being snarfed."""))
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
187
plugins/Web/plugin.py
Normal file
187
plugins/Web/plugin.py
Normal file
@ -0,0 +1,187 @@
|
||||
###
|
||||
# Copyright (c) 2005, Jeremiah Fincher
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the author of this software nor the name of
|
||||
# contributors to this software may be used to endorse or promote products
|
||||
# derived from this software without specific prior written consent.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###
|
||||
|
||||
import re
|
||||
|
||||
import supybot.conf as conf
|
||||
import supybot.utils as utils
|
||||
from supybot.commands import *
|
||||
import supybot.plugins as plugins
|
||||
import supybot.ircutils as ircutils
|
||||
import supybot.privmsgs as privmsgs
|
||||
import supybot.callbacks as callbacks
|
||||
|
||||
class Web(callbacks.PrivmsgCommandAndRegexp):
|
||||
"""Add the help for "@help Web" here."""
|
||||
regexps = ['titleSnarfer']
|
||||
threaded = True
|
||||
_titleRe = re.compile(r'<title>(.*?)</title>', re.I | re.S)
|
||||
def titleSnarfer(self, irc, msg, match):
|
||||
r"https?://[^\])>\s]+"
|
||||
channel = msg.args[0]
|
||||
if not irc.isChannel(channel):
|
||||
return
|
||||
if callbacks.addressed(irc.nick, msg):
|
||||
return
|
||||
if self.registryValue('titleSnarfer', channel):
|
||||
url = match.group(0)
|
||||
r = self.registryValue('nonSnarfingRegexp', channel)
|
||||
if r and r.search(url):
|
||||
self.log.debug('Not titleSnarfing %q.', url)
|
||||
return
|
||||
try:
|
||||
size = conf.supybot.protocols.http.peekSize()
|
||||
text = utils.web.getUrl(url, size=size)
|
||||
except utils.web.Error, e:
|
||||
self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
|
||||
return
|
||||
m = self._titleRe.search(text)
|
||||
if m is not None:
|
||||
domain = utils.web.getDomain(url)
|
||||
title = utils.web.htmlToText(m.group(1).strip())
|
||||
s = format('Title: %s (at %s)', title, domain)
|
||||
irc.reply(s, prefixName=False)
|
||||
titleSnarfer = urlSnarfer(titleSnarfer)
|
||||
|
||||
def headers(self, irc, msg, args, url):
|
||||
"""<url>
|
||||
|
||||
Returns the HTTP headers of <url>. Only HTTP urls are valid, of
|
||||
course.
|
||||
"""
|
||||
fd = utils.web.getUrlFd(url)
|
||||
try:
|
||||
s = ', '.join([format('%s: %s', k, v)
|
||||
for (k, v) in fd.headers.items()])
|
||||
irc.reply(s)
|
||||
finally:
|
||||
fd.close()
|
||||
headers = wrap(headers, ['httpUrl'])
|
||||
|
||||
_doctypeRe = re.compile(r'(<!DOCTYPE[^>]+>)', re.M)
|
||||
def doctype(self, irc, msg, args, url):
|
||||
"""<url>
|
||||
|
||||
Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
|
||||
course.
|
||||
"""
|
||||
size = conf.supybot.protocols.http.peekSize()
|
||||
s = utils.web.getUrl(url, size=size)
|
||||
m = self._doctypeRe.search(s)
|
||||
if m:
|
||||
s = utils.str.normalizeWhitespace(m.group(0))
|
||||
irc.reply(s)
|
||||
else:
|
||||
irc.reply('That URL has no specified doctype.')
|
||||
doctype = wrap(doctype, ['httpUrl'])
|
||||
|
||||
def size(self, irc, msg, args, url):
|
||||
"""<url>
|
||||
|
||||
Returns the Content-Length header of <url>. Only HTTP urls are valid,
|
||||
of course.
|
||||
"""
|
||||
fd = utils.web.getUrlFd(url)
|
||||
try:
|
||||
try:
|
||||
size = fd.headers['Content-Length']
|
||||
irc.reply(format('%u is %i bytes long.', url, size))
|
||||
except KeyError:
|
||||
size = conf.supybot.protocols.http.peekSize()
|
||||
s = fd.read(size)
|
||||
if len(s) != size:
|
||||
irc.reply(format('%u is %i bytes long.', url, len(s)))
|
||||
else:
|
||||
irc.reply(format('The server didn\'t tell me how long %u '
|
||||
'is but it\'s longer than %i bytes.',
|
||||
url, size))
|
||||
finally:
|
||||
fd.close()
|
||||
size = wrap(size, ['httpUrl'])
|
||||
|
||||
def title(self, irc, msg, args, url):
|
||||
"""<url>
|
||||
|
||||
Returns the HTML <title>...</title> of a URL.
|
||||
"""
|
||||
size = conf.supybot.protocols.http.peekSize()
|
||||
text = utils.web.getUrl(url, size=size)
|
||||
m = self._titleRe.search(text)
|
||||
if m is not None:
|
||||
irc.reply(utils.web.htmlToText(m.group(1).strip()))
|
||||
else:
|
||||
irc.reply(format('That URL appears to have no HTML title '
|
||||
'within the first %i bytes.', size))
|
||||
title = wrap(title, ['httpUrl'])
|
||||
|
||||
_netcraftre = re.compile(r'td align="left">\s+<a[^>]+>(.*?)<a href',
|
||||
re.S | re.I)
|
||||
def netcraft(self, irc, msg, args, hostname):
|
||||
"""<hostname|ip>
|
||||
|
||||
Returns Netcraft.com's determination of what operating system and
|
||||
webserver is running on the host given.
|
||||
"""
|
||||
url = 'http://uptime.netcraft.com/up/graph/?host=' + hostname
|
||||
html = utils.web.getUrl(url)
|
||||
m = self._netcraftre.search(html)
|
||||
if m:
|
||||
html = m.group(1)
|
||||
s = utils.web.htmlToText(html, tagReplace='').strip()
|
||||
s = s.rstrip('-').strip()
|
||||
irc.reply(s) # Snip off "the site"
|
||||
elif 'We could not get any results' in html:
|
||||
irc.reply('No results found for %s.' % hostname)
|
||||
else:
|
||||
irc.error('The format of page the was odd.')
|
||||
netcraft = wrap(netcraft, ['text'])
|
||||
|
||||
def urlquote(self, irc, msg, args, text):
|
||||
"""<text>
|
||||
|
||||
Returns the URL quoted form of the text.
|
||||
"""
|
||||
irc.reply(utils.web.urlquote(text))
|
||||
urlquote = wrap(urlquote, ['text'])
|
||||
|
||||
def urlunquote(self, irc, msg, args, text):
|
||||
"""<text>
|
||||
|
||||
Returns the text un-URL quoted.
|
||||
"""
|
||||
s = utils.web.urlunquote(text)
|
||||
irc.reply(s)
|
||||
urlunquote = wrap(urlunquote, ['text'])
|
||||
|
||||
|
||||
|
||||
Class = Web
|
||||
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
111
plugins/Web/test.py
Normal file
111
plugins/Web/test.py
Normal file
@ -0,0 +1,111 @@
|
||||
###
|
||||
# Copyright (c) 2005, Jeremiah Fincher
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the author of this software nor the name of
|
||||
# contributors to this software may be used to endorse or promote products
|
||||
# derived from this software without specific prior written consent.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###
|
||||
|
||||
from supybot.test import *
|
||||
|
||||
class WebTestCase(PluginTestCase):
    """Tests for the Web plugin's commands and its title snarfer.

    Every test here contacts live web sites, so the test methods are only
    defined when ``network`` is true.
    """
    plugins = ('Web',)
    if network:
        def testHeaders(self):
            # Non-HTTP URLs must be rejected.
            self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('headers http://www.slashdot.org/')

        def testDoctype(self):
            self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('doctype http://www.slashdot.org/')
            m = self.getMsg('doctype http://moobot.sf.net/')
            # The reply should be the complete declaration, up to its '>'.
            self.failUnless(m.args[1].endswith('>'))

        def testSize(self):
            self.assertError('size ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('size http://supybot.sf.net/')
            self.assertNotError('size http://www.slashdot.org/')

        def testTitle(self):
            self.assertResponse('title http://www.slashdot.org/',
                                'Slashdot: News for nerds, stuff that matters')
            # Amazon add a bunch of scripting stuff to the top of their page,
            # so we need to allow for a larger peekSize
            # Read the current value before entering the try block so the
            # finally clause can never hit an unbound name.
            orig = conf.supybot.protocols.http.peekSize()
            try:
                conf.supybot.protocols.http.peekSize.setValue(8192)
                self.assertNotRegexp('title '
                    'http://www.amazon.com/exec/obidos/tg/detail/-/'
                    '1884822312/qid=1063140754/sr=8-1/ref=sr_8_1/'
                    '002-9802970-2308826?v=glance&s=books&n=507846',
                    'no HTML title')
            finally:
                conf.supybot.protocols.http.peekSize.setValue(orig)
            # Checks the non-greediness of the regexp
            self.assertResponse('title '
                                'http://www.space.com/scienceastronomy/'
                                'jupiter_dark_spot_031023.html',
                                'Mystery Spot on Jupiter Baffles Astronomers')
            # Checks for @title not-working correctly
            self.assertResponse('title '
                'http://www.catb.org/~esr/jargon/html/F/foo.html',
                'foo')

        def testNetcraft(self):
            self.assertNotError('netcraft slashdot.org')

        def testTitleSnarfer(self):
            # NOTE(review): Web.titleSnarfer returns early for messages that
            # are not sent to a channel; confirm that this base class feeds
            # snarfer input to a channel (ChannelPluginTestCase may be
            # needed).
            # Remember the previous setting so it is restored rather than
            # forced to False, as testNonSnarfing does.
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                self.assertSnarfResponse('http://microsoft.com/',
                                         'Title: Microsoft Corporation'
                                         ' (at microsoft.com)')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(title)

        def testNonSnarfing(self):
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/sf/')
                try:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                    # Matches the exclusion regexp, so nothing is snarfed...
                    self.assertSnarfNoResponse('http://sf.net/', 2)
                    # ...while a URL that does not match still is.
                    self.assertSnarfRegexp('http://www.sourceforge.net/',
                                           r'Sourceforge\.net')
                finally:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(title)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)

        def testNonSnarfingRegexpConfigurable(self):
            self.assertSnarfNoResponse('http://foo.bar.baz/', 2)
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')
                self.assertSnarfNoResponse('http://biff.bar.baz/', 2)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('')
|
||||
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
Loading…
Reference in New Issue
Block a user