Limnoria/plugins/Http.py

423 lines
16 KiB
Python
Raw Normal View History

2003-03-12 07:26:59 +01:00
#!/usr/bin/env python
###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Provides several commands that go out to websites and get things.
"""
2003-11-25 09:23:47 +01:00
# Revision marker for this module.  "$Id$" looks like a version-control
# keyword placeholder (presumably expanded on checkout -- TODO confirm).
__revision__ = "$Id$"
2004-07-24 07:18:26 +02:00
import supybot.plugins as plugins
2003-03-12 07:26:59 +01:00
import re
import sets
import getopt
import socket
import urllib
import xml.dom.minidom
from itertools import imap, ifilter
2004-07-24 07:18:26 +02:00
import supybot.conf as conf
import supybot.utils as utils
import supybot.webutils as webutils
import supybot.privmsgs as privmsgs
import supybot.registry as registry
import supybot.callbacks as callbacks
2003-03-12 07:26:59 +01:00
class FreshmeatException(Exception):
    """Signals that a Freshmeat project lookup produced an error reply.

    Raised and caught inside the freshmeat command; the exception message is
    the text that gets reported back to the user.
    """
class Http(callbacks.Privmsg):
    # NOTE(review): every command docstring below opens with an argument
    # synopsis line (e.g. "<url>"); presumably the framework shows these
    # docstrings to users as help text, so implementation notes are kept in
    # "#" comments instead of the docstrings -- confirm before restructuring.
    threaded = True
    # Non-greedy capture of the text between <title> and </title>; re.S lets
    # the title span several lines, re.I accepts any tag capitalisation.
    _titleRe = re.compile(r'<title>(.*?)</title>', re.I | re.S)

    def callCommand(self, method, irc, msg, *L):
        # Delegate to the base class, but turn a webutils.WebError raised by
        # any command into an error reply instead of letting it propagate.
        try:
            callbacks.Privmsg.callCommand(self, method, irc, msg, *L)
        except webutils.WebError as e:
            irc.error(str(e))

    def headers(self, irc, msg, args):
        """<url>

        Returns the HTTP headers of <url>. Only HTTP urls are valid, of
        course.
        """
        url = privmsgs.getArgs(args)
        if not url.startswith('http://'):
            irc.error('Only HTTP urls are valid.')
            return
        fd = webutils.getUrlFd(url)
        try:
            s = ', '.join(['%s: %s' % (k, v)
                           for (k, v) in fd.headers.items()])
        finally:
            # Release the connection even if formatting the headers fails.
            fd.close()
        irc.reply(s)

    _doctypeRe = re.compile(r'(<!DOCTYPE[^>]+>)', re.M)
    def doctype(self, irc, msg, args):
        """<url>

        Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
        course.
        """
        url = privmsgs.getArgs(args)
        if not url.startswith('http://'):
            irc.error('Only HTTP urls are valid.')
            return
        # Only the first peekSize bytes are fetched and searched.
        size = conf.supybot.protocols.http.peekSize()
        s = webutils.getUrl(url, size=size)
        m = self._doctypeRe.search(s)
        if m:
            irc.reply(utils.normalizeWhitespace(m.group(0)))
        else:
            irc.reply('That URL has no specified doctype.')

    def size(self, irc, msg, args):
        """<url>

        Returns the Content-Length header of <url>. Only HTTP urls are valid,
        of course.
        """
        url = privmsgs.getArgs(args)
        if not url.startswith('http://'):
            irc.error('Only HTTP urls are valid.')
            return
        fd = webutils.getUrlFd(url)
        try:
            try:
                size = fd.headers['Content-Length']
            except KeyError:
                # No Content-Length header: read up to peekSize bytes and
                # measure what actually arrived.
                size = None
                limit = conf.supybot.protocols.http.peekSize()
                body = fd.read(limit)
        finally:
            fd.close()
        if size is not None:
            irc.reply('%s is %s bytes long.' % (url, size))
        elif len(body) != limit:
            # The read did not fill the peek window, so len(body) is
            # reported as the document's length.
            irc.reply('%s is %s bytes long.' % (url, len(body)))
        else:
            irc.reply('The server didn\'t tell me how long %s is '
                      'but it\'s longer than %s bytes.' % (url, limit))

    def title(self, irc, msg, args):
        """<url>

        Returns the HTML <title>...</title> of a URL.
        """
        url = privmsgs.getArgs(args)
        if '://' not in url:
            # A bare host/path is treated as an HTTP URL.
            url = 'http://%s' % url
        size = conf.supybot.protocols.http.peekSize()
        text = webutils.getUrl(url, size=size)
        m = self._titleRe.search(text)
        if m is not None:
            irc.reply(utils.htmlToText(m.group(1).strip()))
        else:
            irc.reply('That URL appears to have no HTML title '
                      'within the first %s bytes.' % size)

    def freshmeat(self, irc, msg, args):
        """<project name>

        Returns Freshmeat data about a given project.
        """
        project = privmsgs.getArgs(args)
        # All whitespace is removed from the name before it goes in the URL.
        project = ''.join(project.split())
        url = 'http://www.freshmeat.net/projects-xml/%s' % \
              urllib.quote(project)
        try:
            text = webutils.getUrl(url)
            if text.startswith('Error'):
                # Report everything after the leading "Error..." word; a
                # one-word reply is reported as-is rather than raising
                # IndexError.
                raise FreshmeatException(text.split(None, 1)[-1])
            dom = xml.dom.minidom.parseString(text)
            def getNode(name):
                # Text of the first child of the first <name> element.
                try:
                    node = dom.getElementsByTagName(name)[0]
                    return str(node.childNodes[0].data)
                except IndexError:
                    raise FreshmeatException(
                        'Freshmeat\'s reply had no usable <%s> field.' % name)
            project = getNode('projectname_full')
            version = getNode('latest_release_version')
            vitality = getNode('vitality_percent')
            popularity = getNode('popularity_percent')
            lastupdated = getNode('date_updated')
            irc.reply('%s, last updated %s, with a vitality percent of %s '
                      'and a popularity of %s, is in version %s.' %
                      (project, lastupdated, vitality, popularity, version))
        except FreshmeatException as e:
            irc.error(str(e))

    def stockquote(self, irc, msg, args):
        """<company symbol>

        Gets the information about the current price and change from the
        previous day of a given company (represented by a stock symbol).
        """
        symbol = privmsgs.getArgs(args)
        if ' ' in symbol:
            irc.error('Only one stockquote can be looked up at a time.')
            return
        url = 'http://finance.yahoo.com/d/quotes.csv?s=%s' \
              '&f=sl1d1t1c1ohgv&e=.csv' % urllib.quote(symbol)
        quote = webutils.getUrl(url)
        data = quote.split(',')
        # Fields used below: 0 symbol, 1 price, 3 time, 4 change.  A reply
        # too short to hold them is treated like an unknown symbol instead of
        # raising IndexError; a price of 0.00 also means "no listing".
        if len(data) >= 5 and data[1] != '0.00':
            # [1:-1] drops the first and last character of the symbol and
            # time fields (presumably surrounding quotes in the CSV).
            irc.reply('The current price of %s is %s, as of %s EST. '
                      'A change of %s from the last business day.' %
                      (data[0][1:-1], data[1], data[3][1:-1], data[4]))
        else:
            m = 'I couldn\'t find a listing for %s' % symbol
            irc.error(m)

    _mlgeekquotere = re.compile('<p class="qt">(.*?)</p>', re.M | re.DOTALL)
    def geekquote(self, irc, msg, args):
        """[<id>]

        Returns a random geek quote from bash.org; the optional argument
        id specifies which quote to retrieve.
        """
        quoteId = privmsgs.getArgs(args, required=0, optional=1)
        # With no id given, 'random1' is used as the query string.
        quoteId = quoteId or 'random1'
        html = webutils.getUrl('http://bash.org/?%s' % urllib.quote(quoteId))
        m = self._mlgeekquotere.search(html)
        if m is None:
            irc.error('No quote found.')
            return
        quote = utils.htmlToText(m.group(1))
        # Collapse the multi-line quote onto a single line for the reply.
        quote = ' // '.join(quote.splitlines())
        irc.reply(quote)

    _cyborgRe = re.compile(r'<p class="mediumheader">(.*?)</p>', re.I)
    def cyborg(self, irc, msg, args):
        """<name>

        Returns a cyborg acronym for <name> from <http://www.cyborgname.com/>.
        """
        name = privmsgs.getArgs(args)
        name = urllib.quote(name)
        url = 'http://www.cyborgname.com/cyborger.cgi?acronym=%s' % name
        html = webutils.getUrl(url)
        m = self._cyborgRe.search(html)
        if m:
            irc.reply(utils.normalizeWhitespace(m.group(1)))
        else:
            irc.errorPossibleBug('No cyborg name returned.')

    _acronymre = re.compile(r'valign="middle" width="7\d%" bgcolor="[^"]+">'
                            r'(?:<b>)?([^<]+)')
    def acronym(self, irc, msg, args):
        """<acronym>

        Displays acronym matches from acronymfinder.com
        """
        acronym = privmsgs.getArgs(args)
        url = 'http://www.acronymfinder.com/' \
              'af-query.asp?String=exact&Acronym=%s' % urllib.quote(acronym)
        # The request is sent with a browser-style User-agent header.
        request = webutils.Request(url, headers={'User-agent':
          'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'})
        html = webutils.getUrl(request)
        if 'daily limit' in html:
            s = 'Acronymfinder.com says I\'ve reached my daily limit. Sorry.'
            irc.error(s)
            return
        # Definitions are stripped and empty ones are dropped.
        defs = [d.strip() for d in self._acronymre.findall(html)]
        defs = [d for d in defs if d]
        utils.sortBy(lambda s: not s.startswith('[not an acronym]'), defs)
        for (i, s) in enumerate(defs):
            if s.startswith('[not an acronym]'):
                # Keep only what follows the first "is "; [-1] leaves the
                # entry intact (rather than raising) when "is " is absent.
                defs[i] = s.split('is ', 1)[-1]
        if not defs:
            irc.reply('No definitions found.')
        else:
            s = ', or '.join(defs)
            irc.reply('%s could be %s' % (acronym, s))

    _netcraftre = re.compile(r'td align="left">\s+<a[^>]+>(.*?)<a href',
                             re.S | re.I)
    def netcraft(self, irc, msg, args):
        """<hostname|ip>

        Returns Netcraft.com's determination of what operating system and
        webserver is running on the host given.
        """
        hostname = privmsgs.getArgs(args)
        url = 'http://uptime.netcraft.com/up/graph/?host=%s' % \
              urllib.quote(hostname)
        html = webutils.getUrl(url)
        m = self._netcraftre.search(html)
        if m:
            s = utils.htmlToText(m.group(1), tagReplace='').strip()
            # Trim trailing dashes and the whitespace around them.
            s = s.rstrip('-').strip()
            irc.reply(s)
        elif 'We could not get any results' in html:
            irc.reply('No results found for %s.' % hostname)
        else:
            irc.error('The format of the page was odd.')

    def kernel(self, irc, msg, args):
        """takes no arguments

        Returns information about the current version of the Linux kernel.
        """
        # Opened before the try block so that a failed fetch propagates as
        # is, instead of tripping over an unbound fd in the finally clause.
        fd = webutils.getUrlFd('http://kernel.org/kdist/finger_banner')
        try:
            stable = 'unknown'
            beta = 'unknown'
            for line in fd:
                if ':' not in line:
                    # Not a "description: version" line; nothing to read.
                    continue
                # Split on the first colon only so a version string that
                # itself contains a colon cannot break the unpacking.
                (name, version) = line.split(':', 1)
                if 'latest stable' in name:
                    stable = version.strip()
                elif 'latest beta' in name:
                    beta = version.strip()
        finally:
            fd.close()
        irc.reply('The latest stable kernel is %s; '
                  'the latest beta kernel is %s.' % (stable, beta))

    _pgpkeyre = re.compile(r'pub\s+\d{4}\w/<a href="([^"]+)">'
                           r'([^<]+)</a>[^>]+>([^<]+)</a>')
    def pgpkey(self, irc, msg, args):
        """<search words>

        Returns the results of querying pgp.mit.edu for keys that match
        the <search words>.
        """
        search = privmsgs.getArgs(args)
        # quote_plus turns spaces into '+' and also escapes characters such
        # as '&' and '#' that would otherwise corrupt the query string.
        urlClean = urllib.quote_plus(search)
        host = 'http://pgp.mit.edu:11371'
        url = '%s/pks/lookup?op=index&search=%s' % (host, urlClean)
        # Opened before the try block so that a failed fetch propagates as
        # is, instead of tripping over an unbound fd in the finally clause.
        fd = webutils.getUrlFd(url)
        try:
            L = []
            for line in fd:
                info = self._pgpkeyre.search(line)
                if info:
                    # group(3) is the text of the second link on the line,
                    # group(1) the host-relative href of the first.
                    L.append('%s <%s%s>' %
                             (info.group(3), host, info.group(1)))
        finally:
            fd.close()
        if not L:
            irc.reply('No results found for %s.' % search)
        else:
            s = 'Matches found for %s: %s' % (search, ' :: '.join(L))
            irc.reply(s)

    _filextre = re.compile(
        r'<strong>Extension:</strong>.*?<tr>.*?</tr>\s+<tr>\s+<td colspan='
        r'"2">(?:<a href[^>]+>([^<]+)</a>\s+|([^<]+))</td>\s+<td>'
        r'(?:<a href[^>]+>([^<]+)</a>|<img src="images/spacer.gif"(.))',
        re.I|re.S)
    def extension(self, irc, msg, args):
        """<ext>

        Returns the results of querying filext.com for file extensions that
        match <ext>.
        """
        ext = privmsgs.getArgs(args)
        # Characters rejected outright (includes a literal backslash).
        invalid = '|<>\\^=?/[]";,*'
        for c in invalid:
            if c in ext:
                irc.error('\'%s\' is an invalid extension character' % c)
                return
        s = 'http://www.filext.com/detaillist.php?extdetail=%s&goButton=Go'
        text = webutils.getUrl(s % ext)
        matches = self._filextre.findall(text)
        res = []
        for (file1, file2, comp1, comp2) in matches:
            # Each pair holds the linked and the unlinked variant of a cell;
            # whichever alternative matched is non-empty.
            filetype = (file1 or file2).strip()
            company = (comp1 or comp2).strip()
            if company:
                res.append('%s\'s %s' % (company, filetype))
            else:
                res.append(filetype)
        if res:
            irc.reply(utils.commaAndify(res))
        else:
            irc.error('No matching file extenstions were found.')

    _zipinfore = re.compile(r'Latitude<BR>\(([^)]+)\)</th><th>Longitude<BR>'
                            r'\(([^)]+)\).*?<tr>(.*?)</tr>', re.I)
    _zipstatre = re.compile(r'(Only about \d+,\d{3} of.*?in use.)')
    def zipinfo(self, irc, msg, args):
        """<zip code>

        Returns a plethora of information for the given <zip code>.
        """
        zipcode = privmsgs.getArgs(args)
        # isdigit() rejects signed or padded input such as "-1234", which an
        # int() conversion would have let through.
        if not (zipcode.isdigit() and len(zipcode) == 5):
            irc.error('Zip code must be a 5-digit integer.')
            return
        url = 'http://zipinfo.com/cgi-local/zipsrch.exe?cnty=cnty&ac=ac&'\
              'tz=tz&ll=ll&zip=%s&Go=Go' % zipcode
        # A webutils.WebError raised here is reported by callCommand.
        text = webutils.getUrl(url)
        if 'daily usage limit' in text:
            irc.error('I have exceeded the site\'s daily usage limit.')
            return
        m = self._zipstatre.search(text)
        if m:
            irc.reply('%s %s is not one of them.' % (m.group(1), zipcode))
            return
        n = self._zipinfore.search(text)
        if not n:
            irc.error('Unable to retrieve information for that zip code.')
            return
        (latdir, longdir, rawinfo) = n.groups()
        # Info consists of the following (whitespace separated):
        # City, State Abbrev., Zip Code, County, FIPS Code, Area Code, Time
        # Zone, Daylight Time(?), Latitude, Longitude
        info = utils.htmlToText(rawinfo)
        info = info.split()
        # The zip code anchors the variable-length city and county fields;
        # it must be preceded by at least the state and followed by at least
        # the six fixed trailing fields, otherwise the row is unusable.
        if zipcode not in info:
            irc.error('Unable to retrieve information for that zip code.')
            return
        zipindex = info.index(zipcode)
        if zipindex < 1 or len(info) - (zipindex + 1) < 6:
            irc.error('Unable to retrieve information for that zip code.')
            return
        resp = ['City: %s' % ' '.join(info[:zipindex-1]),
                'State: %s' % info[zipindex-1],
                'County: %s' % ' '.join(info[zipindex+1:-6]),
                'Area Code: %s' % info[-5],
                'Time Zone: %s' % info[-4],
                'Daylight Savings: %s' % info[-3],
                'Latitude: %s (%s)' % (info[-2], latdir),
                'Longitude: %s (%s)' % (info[-1], longdir),
               ]
        irc.reply('; '.join(resp))
2003-03-12 07:26:59 +01:00
# Module-level alias for the plugin class; presumably the name the plugin
# loader looks up when importing this module -- TODO confirm.
Class = Http
2003-04-02 10:54:23 +02:00
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: