diff --git a/ChangeLog b/ChangeLog
index 60304038e..1cec4c063 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+ * Added Http.size, Http.doctype, and Http.headers to retrieve
+ various meta-information on URLs.
+
* Added a ranking to ChannelDB.wordstats.
* Added 'roulette' command to Fun plugin.
diff --git a/plugins/Http.py b/plugins/Http.py
index 3b93b1f57..e14c9e3a7 100644
--- a/plugins/Http.py
+++ b/plugins/Http.py
@@ -56,13 +56,78 @@ class FreshmeatException(Exception):
class Http(callbacks.Privmsg):
threaded = True
+ maxSize = 4096
_titleRe = re.compile(r'
(.*?)', re.I | re.S)
def callCommand(self, method, irc, msg, *L):
+ # Delegate to callbacks.Privmsg.callCommand; a webutils.WebError raised
+ # while running the command is caught here and passed to irc.error as a
+ # string instead of propagating.
try:
callbacks.Privmsg.callCommand(self, method, irc, msg, *L)
- except webutils.WebException, e:
+ except webutils.WebError, e:
irc.error(msg, str(e))
+    def headers(self, irc, msg, args):
+        """<url>
+
+        Returns the HTTP headers of <url>. Only HTTP urls are valid, of
+        course.
+        """
+        # NOTE(review): the docstring appears to double as the command's
+        # help text (first line = argument syntax) -- keep it in that form.
+        url = privmsgs.getArgs(args)
+        if not url.startswith('http://'):
+            irc.error(msg, 'Only HTTP urls are valid.')
+            return
+        try:
+            fd = webutils.getUrlFd(url)
+            # Release the connection even if building or sending the reply
+            # fails (assumes fd is file-like with close() -- TODO confirm).
+            try:
+                s = ', '.join(['%s: %s' % (k, v)
+                               for (k, v) in fd.headers.items()])
+                irc.reply(msg, s)
+            finally:
+                fd.close()
+        except webutils.WebError, e:
+            irc.error(msg, str(e))
+
+    def doctype(self, irc, msg, args):
+        """<url>
+
+        Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
+        course.
+        """
+        # NOTE(review): the docstring appears to double as the command's
+        # help text (first line = argument syntax) -- keep it in that form.
+        url = privmsgs.getArgs(args)
+        if not url.startswith('http://'):
+            irc.error(msg, 'Only HTTP urls are valid.')
+            return
+        try:
+            # Only the first maxSize bytes are requested.
+            s = webutils.getUrl(url, size=self.maxSize)
+            # Extract the declaration itself rather than assuming it is the
+            # first line: it may follow an XML prolog or blank lines, span
+            # several lines, or be written in lower case.
+            m = re.search(r'<!DOCTYPE[^>]*>', s, re.I)
+            if m is not None:
+                s = utils.normalizeWhitespace(m.group(0))
+                irc.reply(msg, '%s has the following doctype: %s' % (url, s))
+            else:
+                irc.reply(msg, '%s has no specified doctype.' % url)
+        except webutils.WebError, e:
+            irc.error(msg, str(e))
+
+    def size(self, irc, msg, args):
+        """<url>
+
+        Returns the Content-Length header of <url>. Only HTTP urls are valid,
+        of course.
+        """
+        # NOTE(review): the docstring appears to double as the command's
+        # help text (first line = argument syntax) -- keep it in that form.
+        url = privmsgs.getArgs(args)
+        if not url.startswith('http://'):
+            irc.error(msg, 'Only HTTP urls are valid.')
+            return
+        try:
+            fd = webutils.getUrlFd(url)
+            # Release the connection on every path (assumes fd is file-like
+            # with close() -- TODO confirm).
+            try:
+                try:
+                    size = fd.headers['Content-Length']
+                    irc.reply(msg, '%s is %s bytes long.' % (url, size))
+                except KeyError:
+                    # No Content-Length header: read at most maxSize bytes
+                    # and measure what actually arrived.
+                    s = fd.read(self.maxSize)
+                    if len(s) != self.maxSize:
+                        # Short read: report the measured length.  (The
+                        # original referenced the unbound name 'size' here.)
+                        irc.reply(msg,
+                                  '%s is %s bytes long.' % (url, len(s)))
+                    else:
+                        irc.reply(msg,
+                                  'The server didn\'t tell me how long %s is '
+                                  'but it\'s longer than %s bytes.' %
+                                  (url, self.maxSize))
+            finally:
+                fd.close()
+        except webutils.WebError, e:
+            irc.error(msg, str(e))
+
def title(self, irc, msg, args):
"""
@@ -72,7 +137,7 @@ class Http(callbacks.Privmsg):
if '://' not in url:
url = 'http://%s' % url
try:
- text = webutils.getUrl(url, size=4096)
+ text = webutils.getUrl(url, size=self.maxSize)
m = self._titleRe.search(text)
if m is not None:
irc.reply(msg, utils.htmlToText(m.group(1).strip()))
@@ -315,7 +380,7 @@ class Http(callbacks.Privmsg):
try:
try:
fd = webutils.getUrlFd('http://kernel.org/kdist/finger_banner')
- except webutils.WebException, e:
+ except webutils.WebError, e:
irc.error(msg, str(e))
return
for line in fd:
diff --git a/test/test_Http.py b/test/test_Http.py
index ffa5fe16b..408c8e7bc 100644
--- a/test/test_Http.py
+++ b/test/test_Http.py
@@ -33,6 +33,19 @@ from testsupport import *
class HttpTest(PluginTestCase, PluginDocumentation):
plugins = ('Http',)
+    def testHeaders(self):
+        # A non-HTTP URL must be rejected; an HTTP URL must succeed.
+        rejected = 'headers ftp://ftp.cdrom.com/pub/linux'
+        accepted = 'headers http://www.slashdot.org/'
+        self.assertError(rejected)
+        self.assertNotError(accepted)
+
+    def testDoctype(self):
+        # A non-HTTP URL must be rejected; an HTTP URL must succeed.
+        rejected = 'doctype ftp://ftp.cdrom.com/pub/linux'
+        accepted = 'doctype http://www.slashdot.org/'
+        self.assertError(rejected)
+        self.assertNotError(accepted)
+
+    def testSize(self):
+        # A non-HTTP URL must be rejected; both HTTP URLs must succeed.
+        self.assertError('size ftp://ftp.cdrom.com/pub/linux')
+        for target in ('http://supybot.sf.net/', 'http://www.slashdot.org/'):
+            self.assertNotError('size %s' % target)
+
def testStockquote(self):
self.assertNotError('stockquote MSFT')