diff --git a/ChangeLog b/ChangeLog
index 60304038e..1cec4c063 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+	* Added Http.size and Http.doctype and Http.headers to retrieve
+	  various meta-information on URLs.
+
 	* Added a ranking to ChannelDB.wordstats.
 
 	* Added 'roulette' command to Fun plugin.
diff --git a/plugins/Http.py b/plugins/Http.py
index 3b93b1f57..e14c9e3a7 100644
--- a/plugins/Http.py
+++ b/plugins/Http.py
@@ -56,13 +56,78 @@ class FreshmeatException(Exception):
 
 class Http(callbacks.Privmsg):
     threaded = True
+    maxSize = 4096
     _titleRe = re.compile(r'<title>(.*?)</title>', re.I | re.S)
     def callCommand(self, method, irc, msg, *L):
         try:
             callbacks.Privmsg.callCommand(self, method, irc, msg, *L)
-        except webutils.WebException, e:
+        except webutils.WebError, e:
             irc.error(msg, str(e))
 
+    def headers(self, irc, msg, args):
+        """<url>
+
+        Returns the HTTP headers of <url>. Only HTTP urls are valid, of
+        course.
+        """
+        url = privmsgs.getArgs(args)
+        if not url.startswith('http://'):
+            irc.error(msg, 'Only HTTP urls are valid.')
+            return
+        try:
+            fd = webutils.getUrlFd(url)
+            s = ', '.join(['%s: %s' % (k, v) for (k, v) in fd.headers.items()])
+            irc.reply(msg, s)
+        except webutils.WebError, e:
+            irc.error(msg, str(e))
+
+    def doctype(self, irc, msg, args):
+        """<url>
+
+        Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
+        course.
+        """
+        url = privmsgs.getArgs(args)
+        if not url.startswith('http://'):
+            irc.error(msg, 'Only HTTP urls are valid.')
+            return
+        try:
+            s = webutils.getUrl(url, size=self.maxSize)
+            if 'DOCTYPE' in s and '\n' in s:
+                line = s.splitlines()[0]
+                s = utils.normalizeWhitespace(line.strip())
+                irc.reply(msg, '%s has the following doctype: %s' % (url, s))
+            else:
+                irc.reply(msg, '%s has no specified doctype.' % url)
+        except webutils.WebError, e:
+            irc.error(msg, str(e))
+
+    def size(self, irc, msg, args):
+        """<url>
+
+        Returns the Content-Length header of <url>. Only HTTP urls are valid,
+        of course.
+        """
+        url = privmsgs.getArgs(args)
+        if not url.startswith('http://'):
+            irc.error(msg, 'Only HTTP urls are valid.')
+            return
+        try:
+            fd = webutils.getUrlFd(url)
+            try:
+                size = fd.headers['Content-Length']
+                irc.reply(msg, '%s is %s bytes long.' % (url, size))
+            except KeyError:
+                s = fd.read(self.maxSize)
+                if len(s) != self.maxSize:
+                    irc.reply(msg, '%s is %s bytes long.' % (url, len(s)))
+                else:
+                    irc.reply(msg, 'The server didn\'t tell me how long %s is '
+                                   'but it\'s longer than %s bytes.' %
+                                   (url,self.maxSize))
+        except webutils.WebError, e:
+            irc.error(msg, str(e))
+
     def title(self, irc, msg, args):
         """<url>
 
@@ -72,7 +137,7 @@ class Http(callbacks.Privmsg):
         if '://' not in url:
             url = 'http://%s' % url
         try:
-            text = webutils.getUrl(url, size=4096)
+            text = webutils.getUrl(url, size=self.maxSize)
             m = self._titleRe.search(text)
             if m is not None:
                 irc.reply(msg, utils.htmlToText(m.group(1).strip()))
@@ -315,7 +380,7 @@ class Http(callbacks.Privmsg):
         try:
             try:
                 fd = webutils.getUrlFd('http://kernel.org/kdist/finger_banner')
-            except webutils.WebException, e:
+            except webutils.WebError, e:
                 irc.error(msg, str(e))
                 return
             for line in fd:
diff --git a/test/test_Http.py b/test/test_Http.py
index ffa5fe16b..408c8e7bc 100644
--- a/test/test_Http.py
+++ b/test/test_Http.py
@@ -33,6 +33,19 @@ from testsupport import *
 
 class HttpTest(PluginTestCase, PluginDocumentation):
     plugins = ('Http',)
+    def testHeaders(self):
+        self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
+        self.assertNotError('headers http://www.slashdot.org/')
+
+    def testDoctype(self):
+        self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
+        self.assertNotError('doctype http://www.slashdot.org/')
+
+    def testSize(self):
+        self.assertError('size ftp://ftp.cdrom.com/pub/linux')
+        self.assertNotError('size http://supybot.sf.net/')
+        self.assertNotError('size http://www.slashdot.org/')
+
     def testStockquote(self):
         self.assertNotError('stockquote MSFT')
 