diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py index b42dcc39e..6b88800be 100644 --- a/plugins/Web/plugin.py +++ b/plugins/Web/plugin.py @@ -210,15 +210,8 @@ class Web(callbacks.PluginRegexp): irc.error("This url is not on the whitelist.") return size = conf.supybot.protocols.http.peekSize() - fd = utils.web.getUrlFd(url) - content_type = fd.getheader('Content-type', 'text/html') \ - .split(';', 1)[0] - if content_type not in ('text/html', 'application/xhtml+xml', - 'application/xhtml'): - irc.error(_('This is not an HTML page (content type is %r)') % - content_type) - return - s = fd.read(size).decode('utf8') + s = utils.web.getUrl(url, size=size) \ + .decode('utf8') m = self._doctypeRe.search(s) if m: s = utils.str.normalizeWhitespace(m.group(0)) @@ -271,15 +264,7 @@ class Web(callbacks.PluginRegexp): irc.error("This url is not on the whitelist.") return size = conf.supybot.protocols.http.peekSize() - fd = utils.web.getUrlFd(url) - content_type = fd.getheader('Content-type', 'text/html') \ - .split(';', 1)[0] - if content_type not in ('text/html', 'application/xhtml+xml', - 'application/xhtml'): - irc.error(_('This is not an HTML page (content type is %r)') % - content_type) - return - text = fd.read(size) + text = utils.web.getUrl(url, size=size) try: text = text.decode(utils.web.getEncoding(text) or 'utf8', 'replace') diff --git a/plugins/Web/test.py b/plugins/Web/test.py index 63f94208a..d60ba9d5f 100644 --- a/plugins/Web/test.py +++ b/plugins/Web/test.py @@ -40,8 +40,6 @@ class WebTestCase(ChannelPluginTestCase): def testDoctype(self): self.assertError('doctype ftp://ftp.cdrom.com/pub/linux') self.assertNotError('doctype http://www.slashdot.org/') - self.assertRegexp('doctype http://www.google.com/favicon.ico', - 'Error.*not an HTML page') m = self.getMsg('doctype http://moobot.sf.net/') self.failUnless(m.args[1].endswith('>')) @@ -53,8 +51,6 @@ class WebTestCase(ChannelPluginTestCase): def testTitle(self): self.assertRegexp('title http://www.slashdot.org/', 'News for nerds, stuff that matters') - self.assertRegexp('doctype http://www.google.com/favicon.ico', - 'Error.*not an HTML page') # Checks for @title not-working correctly self.assertResponse('title ' 'http://www.catb.org/~esr/jargon/html/F/foo.html',