mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-27 05:09:23 +01:00
Bug #1190350, Don't grab fake title.
This commit is contained in:
parent
d9ce747fef
commit
fcfda73f64
@ -29,6 +29,8 @@
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from HTMLParser import HTMLParser
|
||||||
|
|
||||||
import supybot.conf as conf
|
import supybot.conf as conf
|
||||||
import supybot.utils as utils
|
import supybot.utils as utils
|
||||||
from supybot.commands import *
|
from supybot.commands import *
|
||||||
@ -36,6 +38,24 @@ import supybot.plugins as plugins
|
|||||||
import supybot.ircutils as ircutils
|
import supybot.ircutils as ircutils
|
||||||
import supybot.callbacks as callbacks
|
import supybot.callbacks as callbacks
|
||||||
|
|
||||||
|
class Title(HTMLParser):
    """HTML parser that extracts the text of a document's first <title>.

    After feed(), ``title`` holds the raw text of the first <title> element,
    or None when no title text was seen.  Entity and character references
    are kept in their source form (e.g. ``&amp;``), so the caller can decode
    them together with the rest of the text.

    Attributes:
        inTitle: True while the parser is inside the first <title> element.
        title: Accumulated title text, or None if none has been seen.
    """

    def __init__(self, *args, **kwargs):
        self.inTitle = False
        self.title = None
        # Set once the first <title> has been closed, so that later <title>
        # elements (e.g. inside inline SVG) cannot replace the real one.
        self._titleSeen = False
        HTMLParser.__init__(self, *args, **kwargs)

    def _appendTitleText(self, text):
        """Append text to the title if we are currently inside <title>."""
        if not self.inTitle:
            return
        # The parser may report one title in several pieces (around entity
        # references or nested markup), so accumulate instead of overwriting.
        if self.title is None:
            self.title = text
        else:
            self.title += text

    def handle_starttag(self, tag, attrs):
        """Start capturing when the first <title> tag opens."""
        if tag == 'title' and not self._titleSeen:
            self.inTitle = True

    def handle_data(self, data):
        """Collect plain text that appears inside the title."""
        self._appendTitleText(data)

    def handle_entityref(self, name):
        """Keep named references (``&name;``) inside the title verbatim."""
        self._appendTitleText('&%s;' % name)

    def handle_charref(self, name):
        """Keep numeric references (``&#NNN;``) inside the title verbatim."""
        self._appendTitleText('&#%s;' % name)

    def handle_endtag(self, tag):
        """Stop capturing at </title> and lock in the first title."""
        if tag == 'title' and self.inTitle:
            self.inTitle = False
            self._titleSeen = True
|
||||||
|
|
||||||
class Web(callbacks.PluginRegexp):
|
class Web(callbacks.PluginRegexp):
|
||||||
"""Add the help for "@help Web" here."""
|
"""Add the help for "@help Web" here."""
|
||||||
threaded = True
|
threaded = True
|
||||||
@ -45,8 +65,7 @@ class Web(callbacks.PluginRegexp):
|
|||||||
super(Web, self).callCommand(command, irc, msg, *args, **kwargs)
|
super(Web, self).callCommand(command, irc, msg, *args, **kwargs)
|
||||||
except utils.web.Error, e:
|
except utils.web.Error, e:
|
||||||
irc.reply(str(e))
|
irc.reply(str(e))
|
||||||
|
|
||||||
_titleRe = re.compile(r'<title>(.*?)</title>', re.I | re.S)
|
|
||||||
def titleSnarfer(self, irc, msg, match):
|
def titleSnarfer(self, irc, msg, match):
|
||||||
r"https?://[^\])>\s]+"
|
r"https?://[^\])>\s]+"
|
||||||
channel = msg.args[0]
|
channel = msg.args[0]
|
||||||
@ -66,10 +85,11 @@ class Web(callbacks.PluginRegexp):
|
|||||||
except utils.web.Error, e:
|
except utils.web.Error, e:
|
||||||
self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
|
self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
|
||||||
return
|
return
|
||||||
m = self._titleRe.search(text)
|
parser = Title()
|
||||||
if m is not None:
|
parser.feed(text)
|
||||||
|
if parser.title is not None:
|
||||||
domain = utils.web.getDomain(url)
|
domain = utils.web.getDomain(url)
|
||||||
title = utils.web.htmlToText(m.group(1).strip())
|
title = utils.web.htmlToText(parser.title.strip())
|
||||||
s = format('Title: %s (at %s)', title, domain)
|
s = format('Title: %s (at %s)', title, domain)
|
||||||
irc.reply(s, prefixName=False)
|
irc.reply(s, prefixName=False)
|
||||||
titleSnarfer = urlSnarfer(titleSnarfer)
|
titleSnarfer = urlSnarfer(titleSnarfer)
|
||||||
@ -137,9 +157,10 @@ class Web(callbacks.PluginRegexp):
|
|||||||
"""
|
"""
|
||||||
size = conf.supybot.protocols.http.peekSize()
|
size = conf.supybot.protocols.http.peekSize()
|
||||||
text = utils.web.getUrl(url, size=size)
|
text = utils.web.getUrl(url, size=size)
|
||||||
m = self._titleRe.search(text)
|
parser = Title()
|
||||||
if m is not None:
|
parser.feed(text)
|
||||||
irc.reply(utils.web.htmlToText(m.group(1).strip()))
|
if parser.title is not None:
|
||||||
|
irc.reply(utils.web.htmlToText(parser.title.strip()))
|
||||||
else:
|
else:
|
||||||
irc.reply(format('That URL appears to have no HTML title '
|
irc.reply(format('That URL appears to have no HTML title '
|
||||||
'within the first %i bytes.', size))
|
'within the first %i bytes.', size))
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
|
|
||||||
from supybot.test import *
|
from supybot.test import *
|
||||||
|
|
||||||
class WebTestCase(PluginTestCase):
|
class WebTestCase(ChannelPluginTestCase):
|
||||||
plugins = ('Web',)
|
plugins = ('Web',)
|
||||||
if network:
|
if network:
|
||||||
def testHeaders(self):
|
def testHeaders(self):
|
||||||
@ -68,9 +68,15 @@ class WebTestCase(PluginTestCase):
|
|||||||
'jupiter_dark_spot_031023.html',
|
'jupiter_dark_spot_031023.html',
|
||||||
'Mystery Spot on Jupiter Baffles Astronomers')
|
'Mystery Spot on Jupiter Baffles Astronomers')
|
||||||
# Checks for @title not-working correctly
|
# Checks for @title not-working correctly
|
||||||
self.assertResponse('title '\
|
self.assertResponse('title '
|
||||||
'http://www.catb.org/~esr/jargon/html/F/foo.html',
|
'http://www.catb.org/~esr/jargon/html/F/foo.html',
|
||||||
'foo')
|
'foo')
|
||||||
|
# Checks for only grabbing the real title tags instead of title
|
||||||
|
# tags inside, for example, script tags. Bug #1190350
|
||||||
|
self.assertNotRegexp('title '
|
||||||
|
'http://www.irinnews.org/report.asp?ReportID=45910&'
|
||||||
|
'SelectRegion=West_Africa&SelectCountry=CHAD',
|
||||||
|
r'document\.write\(')
|
||||||
|
|
||||||
def testNetcraft(self):
|
def testNetcraft(self):
|
||||||
self.assertNotError('netcraft slashdot.org')
|
self.assertNotError('netcraft slashdot.org')
|
||||||
|
Loading…
Reference in New Issue
Block a user