Web: Fix unicode problems for titleSnarfer.

This commit is contained in:
Terje Hoås 2012-05-04 00:56:24 +02:00
parent 124ca33f4e
commit 3579701d2d

View File

@@ -97,13 +97,13 @@ class Web(callbacks.PluginRegexp):
return return
parser = Title() parser = Title()
try: try:
parser.feed(text) parser.feed(text.decode('utf-8'))
except HTMLParser.HTMLParseError: except HTMLParser.HTMLParseError:
self.log.debug('Encountered a problem parsing %u. Title may ' self.log.debug('Encountered a problem parsing %u. Title may '
'already be set, though', url) 'already be set, though', url)
if parser.title: if parser.title:
domain = utils.web.getDomain(url) domain = utils.web.getDomain(url)
title = utils.web.htmlToText(parser.title.strip()) title = utils.web.htmlToText(parser.title.strip().encode('utf-8'))
s = format(_('Title: %s (at %s)'), title, domain) s = format(_('Title: %s (at %s)'), title, domain)
irc.reply(s, prefixNick=False) irc.reply(s, prefixNick=False)
titleSnarfer = urlSnarfer(titleSnarfer) titleSnarfer = urlSnarfer(titleSnarfer)
@@ -178,12 +178,12 @@ class Web(callbacks.PluginRegexp):
text = utils.web.getUrl(url, size=size) text = utils.web.getUrl(url, size=size)
parser = Title() parser = Title()
try: try:
parser.feed(text) parser.feed(text.decode('utf-8'))
except HTMLParser.HTMLParseError: except HTMLParser.HTMLParseError:
self.log.debug('Encountered a problem parsing %u. Title may ' self.log.debug('Encountered a problem parsing %u. Title may '
'already be set, though', url) 'already be set, though', url)
if parser.title: if parser.title:
irc.reply(utils.web.htmlToText(parser.title.strip())) irc.reply(utils.web.htmlToText(parser.title.strip().encode('utf-8')))
elif len(text) < size: elif len(text) < size:
irc.reply(_('That URL appears to have no HTML title.')) irc.reply(_('That URL appears to have no HTML title.'))
else: else: