plugins/RSS/plugin.py: Refactor the character encoding detection and conversion so it can be used in more places. Hopefully this will cause fewer UnicodeDecodeErrors.

This commit is contained in:
James Vega 2006-05-02 12:22:11 +00:00
parent 6722a856d8
commit 73d7fd3a2d
1 changed files with 15 additions and 9 deletions

View File

@ -254,15 +254,20 @@ class RSS(callbacks.Plugin):
finally: finally:
self.releaseLock(url) self.releaseLock(url)
def _getConverter(self, feed):
    """Return a one-argument callable that cleans up text from ``feed``.

    The returned callable passes its argument through
    ``utils.web.htmlToText`` and strips surrounding whitespace.  If
    ``feed`` has an 'encoding' key, the result is additionally encoded
    with that encoding, using the 'replace' error handler.
    """
    htmlToText = utils.web.htmlToText

    def plain(text):
        return htmlToText(text).strip()

    if 'encoding' not in feed:
        return plain

    def encoded(text):
        # The encoding is read from the feed at call time, not when the
        # converter is built.
        return plain(text).encode(feed['encoding'], 'replace')
    return encoded
def getHeadlines(self, feed): def getHeadlines(self, feed):
headlines = [] headlines = []
if 'encoding' in feed: conv = self._getConverter(feed)
conv = lambda s: s.encode(feed['encoding'], 'replace')
else:
conv = lambda s: s
for d in feed['items']: for d in feed['items']:
if 'title' in d: if 'title' in d:
title = conv(utils.web.htmlToText(d['title']).strip()) title = conv(d['title'])
link = d.get('link') link = d.get('link')
if link: if link:
headlines.append((title, link)) headlines.append((title, link))
@ -395,6 +400,7 @@ class RSS(callbacks.Plugin):
except registry.NonExistentRegistryEntry: except registry.NonExistentRegistryEntry:
pass pass
feed = self.getFeed(url) feed = self.getFeed(url)
conv = self._getConverter(feed)
info = feed.get('feed') info = feed.get('feed')
if not info: if not info:
irc.error('I couldn\'t retrieve that RSS feed.') irc.error('I couldn\'t retrieve that RSS feed.')
@ -406,13 +412,13 @@ class RSS(callbacks.Plugin):
when = utils.timeElapsed(now - seconds) + ' ago' when = utils.timeElapsed(now - seconds) + ' ago'
else: else:
when = 'time unavailable' when = 'time unavailable'
title = conv(info.get('title', 'unavailable'))
desc = conv(info.get('description', 'unavailable'))
# The rest of the entries are all available in the channel key # The rest of the entries are all available in the channel key
response = format('Title: %s; URL: %u; ' response = format('Title: %s; URL: %u; '
'Description: %s; Last updated: %s.', 'Description: %s; Last updated: %s.',
info.get('title', 'unavailable').strip(), title, info.get('link', 'unavailable').strip(),
info.get('link', 'unavailable').strip(), desc, when)
info.get('description', 'unavailable').strip(),
when)
irc.reply(utils.str.normalizeWhitespace(response)) irc.reply(utils.str.normalizeWhitespace(response))
info = wrap(info, [first('url', 'feedName')]) info = wrap(info, [first('url', 'feedName')])