RSS: Add Python 3 support.

This commit is contained in:
Valentin Lorentz 2013-05-29 15:16:23 +02:00
parent 6266d0f16f
commit ad25f17639
2 changed files with 10 additions and 3 deletions

View File

@ -262,7 +262,7 @@ class RSS(callbacks.Plugin):
try: try:
self.log.debug('Downloading new feed from %u', url) self.log.debug('Downloading new feed from %u', url)
results = feedparser.parse(url) results = feedparser.parse(url)
if 'bozo_exception' in results: if 'bozo_exception' in results and not results['entries']:
raise results['bozo_exception'] raise results['bozo_exception']
except feedparser.sgmllib.SGMLParseError: except feedparser.sgmllib.SGMLParseError:
self.log.exception('Uncaught exception from feedparser:') self.log.exception('Uncaught exception from feedparser:')

View File

@ -189,8 +189,15 @@ class HtmlToText(HTMLParser, object):
def htmlToText(s, tagReplace=' '):
    """Turns HTML into text.  tagReplace is a string to replace HTML tags with.

    `s` may be text or bytes.  Bytes are decoded first: with the encoding
    guessed by chardet when that package is importable, otherwise as UTF-8.
    Undecodable bytes are replaced rather than raising, so one odd byte in
    a page does not abort the whole conversion.
    """
    # Only bytes need decoding; text input (str on Python 3) has no
    # .decode() method and must be passed through untouched.
    if isinstance(s, bytes):
        encoding = None
        try:
            import chardet
        except ImportError:
            # chardet is optional; fall through to the UTF-8 default.
            pass
        else:
            # chardet reports None when it cannot guess (e.g. empty input).
            encoding = chardet.detect(s)['encoding']
        try:
            s = s.decode(encoding or 'utf8', 'replace')
        except LookupError:
            # chardet named a codec this Python does not provide.
            s = s.decode('utf8', 'replace')
    x = HtmlToText(tagReplace)
    x.feed(s)
    return x.getText()