Fix fetching of RSS feeds containing Unicode.

This commit is contained in:
Valentin Lorentz 2013-05-13 10:51:24 +02:00
parent 63f911b8a4
commit 8d95a424b0
2 changed files with 6 additions and 2 deletions

View File

@@ -450,7 +450,11 @@ def format(s, *args, **kwargs):
def sub(match): def sub(match):
char = match.group(1) char = match.group(1)
if char == 's': if char == 's':
return str(args.pop()) token = args.pop()
if isinstance(token, unicode) or isinstance(token, str):
return token
else:
return unicode(token)
elif char == 'i': elif char == 'i':
# XXX Improve me! # XXX Improve me!
return str(args.pop()) return str(args.pop())

View File

@@ -180,7 +180,7 @@ class HtmlToText(HTMLParser, object):
self.data.append(data) self.data.append(data)
def handle_entityref(self, data): def handle_entityref(self, data):
self.data.append(chr(htmlentitydefs.name2codepoint[data])) self.data.append(unichr(htmlentitydefs.name2codepoint[data]))
def getText(self): def getText(self):
text = ''.join(self.data).strip() text = ''.join(self.data).strip()