From b42b06fe79ee0e1c283d70c45ef29685a491e649 Mon Sep 17 00:00:00 2001 From: James McCoy Date: Sat, 22 Oct 2011 15:23:56 -0400 Subject: [PATCH] RSS._getConverter: Encode strings before handing them off to other functions When the feed has a specified encoding, we'll be dealing with unicode objects in the response from feedparser.parse(). To avoid possible UnicodeErrors, we need to encode() before handing the string off to other functions, so the other functions are always dealing with bytestrings instead of a mix of bytestrings and unicode objects. Mixing unicode and bytestrings will cause implicit conversions of the unicode objects, which will most likely use the wrong encoding. Signed-off-by: James McCoy (cherry picked from commit 964c73f591f7eafed94d7bcd6dd7b94dbb0afad5) Signed-off-by: Daniel Folkinshteyn --- plugins/RSS/plugin.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/plugins/RSS/plugin.py b/plugins/RSS/plugin.py index 7481cb31e..0617a1264 100644 --- a/plugins/RSS/plugin.py +++ b/plugins/RSS/plugin.py @@ -288,8 +288,14 @@ class RSS(callbacks.Plugin): def _getConverter(self, feed): toText = utils.web.htmlToText if 'encoding' in feed: - return lambda s: toText(s).strip().encode(feed['encoding'], - 'replace') + def conv(s): + # encode() first so that implicit encoding doesn't happen in + # other functions when unicode and bytestring objects are used + # together + s = s.encode(feed['encoding'], 'replace') + s = toText(s).strip() + return s + return conv else: return lambda s: toText(s).strip()