mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-14 14:49:21 +01:00
RSS._getConverter: Encode strings before handing them off to other functions
When the feed has a specified encoding, we'll be dealing with unicode objects
in the response from feedparser.parse(). To avoid possible UnicodeErrors, we
need to encode() before handing the string off to other functions, so the
other functions are always dealing with bytestrings instead of a mix of
bytestrings and unicode objects. Mixing unicode and bytestrings will cause implicit
conversions of the unicode objects, which will most likely use the wrong
encoding.
Signed-off-by: James McCoy <jamessan@users.sourceforge.net>
(cherry picked from commit 964c73f591)
Signed-off-by: Daniel Folkinshteyn <nanotube@users.sourceforge.net>
This commit is contained in:
parent
50e4b6baf1
commit
b42b06fe79
@@ -288,8 +288,14 @@ class RSS(callbacks.Plugin):
|
||||
def _getConverter(self, feed):
|
||||
toText = utils.web.htmlToText
|
||||
if 'encoding' in feed:
|
||||
return lambda s: toText(s).strip().encode(feed['encoding'],
|
||||
'replace')
|
||||
def conv(s):
|
||||
# encode() first so that implicit encoding doesn't happen in
|
||||
# other functions when unicode and bytestring objects are used
|
||||
# together
|
||||
s = s.encode(feed['encoding'], 'replace')
|
||||
s = toText(s).strip()
|
||||
return s
|
||||
return conv
|
||||
else:
|
||||
return lambda s: toText(s).strip()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user