mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-12-24 11:42:52 +01:00
utils/web.py: Attempt to fix handling of RSS feeds using broken entity references.
This commit is contained in:
parent
ff5d83e7e6
commit
42b8a0676c
@@ -199,7 +199,14 @@ class HtmlToText(HTMLParser, object):
|
|||||||
self.data.append(data)
|
self.data.append(data)
|
||||||
|
|
||||||
def handle_entityref(self, data):
|
def handle_entityref(self, data):
|
||||||
|
if data in htmlentitydefs.name2codepoint:
|
||||||
self.data.append(unichr(htmlentitydefs.name2codepoint[data]))
|
self.data.append(unichr(htmlentitydefs.name2codepoint[data]))
|
||||||
|
elif sys.version_info[0] >= 3 and isinstance(data, bytes):
|
||||||
|
self.data.append(data.decode())
|
||||||
|
elif sys.version_info[0] < 3 and isinstance(data, str):
|
||||||
|
self.data.append(data.decode('utf8', errors='replace'))
|
||||||
|
else:
|
||||||
|
self.data.append(data)
|
||||||
|
|
||||||
def getText(self):
|
def getText(self):
|
||||||
text = ''.join(self.data).strip()
|
text = ''.join(self.data).strip()
|
||||||
|
Loading…
Reference in New Issue
Block a user