mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-23 11:09:23 +01:00
Use HTMLParser.unescape instead of chr to decode HTML entities.
This adds support for entities encoded in hexadecimal notation.
This commit is contained in:
parent
25a913a82b
commit
7f38076e59
@@ -261,7 +261,7 @@ class HtmlToText(HTMLParser, object):
         return normalizeWhitespace(text)
 
     def handle_charref(self, name):
-        self.append((unichr if minisix.PY2 else chr)(int(name)))
+        self.append(self.unescape('&#%s;' % name))
 
 def htmlToText(s, tagReplace=' '):
     """Turns HTML into text. tagReplace is a string to replace HTML tags with.
Loading…
Reference in New Issue
Block a user