mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-02-17 06:00:42 +01:00
Use HTMLParser.unescape instead of chr to decode HTML entities.
This adds support for entities encoded in hexadecimal notation.
This commit is contained in:
parent
25a913a82b
commit
7f38076e59
@@ -261,7 +261,7 @@ class HtmlToText(HTMLParser, object):
|
|||||||
return normalizeWhitespace(text)
|
return normalizeWhitespace(text)
|
||||||
|
|
||||||
def handle_charref(self, name):
|
def handle_charref(self, name):
|
||||||
self.append((unichr if minisix.PY2 else chr)(int(name)))
|
self.append(self.unescape('&#%s;' % name))
|
||||||
|
|
||||||
def htmlToText(s, tagReplace=' '):
|
def htmlToText(s, tagReplace=' '):
|
||||||
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user