Use HTMLParser.unescape instead of chr to decode HTML entities.

This adds support for entities encoded in hexadecimal notation.
2026-05-31 04:49:14 +02:00 · 2016-03-08 22:00:44 +01:00 · 2016-03-08 22:00:44 +01:00 · 7f38076e59
commit 7f38076e59
parent 25a913a82b
1 changed files with 1 additions and 1 deletions
--- a/src/utils/web.py
+++ b/src/utils/web.py
@@ -261,7 +261,7 @@ class HtmlToText(HTMLParser, object):
        return normalizeWhitespace(text)

    def handle_charref(self, name):
-        self.append((unichr if minisix.PY2 else chr)(int(name)))
+        self.append(self.unescape('&#%s;' % name))

 def htmlToText(s, tagReplace=' '):
    """Turns HTML into text.  tagReplace is a string to replace HTML tags with.