From 77f614be869df519c75115a654ed041b295e8bec Mon Sep 17 00:00:00 2001
From: Valentin Lorentz
Date: Fri, 1 Feb 2013 20:50:46 +0100
Subject: [PATCH] utils.web.HtmlToText: Convert html entities.

Entity references that cannot be converted (unknown name, or a code
point chr() rejects) are kept verbatim as "&name;" instead of raising
and aborting the parse.
---
 src/utils/web.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/utils/web.py b/src/utils/web.py
index 8d9a4881b..d550ecdef 100644
--- a/src/utils/web.py
+++ b/src/utils/web.py
@@ -179,6 +179,19 @@ class HtmlToText(HTMLParser, object):
     def handle_data(self, data):
         self.data.append(data)
 
+    def handle_entityref(self, data):
+        """Append the character named by the HTML entity reference *data*.
+
+        Entities that cannot be converted are kept verbatim as ``&name;``.
+        """
+        try:
+            char = chr(htmlentitydefs.name2codepoint[data])
+        except (KeyError, ValueError):
+            # KeyError: unknown entity name; ValueError: code point that
+            # chr() cannot represent. Do not abort the whole parse.
+            char = '&%s;' % data
+        self.data.append(char)
+
     def getText(self):
         text = ''.join(self.data).strip()
         return normalizeWhitespace(text)