# Methods of HtmlToText(HTMLParser, object), per the diff hunk header; they
# belong indented inside that class body.
def handle_data(self, data):
    """Collect a run of plain text reported by the parser."""
    self.data.append(data)


def handle_entityref(self, data):
    """Collect the character for a named entity reference such as ``&amp;``.

    ``data`` is the entity name used as the key into
    ``htmlentitydefs.name2codepoint``.  Names missing from that table are
    kept as the literal ``&name;`` text instead of raising ``KeyError``, so
    a single unknown entity cannot abort the whole conversion.
    """
    codepoint = htmlentitydefs.name2codepoint.get(data)
    if codepoint is None:
        # Unknown entity: preserve it verbatim rather than dropping it.
        self.data.append('&%s;' % data)
    else:
        # NOTE(review): if this file must still run on Python 2, chr()
        # rejects code points above 255 there -- confirm target versions.
        self.data.append(chr(codepoint))


def getText(self):
    """Return the collected pieces joined, stripped and whitespace-normalized."""
    text = ''.join(self.data).strip()
    return normalizeWhitespace(text)