utils/web.py: Add some 'try' statements to make decoding as error-permissive as possible.

This commit is contained in:
Valentin Lorentz 2013-06-13 17:22:33 +00:00
parent 2eec980c8e
commit cca1e6dba5

View File

@@ -191,14 +191,16 @@ def htmlToText(s, tagReplace=' '):
"""
try:
import chardet.universaldetector
except ImportError:
if sys.version_info[0] < 3 or isinstance(s, bytes):
s = s.decode('utf8')
else:
u = chardet.universaldetector.UniversalDetector()
u.feed(s)
u.close()
s = s.decode(u.result['encoding'])
except:
try:
if sys.version_info[0] < 3 or isinstance(s, bytes):
s = s.decode('utf8')
except:
pass
x = HtmlToText(tagReplace)
x.feed(s)
return x.getText()