mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-27 13:19:24 +01:00
utils/web.py: Add some 'try' statements to make decoding as error-permissive as possible.
This commit is contained in:
parent
2eec980c8e
commit
cca1e6dba5
@@ -191,14 +191,16 @@ def htmlToText(s, tagReplace=' '):
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
import chardet.universaldetector
|
import chardet.universaldetector
|
||||||
except ImportError:
|
|
||||||
if sys.version_info[0] < 3 or isinstance(s, bytes):
|
|
||||||
s = s.decode('utf8')
|
|
||||||
else:
|
|
||||||
u = chardet.universaldetector.UniversalDetector()
|
u = chardet.universaldetector.UniversalDetector()
|
||||||
u.feed(s)
|
u.feed(s)
|
||||||
u.close()
|
u.close()
|
||||||
s = s.decode(u.result['encoding'])
|
s = s.decode(u.result['encoding'])
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
if sys.version_info[0] < 3 or isinstance(s, bytes):
|
||||||
|
s = s.decode('utf8')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
x = HtmlToText(tagReplace)
|
x = HtmlToText(tagReplace)
|
||||||
x.feed(s)
|
x.feed(s)
|
||||||
return x.getText()
|
return x.getText()
|
||||||
|
Loading…
Reference in New Issue
Block a user