diff --git a/src/utils/web.py b/src/utils/web.py
index 706f92cd7..f47f1becb 100644
--- a/src/utils/web.py
+++ b/src/utils/web.py
@@ -166,8 +166,18 @@ def getUrl(url, size=None, headers=None, data=None):
 def getDomain(url):
     return urlparse.urlparse(url)[1]
 
+_charset_re = ('<meta[^a-z<>]+charset='
+    """(?P<charset>("[^"]+"|'[^']+'))""")
 def getEncoding(s):
-    # TODO: use
+    try:
+        match = re.search(_charset_re, s, re.MULTILINE)
+        if match:
+            return match.group('charset')[1:-1]
+    except:
+        match = re.search(_charset_re.encode(), s, re.MULTILINE)
+        if match:
+            return match.group('charset').decode()[1:-1]
+
     try:
         import charade.universaldetector
         u = charade.universaldetector.UniversalDetector()