From 771b739af7c49623b413a2f95028c01acfc70807 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Jul 2013 12:40:42 +0000 Subject: [PATCH] utils.web.getEncoding: use <meta charset> if available. --- src/utils/web.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/utils/web.py b/src/utils/web.py index 706f92cd7..f47f1becb 100644 --- a/src/utils/web.py +++ b/src/utils/web.py @@ -166,8 +166,18 @@ def getUrl(url, size=None, headers=None, data=None): def getDomain(url): return urlparse.urlparse(url)[1] +_charset_re = ('<meta[^a-z<>]+charset=' + """(?P<charset>("[^"]+"|'[^']+'))""") def getEncoding(s): - # TODO: use <meta charset> + try: + match = re.search(_charset_re, s, re.MULTILINE) + if match: + return match.group('charset')[1:-1] + except: + match = re.search(_charset_re.encode(), s, re.MULTILINE) + if match: + return match.group('charset').decode()[1:-1] + try: import charade.universaldetector u = charade.universaldetector.UniversalDetector()