mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-23 11:09:23 +01:00
utils.web.getEncoding: use <meta charset /> if available.
This commit is contained in:
parent
b4402b28ed
commit
771b739af7
@@ -166,8 +166,18 @@ def getUrl(url, size=None, headers=None, data=None):
|
|||||||
def getDomain(url):
|
def getDomain(url):
|
||||||
return urlparse.urlparse(url)[1]
|
return urlparse.urlparse(url)[1]
|
||||||
|
|
||||||
|
_charset_re = ('<meta[^a-z<>]+charset='
|
||||||
|
"""(?P<charset>("[^"]+"|'[^']+'))""")
|
||||||
def getEncoding(s):
|
def getEncoding(s):
|
||||||
# TODO: use <meta charset />
|
try:
|
||||||
|
match = re.search(_charset_re, s, re.MULTILINE)
|
||||||
|
if match:
|
||||||
|
return match.group('charset')[1:-1]
|
||||||
|
except:
|
||||||
|
match = re.search(_charset_re.encode(), s, re.MULTILINE)
|
||||||
|
if match:
|
||||||
|
return match.group('charset').decode()[1:-1]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import charade.universaldetector
|
import charade.universaldetector
|
||||||
u = charade.universaldetector.UniversalDetector()
|
u = charade.universaldetector.UniversalDetector()
|
||||||
|
Loading…
Reference in New Issue
Block a user