mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-02-19 23:20:57 +01:00
Web: Decode using the charset advertised in response headers
And fall back to sniffing the content when no charset is present
This commit is contained in:
parent
e6c4da0fff
commit
1a7c14f4b3
@ -164,8 +164,17 @@ class Web(callbacks.PluginRegexp):
|
|||||||
timeout = self.registryValue('timeout')
|
timeout = self.registryValue('timeout')
|
||||||
headers = conf.defaultHttpHeaders(irc.network, msg.channel)
|
headers = conf.defaultHttpHeaders(irc.network, msg.channel)
|
||||||
try:
|
try:
|
||||||
(target, text) = utils.web.getUrlTargetAndContent(url, size=size,
|
fd = utils.web.getUrlFd(url, timeout=timeout, headers=headers)
|
||||||
timeout=timeout, headers=headers)
|
target = fd.geturl()
|
||||||
|
text = fd.read(size)
|
||||||
|
response_headers = fd.headers
|
||||||
|
fd.close()
|
||||||
|
except socket.timeout:
|
||||||
|
if raiseErrors:
|
||||||
|
irc.error(_('Connection to %s timed out') % url, Raise=True)
|
||||||
|
else:
|
||||||
|
selg.log.info('Web plugins TitleSnarfer: URL <%s> timed out',
|
||||||
|
url)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if raiseErrors:
|
if raiseErrors:
|
||||||
irc.error(_('That URL raised <' + str(e)) + '>',
|
irc.error(_('That URL raised <' + str(e)) + '>',
|
||||||
@ -174,9 +183,19 @@ class Web(callbacks.PluginRegexp):
|
|||||||
self.log.info('Web plugin TitleSnarfer: URL <%s> raised <%s>',
|
self.log.info('Web plugin TitleSnarfer: URL <%s> raised <%s>',
|
||||||
url, str(e))
|
url, str(e))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
encoding = None
|
||||||
|
if 'Content-Type' in fd.headers:
|
||||||
|
mime_params = [p.split('=', 1)
|
||||||
|
for p in fd.headers['Content-Type'].split(';')[1:]]
|
||||||
|
mime_params = {k.strip(): v.strip() for (k, v) in mime_params}
|
||||||
|
if mime_params.get('charset'):
|
||||||
|
encoding = mime_params['charset']
|
||||||
|
|
||||||
|
encoding = encoding or utils.web.getEncoding(text) or 'utf8'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
text = text.decode(utils.web.getEncoding(text) or 'utf8',
|
text = text.decode(encoding, 'replace')
|
||||||
'replace')
|
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
if minisix.PY3:
|
if minisix.PY3:
|
||||||
if raiseErrors:
|
if raiseErrors:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user