mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-27 05:09:23 +01:00
utils.web: Rename get_encoding to getEncoding for consistency.
This commit is contained in:
parent
820113344c
commit
b4402b28ed
@ -99,7 +99,7 @@ class Web(callbacks.PluginRegexp):
|
|||||||
irc.reply(url+" : "+utils.web.strError(e), prefixNick=False)
|
irc.reply(url+" : "+utils.web.strError(e), prefixNick=False)
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
text = text.decode(utils.web.get_encoding(text) or 'utf8',
|
text = text.decode(utils.web.getEncoding(text) or 'utf8',
|
||||||
'replace')
|
'replace')
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
@ -218,7 +218,7 @@ class Web(callbacks.PluginRegexp):
|
|||||||
size = conf.supybot.protocols.http.peekSize()
|
size = conf.supybot.protocols.http.peekSize()
|
||||||
text = utils.web.getUrl(url, size=size)
|
text = utils.web.getUrl(url, size=size)
|
||||||
try:
|
try:
|
||||||
text = text.decode(utils.web.get_encoding(text) or 'utf8',
|
text = text.decode(utils.web.getEncoding(text) or 'utf8',
|
||||||
'replace')
|
'replace')
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
@ -166,6 +166,17 @@ def getUrl(url, size=None, headers=None, data=None):
|
|||||||
def getDomain(url):
|
def getDomain(url):
|
||||||
return urlparse.urlparse(url)[1]
|
return urlparse.urlparse(url)[1]
|
||||||
|
|
||||||
|
def getEncoding(s):
|
||||||
|
# TODO: use <meta charset />
|
||||||
|
try:
|
||||||
|
import charade.universaldetector
|
||||||
|
u = charade.universaldetector.UniversalDetector()
|
||||||
|
u.feed(s)
|
||||||
|
u.close()
|
||||||
|
return u.result['encoding']
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
class HtmlToText(HTMLParser, object):
|
class HtmlToText(HTMLParser, object):
|
||||||
"""Taken from some eff-bot code on c.l.p."""
|
"""Taken from some eff-bot code on c.l.p."""
|
||||||
entitydefs = htmlentitydefs.entitydefs.copy()
|
entitydefs = htmlentitydefs.entitydefs.copy()
|
||||||
@ -191,21 +202,10 @@ class HtmlToText(HTMLParser, object):
|
|||||||
text = ''.join(self.data).strip()
|
text = ''.join(self.data).strip()
|
||||||
return normalizeWhitespace(text)
|
return normalizeWhitespace(text)
|
||||||
|
|
||||||
def get_encoding(s):
|
|
||||||
# TODO: use <meta charset />
|
|
||||||
try:
|
|
||||||
import charade.universaldetector
|
|
||||||
u = charade.universaldetector.UniversalDetector()
|
|
||||||
u.feed(s)
|
|
||||||
u.close()
|
|
||||||
return u.result['encoding']
|
|
||||||
except:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def htmlToText(s, tagReplace=' '):
|
def htmlToText(s, tagReplace=' '):
|
||||||
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
||||||
"""
|
"""
|
||||||
encoding = get_encoding(s)
|
encoding = getEncoding(s)
|
||||||
if encoding:
|
if encoding:
|
||||||
s = s.decode(encoding)
|
s = s.decode(encoding)
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user