utils.web: Rename get_encoding to getEncoding for consistency.

This commit is contained in:
Valentin Lorentz 2013-07-09 12:05:51 +00:00
parent 820113344c
commit b4402b28ed
2 changed files with 14 additions and 14 deletions

View File

@ -99,7 +99,7 @@ class Web(callbacks.PluginRegexp):
irc.reply(url+" : "+utils.web.strError(e), prefixNick=False)
return
try:
text = text.decode(utils.web.get_encoding(text) or 'utf8',
text = text.decode(utils.web.getEncoding(text) or 'utf8',
'replace')
except:
pass
@ -218,7 +218,7 @@ class Web(callbacks.PluginRegexp):
size = conf.supybot.protocols.http.peekSize()
text = utils.web.getUrl(url, size=size)
try:
text = text.decode(utils.web.get_encoding(text) or 'utf8',
text = text.decode(utils.web.getEncoding(text) or 'utf8',
'replace')
except:
pass

View File

@ -166,6 +166,17 @@ def getUrl(url, size=None, headers=None, data=None):
def getDomain(url):
return urlparse.urlparse(url)[1]
def getEncoding(s):
    """Try to guess the character encoding of `s`.

    The data is fed to charade's UniversalDetector and the value the
    detector stores under result['encoding'] is returned. The charade
    import happens inside the try block, so a missing module is handled
    the same way as a detection failure.

    Returns None if the import or the detection raises an exception.
    """
    # TODO: use <meta charset />
    try:
        import charade.universaldetector
        u = charade.universaldetector.UniversalDetector()
        u.feed(s)
        u.close()
        return u.result['encoding']
    except Exception:
        # Best-effort fallback. Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        return None
class HtmlToText(HTMLParser, object):
"""Taken from some eff-bot code on c.l.p."""
entitydefs = htmlentitydefs.entitydefs.copy()
@ -191,21 +202,10 @@ class HtmlToText(HTMLParser, object):
text = ''.join(self.data).strip()
return normalizeWhitespace(text)
def get_encoding(s):
    """Try to guess the character encoding of `s`.

    Feeds `s` to charade's UniversalDetector and returns the value found
    under the detector's result['encoding']. charade is imported inside
    the try block, so an ImportError is treated like any other failure.

    Returns None if anything in the detection path raises an exception.
    """
    # TODO: use <meta charset />
    try:
        import charade.universaldetector
        u = charade.universaldetector.UniversalDetector()
        u.feed(s)
        u.close()
        return u.result['encoding']
    except Exception:
        # Swallow detection errors only; a bare except would also trap
        # KeyboardInterrupt and SystemExit.
        return None
def htmlToText(s, tagReplace=' '):
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
"""
encoding = get_encoding(s)
encoding = getEncoding(s)
if encoding:
s = s.decode(encoding)
else: