Rename utils.web.get_encoding to getEncoding and update its callers.

src/utils/web.py: the helper moves above HtmlToText under its new name, and
htmlToText now calls getEncoding.
plugins/Web/plugin.py: both text.decode(...) call sites now use
utils.web.getEncoding.

NOTE(review): this patch was re-wrapped from a whitespace-collapsed copy, so
leading indentation (and any runs of spaces inside context lines) is
reconstructed -- verify against the tree or apply with
`git apply --ignore-whitespace`. getEncoding also gained a docstring and
`except Exception:` during review, so the post-image blob id on the web.py
index line no longer matches the resulting file.

diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py
index da33c5dcb..c86b9539b 100644
--- a/plugins/Web/plugin.py
+++ b/plugins/Web/plugin.py
@@ -99,7 +99,7 @@ class Web(callbacks.PluginRegexp):
                     irc.reply(url+" : "+utils.web.strError(e), prefixNick=False)
                 return
             try:
-                text = text.decode(utils.web.get_encoding(text) or 'utf8',
+                text = text.decode(utils.web.getEncoding(text) or 'utf8',
                         'replace')
             except:
                 pass
@@ -218,7 +218,7 @@ class Web(callbacks.PluginRegexp):
         size = conf.supybot.protocols.http.peekSize()
         text = utils.web.getUrl(url, size=size)
         try:
-            text = text.decode(utils.web.get_encoding(text) or 'utf8',
+            text = text.decode(utils.web.getEncoding(text) or 'utf8',
                     'replace')
         except:
             pass
diff --git a/src/utils/web.py b/src/utils/web.py
index 557831065..706f92cd7 100644
--- a/src/utils/web.py
+++ b/src/utils/web.py
@@ -166,6 +166,22 @@ def getUrl(url, size=None, headers=None, data=None):
 def getDomain(url):
     return urlparse.urlparse(url)[1]
 
+def getEncoding(s):
+    """Guess the character encoding of the raw (undecoded) data s.
+
+    Returns the 'encoding' entry of charade's UniversalDetector result, or
+    None if charade cannot be imported or the detection raises.
+    """
+    # TODO: use
+    try:
+        import charade.universaldetector
+        u = charade.universaldetector.UniversalDetector()
+        u.feed(s)
+        u.close()
+        return u.result['encoding']
+    except Exception:
+        return None
+
 class HtmlToText(HTMLParser, object):
     """Taken from some eff-bot code on c.l.p."""
     entitydefs = htmlentitydefs.entitydefs.copy()
@@ -191,21 +207,10 @@ class HtmlToText(HTMLParser, object):
         text = ''.join(self.data).strip()
         return normalizeWhitespace(text)
 
-def get_encoding(s):
-    # TODO: use
-    try:
-        import charade.universaldetector
-        u = charade.universaldetector.UniversalDetector()
-        u.feed(s)
-        u.close()
-        return u.result['encoding']
-    except:
-        return None
-
 def htmlToText(s, tagReplace=' '):
     """Turns HTML into text. tagReplace is a string to replace HTML tags with.
     """
-    encoding = get_encoding(s)
+    encoding = getEncoding(s)
     if encoding:
         s = s.decode(encoding)
     else: