mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-01-11 04:32:36 +01:00
utils.web: Rename get_encoding to getEncoding for consistency.
This commit is contained in:
parent
820113344c
commit
b4402b28ed
@ -99,7 +99,7 @@ class Web(callbacks.PluginRegexp):
|
||||
irc.reply(url+" : "+utils.web.strError(e), prefixNick=False)
|
||||
return
|
||||
try:
|
||||
text = text.decode(utils.web.get_encoding(text) or 'utf8',
|
||||
text = text.decode(utils.web.getEncoding(text) or 'utf8',
|
||||
'replace')
|
||||
except:
|
||||
pass
|
||||
@ -218,7 +218,7 @@ class Web(callbacks.PluginRegexp):
|
||||
size = conf.supybot.protocols.http.peekSize()
|
||||
text = utils.web.getUrl(url, size=size)
|
||||
try:
|
||||
text = text.decode(utils.web.get_encoding(text) or 'utf8',
|
||||
text = text.decode(utils.web.getEncoding(text) or 'utf8',
|
||||
'replace')
|
||||
except:
|
||||
pass
|
||||
|
@ -166,6 +166,17 @@ def getUrl(url, size=None, headers=None, data=None):
|
||||
def getDomain(url):
|
||||
return urlparse.urlparse(url)[1]
|
||||
|
||||
def getEncoding(s):
|
||||
# TODO: use <meta charset />
|
||||
try:
|
||||
import charade.universaldetector
|
||||
u = charade.universaldetector.UniversalDetector()
|
||||
u.feed(s)
|
||||
u.close()
|
||||
return u.result['encoding']
|
||||
except:
|
||||
return None
|
||||
|
||||
class HtmlToText(HTMLParser, object):
|
||||
"""Taken from some eff-bot code on c.l.p."""
|
||||
entitydefs = htmlentitydefs.entitydefs.copy()
|
||||
@ -191,21 +202,10 @@ class HtmlToText(HTMLParser, object):
|
||||
text = ''.join(self.data).strip()
|
||||
return normalizeWhitespace(text)
|
||||
|
||||
def get_encoding(s):
|
||||
# TODO: use <meta charset />
|
||||
try:
|
||||
import charade.universaldetector
|
||||
u = charade.universaldetector.UniversalDetector()
|
||||
u.feed(s)
|
||||
u.close()
|
||||
return u.result['encoding']
|
||||
except:
|
||||
return None
|
||||
|
||||
def htmlToText(s, tagReplace=' '):
|
||||
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
||||
"""
|
||||
encoding = get_encoding(s)
|
||||
encoding = getEncoding(s)
|
||||
if encoding:
|
||||
s = s.decode(encoding)
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user