Socket: Move line decoding to utils.str.

This commit is contained in:
Valentin Lorentz 2015-05-16 00:30:20 +02:00
parent 39d40ec617
commit eca7a036aa
2 changed files with 40 additions and 34 deletions

View File

@ -43,14 +43,7 @@ import socket
from .. import (conf, drivers, log, schedule, utils, world) from .. import (conf, drivers, log, schedule, utils, world)
from ..utils.iter import imap from ..utils.iter import imap
try: from ..utils.str import decode_raw_line
from charade.universaldetector import UniversalDetector
charadeLoaded = True
except:
drivers.log.debug('charade module not available, '
'cannot guess character encoding if'
'using Python3')
charadeLoaded = False
try: try:
import ssl import ssl
@ -201,32 +194,7 @@ class SocketDriver(drivers.IrcDriver, drivers.ServersMixin):
lines = self.inbuffer.split(b'\n') lines = self.inbuffer.split(b'\n')
self.inbuffer = lines.pop() self.inbuffer = lines.pop()
for line in lines: for line in lines:
if sys.version_info[0] >= 3: line = decode_raw_line(line)
#first, try to decode using utf-8
try:
line = line.decode('utf8', 'strict')
except UnicodeError:
# if this fails and charade is loaded, try to guess the correct encoding
if charadeLoaded:
u = UniversalDetector()
u.feed(line)
u.close()
if u.result['encoding']:
# try to use the guessed encoding
try:
line = line.decode(u.result['encoding'],
'strict')
# on error, give up and replace the offending characters
except UnicodeError:
line = line.decode(errors='replace')
else:
# if no encoding could be guessed, fall back to utf-8 and
# replace offending characters
line = line.decode('utf8', 'replace')
# if charade is not loaded, try to decode using utf-8 and replace any
# offending characters
else:
line = line.decode('utf8', 'replace')
msg = drivers.parseMsg(line) msg = drivers.parseMsg(line)
if msg is not None and self.irc is not None: if msg is not None and self.irc is not None:

View File

@ -46,6 +46,44 @@ from supybot.i18n import PluginInternationalization
_ = PluginInternationalization() _ = PluginInternationalization()
internationalizeFunction = _.internationalizeFunction internationalizeFunction = _.internationalizeFunction
# charade is optional: when it cannot be imported, decode_raw_line() skips
# encoding detection and goes straight to its lossy UTF-8 fallback.
try:
    from charade.universaldetector import UniversalDetector
    charadeLoaded = True
except ImportError:
    charadeLoaded = False

if sys.version_info[0] >= 3:
    def decode_raw_line(line):
        """Decode a raw line of bytes into text.

        The following attempts are made in order, and the first one that
        succeeds is returned:

        1. Strict UTF-8.
        2. If charade was imported, strict decoding with the encoding
           reported by its UniversalDetector (when it reports one).
        3. UTF-8 with undecodable bytes replaced by U+FFFD.

        line -- the bytes object to decode.

        Returns the decoded string.
        """
        # First, try to decode using UTF-8.
        try:
            return line.decode('utf8', 'strict')
        except UnicodeError:
            pass
        # If that fails and charade is loaded, try to guess the encoding.
        if charadeLoaded:
            detector = UniversalDetector()
            detector.feed(line)
            detector.close()
            encoding = detector.result['encoding']
            if encoding:
                try:
                    return line.decode(encoding, 'strict')
                except (UnicodeError, LookupError):
                    # UnicodeError: the guess was wrong for these bytes.
                    # LookupError: bytes.decode() does not know a codec by
                    # the reported name. Either way, give up on the guess
                    # and use the replacement fallback below.
                    pass
        # No usable guess (or no detector): fall back to UTF-8 and replace
        # the offending characters.
        return line.decode('utf8', 'replace')
else:
    def decode_raw_line(line):
        """Return line unchanged (no decoding is done before Python 3)."""
        return line
def rsplit(s, sep=None, maxsplit=-1): def rsplit(s, sep=None, maxsplit=-1):
"""Equivalent to str.split, except splitting from the right.""" """Equivalent to str.split, except splitting from the right."""
return s.rsplit(sep, maxsplit) return s.rsplit(sep, maxsplit)