mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-02-02 15:44:06 +01:00
Use chardet to guess the character encoding when running on Python 3
This commit is contained in:
parent
bbd9d1636a
commit
4316e5936e
@ -45,6 +45,14 @@ import supybot.world as world
|
|||||||
import supybot.drivers as drivers
|
import supybot.drivers as drivers
|
||||||
import supybot.schedule as schedule
|
import supybot.schedule as schedule
|
||||||
from itertools import imap
|
from itertools import imap
|
||||||
|
try:
|
||||||
|
from chardet.universaldetector import UniversalDetector
|
||||||
|
chardetLoaded = True
|
||||||
|
except:
|
||||||
|
drivers.log.debug('chardet module not available, '
|
||||||
|
'cannot guess character encoding if'
|
||||||
|
'using Python3')
|
||||||
|
chardetLoaded = False
|
||||||
try:
|
try:
|
||||||
import ssl
|
import ssl
|
||||||
SSLError = ssl.SSLError
|
SSLError = ssl.SSLError
|
||||||
@ -184,7 +192,27 @@ class SocketDriver(drivers.IrcDriver, drivers.ServersMixin):
|
|||||||
self.inbuffer = lines.pop()
|
self.inbuffer = lines.pop()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
line = line.decode(errors='replace')
|
# first, try to decode using utf-8
|
||||||
|
try:
|
||||||
|
line = line.decode(encoding='utf-8', errors='strict')
|
||||||
|
except UnicodeError:
|
||||||
|
# if this fails and chardet is loaded, try to guess the correct encoding
|
||||||
|
if chardetLoaded:
|
||||||
|
u = UniversalDetector()
|
||||||
|
u.feed(line)
|
||||||
|
u.close()
|
||||||
|
if u.result['encoding']:
|
||||||
|
# try to use the guessed encoding
|
||||||
|
try:
|
||||||
|
line = line.decode(u.result['encoding'], errors='strict')
|
||||||
|
# on error, give up and replace the offending characters
|
||||||
|
except UnicodeError:
|
||||||
|
line = line.decode(errors='replace')
|
||||||
|
# if chardet is not loaded, try to decode using utf-8 and replace any
|
||||||
|
# offending characters
|
||||||
|
else:
|
||||||
|
line = line.decode(encoding='utf-8', errors='replace')
|
||||||
|
|
||||||
msg = drivers.parseMsg(line)
|
msg = drivers.parseMsg(line)
|
||||||
if msg is not None:
|
if msg is not None:
|
||||||
self.irc.feedMsg(msg)
|
self.irc.feedMsg(msg)
|
||||||
|
Loading…
Reference in New Issue
Block a user