mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-23 11:09:23 +01:00
Partial fix of encoding handling.
This fixes almost everything, except for a small part of the test framework. I'm just saving this in case my computer or I am destroyed in an alien invasion, because this commit is worth hours of debugging.
This commit is contained in:
parent
c9b6b56244
commit
a4a595b39a
@ -39,13 +39,15 @@ import sys
|
|||||||
import copy
|
import copy
|
||||||
import time
|
import time
|
||||||
import shlex
|
import shlex
|
||||||
|
import codecs
|
||||||
import getopt
|
import getopt
|
||||||
import inspect
|
import inspect
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
if sys.version_info < (2, 7, 0):
|
if sys.version_info[0] < 3:
|
||||||
# cStringIO is buggy.
|
# cStringIO is buggy with Python 2.6 (
|
||||||
# See http://paste.progval.net/show/227/
|
# see http://paste.progval.net/show/227/ )
|
||||||
|
# and it does not handle unicode objects in Python 2.x
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
else:
|
else:
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
@ -290,16 +292,17 @@ class Tokenizer(object):
|
|||||||
def _handleToken(self, token):
|
def _handleToken(self, token):
|
||||||
if token[0] == token[-1] and token[0] in self.quotes:
|
if token[0] == token[-1] and token[0] in self.quotes:
|
||||||
token = token[1:-1]
|
token = token[1:-1]
|
||||||
encoding_prefix = 'string' if sys.version_info[0]<3 else 'unicode'
|
|
||||||
# FIXME: No need to tell you this is a hack.
|
# FIXME: No need to tell you this is a hack.
|
||||||
# It has to handle both IRC commands and serialized configuration.
|
# It has to handle both IRC commands and serialized configuration.
|
||||||
|
if sys.version_info[0] < 3:
|
||||||
try:
|
try:
|
||||||
token = token.decode(encoding_prefix + '_escape')
|
token = token.encode('utf8').decode('string_escape')
|
||||||
except:
|
except:
|
||||||
try:
|
token = token.decode('string_escape')
|
||||||
token = token.encode().decode(encoding_prefix + '_escape')
|
else:
|
||||||
except:
|
token = codecs.getencoder('utf8')(token)[0]
|
||||||
pass
|
token = codecs.getdecoder('unicode_escape')(token)[0]
|
||||||
|
token = token.encode('iso-8859-1').decode()
|
||||||
return token
|
return token
|
||||||
|
|
||||||
def _insideBrackets(self, lexer):
|
def _insideBrackets(self, lexer):
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf8 -*-
|
||||||
###
|
###
|
||||||
# Copyright (c) 2002-2005, Jeremiah Fincher
|
# Copyright (c) 2002-2005, Jeremiah Fincher
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
@ -71,6 +72,12 @@ class TokenizerTestCase(SupyTestCase):
|
|||||||
self.assertEqual(tokenize('foo "bar baz" quux'),
|
self.assertEqual(tokenize('foo "bar baz" quux'),
|
||||||
['foo', 'bar baz', 'quux'])
|
['foo', 'bar baz', 'quux'])
|
||||||
|
|
||||||
|
def testUnicode(self):
|
||||||
|
print repr((tokenize(u'好'), ['好']))
|
||||||
|
print repr((tokenize(u'"好"'), ['好']))
|
||||||
|
self.assertEqual(tokenize(u'好'), ['好'])
|
||||||
|
self.assertEqual(tokenize(u'"好"'), ['好'])
|
||||||
|
|
||||||
def testNesting(self):
|
def testNesting(self):
|
||||||
self.assertEqual(tokenize('[]'), [[]])
|
self.assertEqual(tokenize('[]'), [[]])
|
||||||
self.assertEqual(tokenize('[foo]'), [['foo']])
|
self.assertEqual(tokenize('[foo]'), [['foo']])
|
||||||
|
Loading…
Reference in New Issue
Block a user