mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-23 02:49:27 +01:00
Partial fix of encoding handling.
This fixes almost everything, except for a small part of the test framework. I'm saving this now in case either my computer or I get destroyed in an alien invasion, because this commit represents hours of debugging.
This commit is contained in:
parent
c9b6b56244
commit
a4a595b39a
@ -39,13 +39,15 @@ import sys
|
||||
import copy
|
||||
import time
|
||||
import shlex
|
||||
import codecs
|
||||
import getopt
|
||||
import inspect
|
||||
import operator
|
||||
|
||||
if sys.version_info < (2, 7, 0):
|
||||
# cStringIO is buggy.
|
||||
# See http://paste.progval.net/show/227/
|
||||
if sys.version_info[0] < 3:
|
||||
# cStringIO is buggy with Python 2.6 (
|
||||
# see http://paste.progval.net/show/227/ )
|
||||
# and it does not handle unicode objects in Python 2.x
|
||||
from StringIO import StringIO
|
||||
else:
|
||||
from cStringIO import StringIO
|
||||
@ -290,16 +292,17 @@ class Tokenizer(object):
|
||||
def _handleToken(self, token):
|
||||
if token[0] == token[-1] and token[0] in self.quotes:
|
||||
token = token[1:-1]
|
||||
encoding_prefix = 'string' if sys.version_info[0]<3 else 'unicode'
|
||||
# FIXME: No need to tell you this is a hack.
|
||||
# It has to handle both IRC commands and serialized configuration.
|
||||
try:
|
||||
token = token.decode(encoding_prefix + '_escape')
|
||||
except:
|
||||
if sys.version_info[0] < 3:
|
||||
try:
|
||||
token = token.encode().decode(encoding_prefix + '_escape')
|
||||
token = token.encode('utf8').decode('string_escape')
|
||||
except:
|
||||
pass
|
||||
token = token.decode('string_escape')
|
||||
else:
|
||||
token = codecs.getencoder('utf8')(token)[0]
|
||||
token = codecs.getdecoder('unicode_escape')(token)[0]
|
||||
token = token.encode('iso-8859-1').decode()
|
||||
return token
|
||||
|
||||
def _insideBrackets(self, lexer):
|
||||
|
@ -1,3 +1,4 @@
|
||||
# -*- coding: utf8 -*-
|
||||
###
|
||||
# Copyright (c) 2002-2005, Jeremiah Fincher
|
||||
# All rights reserved.
|
||||
@ -71,6 +72,12 @@ class TokenizerTestCase(SupyTestCase):
|
||||
self.assertEqual(tokenize('foo "bar baz" quux'),
|
||||
['foo', 'bar baz', 'quux'])
|
||||
|
||||
def testUnicode(self):
|
||||
print repr((tokenize(u'好'), ['好']))
|
||||
print repr((tokenize(u'"好"'), ['好']))
|
||||
self.assertEqual(tokenize(u'好'), ['好'])
|
||||
self.assertEqual(tokenize(u'"好"'), ['好'])
|
||||
|
||||
def testNesting(self):
|
||||
self.assertEqual(tokenize('[]'), [[]])
|
||||
self.assertEqual(tokenize('[foo]'), [['foo']])
|
||||
|
Loading…
Reference in New Issue
Block a user