Mirror of https://github.com/Mikaela/Limnoria.git (synced 2025-10-31 15:47:25 +01:00)
	Partial fix of encoding handling.
This fixes mostly everything, except a little bit in the test framework. I'm just saving this in case my computer or I am destroyed in an alien invasion, because this commit is worth hours of debugging.
parent c9b6b56244
commit a4a595b39a
author Valentin Lorentz
@@ -39,13 +39,15 @@ import sys
 import copy
 import time
 import shlex
+import codecs
 import getopt
 import inspect
 import operator
 
-if sys.version_info < (2, 7, 0):
-    # cStringIO is buggy.
-    # See http://paste.progval.net/show/227/
+if sys.version_info[0] < 3:
+    # cStringIO is buggy with Python 2.6 (
+    # see http://paste.progval.net/show/227/ )
+    # and it does not handle unicode objects in Python 2.x
     from StringIO import StringIO
 else:
     from cStringIO import StringIO
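A side note on this hunk: the new gate works around Python 2.6's buggy cStringIO and its lack of unicode support, but the else branch still imports cStringIO, a module that no longer exists on Python 3 (io.StringIO is the Python 3 equivalent). A minimal runnable sketch of the pattern, with that substitution, assuming nothing beyond the standard library:

import sys

# Version-gated import, as in the hunk above, except that the Python 3
# branch uses io.StringIO (cStringIO was removed in Python 3).
if sys.version_info[0] < 3:
    from StringIO import StringIO   # pure Python; accepts unicode on 2.x
else:
    from io import StringIO

buf = StringIO()
buf.write(u'好')                # unicode text is accepted by both branches
assert buf.getvalue() == u'好'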
@@ -290,16 +292,17 @@ class Tokenizer(object):
     def _handleToken(self, token):
         if token[0] == token[-1] and token[0] in self.quotes:
             token = token[1:-1]
-            encoding_prefix = 'string' if sys.version_info[0]<3 else 'unicode'
             # FIXME: No need to tell you this is a hack.
             # It has to handle both IRC commands and serialized configuration.
-            try:
-                token = token.decode(encoding_prefix + '_escape')
-            except:
+            if sys.version_info[0] < 3:
                 try:
-                    token = token.encode().decode(encoding_prefix + '_escape')
+                    token = token.encode('utf8').decode('string_escape')
                 except:
-                    pass
+                    token = token.decode('string_escape')
+            else:
+                token = codecs.getencoder('utf8')(token)[0]
+                token = codecs.getdecoder('unicode_escape')(token)[0]
+                token = token.encode('iso-8859-1').decode()
         return token
 
     def _insideBrackets(self, lexer):
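The Python 3 branch of _handleToken packs a subtle trick: unicode_escape is a bytes-to-str codec that interprets backslash escapes but reads its input as Latin-1, so each UTF-8 byte of a non-ASCII character comes out as one Latin-1 code point; re-encoding the result as ISO-8859-1 therefore restores the original UTF-8 bytes, and the final decode turns them back into proper text. A standalone sketch of the same round trip (unescape_token is an illustrative name, not Limnoria's API):

import codecs

def unescape_token(token):
    # Mirrors the Python 3 branch of _handleToken above.
    raw = codecs.getencoder('utf8')(token)[0]               # str -> UTF-8 bytes
    escaped = codecs.getdecoder('unicode_escape')(raw)[0]   # escapes applied, bytes read as Latin-1
    return escaped.encode('iso-8859-1').decode('utf8')      # undo the Latin-1 reading

assert unescape_token(r'a\nb') == 'a\nb'   # backslash escapes are interpreted
assert unescape_token('好') == '好'         # non-ASCII survives the round trip

One edge the commit leaves unhandled: an explicit \uNNNN escape above U+00FF decodes to a code point that ISO-8859-1 cannot encode, so the final step would raise UnicodeEncodeError there.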
@@ -1,3 +1,4 @@
+# -*- coding: utf8 -*-
 ###
 # Copyright (c) 2002-2005, Jeremiah Fincher
 # All rights reserved.
@@ -71,6 +72,12 @@ class TokenizerTestCase(SupyTestCase):
         self.assertEqual(tokenize('foo "bar baz" quux'),
                          ['foo', 'bar baz', 'quux'])
 
+    def testUnicode(self):
+        print repr((tokenize(u'好'), ['好']))
+        print repr((tokenize(u'"好"'), ['好']))
+        self.assertEqual(tokenize(u'好'), ['好'])
+        self.assertEqual(tokenize(u'"好"'), ['好'])
+
     def testNesting(self):
         self.assertEqual(tokenize('[]'), [[]])
         self.assertEqual(tokenize('[foo]'), [['foo']])
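For a quick interactive check of the new behaviour, assuming a Limnoria/supybot checkout is importable (the tokenize below is supybot.callbacks.tokenize, the helper this test file exercises):

# Assumes a supybot/Limnoria checkout on sys.path.
from supybot.callbacks import tokenize

print(tokenize(u'好'))                  # expected: ['好']
print(tokenize(u'"好"'))                # expected: ['好'], unquoted and unescaped
print(tokenize('foo "bar baz" quux'))   # expected: ['foo', 'bar baz', 'quux']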