From be6bc1a73402175160a8759ba034c9c7eb5e3539 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Mon, 10 Aug 2015 18:52:51 +0200 Subject: [PATCH] Remove need for fix_unicode. --- plugins/Aka/test.py | 7 +++-- plugins/Filter/plugin.py | 66 ++++++++++++++++++++------------------- plugins/Filter/test.py | 2 ++ plugins/Utilities/test.py | 6 ++-- plugins/Web/plugin.py | 2 +- setup.py | 2 +- src/minisix.py | 4 +++ src/utils/web.py | 20 +++++++----- 8 files changed, 63 insertions(+), 46 deletions(-) diff --git a/plugins/Aka/test.py b/plugins/Aka/test.py index 6d2a9ea72..056a0ecc3 100644 --- a/plugins/Aka/test.py +++ b/plugins/Aka/test.py @@ -33,6 +33,7 @@ from supybot.test import * import supybot.conf as conf import supybot.plugin as plugin import supybot.registry as registry +from supybot.minisix import u import plugin as Aka @@ -147,8 +148,8 @@ class AkaChannelTestCase(ChannelPluginTestCase): self.assertResponse('egg', 'baz') def testComplicatedNames(self): - self.assertNotError(u'aka add café "echo coffee"') - self.assertResponse(u'café', 'coffee') + self.assertNotError(u('aka add café "echo coffee"')) + self.assertResponse(u('café'), 'coffee') self.assertNotError('aka add "foo bar" "echo spam"') self.assertResponse('foo bar', 'spam') @@ -201,7 +202,7 @@ class AkaTestCase(PluginTestCase): def testAliasImport(self): self.assertNotError('alias add foo "echo bar"') - self.assertNotError(u'alias add baz "echo café"') + self.assertNotError(u('alias add baz "echo café"')) self.assertNotError('aka add qux "echo quux"') self.assertResponse('alias foo', 'bar') self.assertResponse('alias baz', 'café') diff --git a/plugins/Filter/plugin.py b/plugins/Filter/plugin.py index b3c1f621e..60cd73fa3 100644 --- a/plugins/Filter/plugin.py +++ b/plugins/Filter/plugin.py @@ -28,6 +28,8 @@ # POSSIBILITY OF SUCH DAMAGE. 
### +from __future__ import unicode_literals + import re import sys import codecs @@ -653,39 +655,39 @@ class Filter(callbacks.Plugin): # XXX suckiest: B,K,P,Q,T # alternatives: 3: U+2107 _uniudMap = { - ' ': u' ', '0': u'0', '@': u'@', - '!': u'\u00a1', '1': u'1', 'A': u'\u2200', - '"': u'\u201e', '2': u'\u2681', 'B': u'q', - '#': u'#', '3': u'\u0190', 'C': u'\u0186', - '$': u'$', '4': u'\u2683', 'D': u'\u15e1', - '%': u'%', '5': u'\u1515', 'E': u'\u018e', - '&': u'\u214b', '6': u'9', 'F': u'\u2132', - "'": u'\u0375', '7': u'L', 'G': u'\u2141', - '(': u')', '8': u'8', 'H': u'H', - ')': u'(', '9': u'6', 'I': u'I', - '*': u'*', ':': u':', 'J': u'\u148b', - '+': u'+', ';': u';', 'K': u'\u029e', - ',': u'\u2018', '<': u'>', 'L': u'\u2142', - '-': u'-', '=': u'=', 'M': u'\u019c', - '.': u'\u02d9', '>': u'<', 'N': u'N', - '/': u'/', '?': u'\u00bf', 'O': u'O', + ' ': ' ', '0': '0', '@': '@', + '!': '\u00a1', '1': '1', 'A': '\u2200', + '"': '\u201e', '2': '\u2681', 'B': 'q', + '#': '#', '3': '\u0190', 'C': '\u0186', + '$': '$', '4': '\u2683', 'D': '\u15e1', + '%': '%', '5': '\u1515', 'E': '\u018e', + '&': '\u214b', '6': '9', 'F': '\u2132', + "'": '\u0375', '7': 'L', 'G': '\u2141', + '(': ')', '8': '8', 'H': 'H', + ')': '(', '9': '6', 'I': 'I', + '*': '*', ':': ':', 'J': '\u148b', + '+': '+', ';': ';', 'K': '\u029e', + ',': '\u2018', '<': '>', 'L': '\u2142', + '-': '-', '=': '=', 'M': '\u019c', + '.': '\u02d9', '>': '<', 'N': 'N', + '/': '/', '?': '\u00bf', 'O': 'O', - 'P': u'd', '`': u'\u02ce', 'p': u'd', - 'Q': u'b', 'a': u'\u0250', 'q': u'b', - 'R': u'\u1d1a', 'b': u'q', 'r': u'\u0279', - 'S': u'S', 'c': u'\u0254', 's': u's', - 'T': u'\u22a5', 'd': u'p', 't': u'\u0287', - 'U': u'\u144e', 'e': u'\u01dd', 'u': u'n', - 'V': u'\u039b', 'f': u'\u214e', 'v': u'\u028c', - 'W': u'M', 'g': u'\u0253', 'w': u'\u028d', - 'X': u'X', 'h': u'\u0265', 'x': u'x', - 'Y': u'\u2144', 'i': u'\u1d09', 'y': u'\u028e', - 'Z': u'Z', 'j': u'\u027f', 'z': u'z', - '[': u']', 'k': u'\u029e', '{': 
u'}', - '\\': u'\\', 'l': u'\u05df', '|': u'|', - ']': u'[', 'm': u'\u026f', '}': u'{', - '^': u'\u2335', 'n': u'u', '~': u'~', - '_': u'\u203e', 'o': u'o', + 'P': 'd', '`': '\u02ce', 'p': 'd', + 'Q': 'b', 'a': '\u0250', 'q': 'b', + 'R': '\u1d1a', 'b': 'q', 'r': '\u0279', + 'S': 'S', 'c': '\u0254', 's': 's', + 'T': '\u22a5', 'd': 'p', 't': '\u0287', + 'U': '\u144e', 'e': '\u01dd', 'u': 'n', + 'V': '\u039b', 'f': '\u214e', 'v': '\u028c', + 'W': 'M', 'g': '\u0253', 'w': '\u028d', + 'X': 'X', 'h': '\u0265', 'x': 'x', + 'Y': '\u2144', 'i': '\u1d09', 'y': '\u028e', + 'Z': 'Z', 'j': '\u027f', 'z': 'z', + '[': ']', 'k': '\u029e', '{': '}', + '\\': '\\', 'l': '\u05df', '|': '|', + ']': '[', 'm': '\u026f', '}': '{', + '^': '\u2335', 'n': 'u', '~': '~', + '_': '\u203e', 'o': 'o', } @internationalizeDocstring diff --git a/plugins/Filter/test.py b/plugins/Filter/test.py index 1d680e214..ab7785920 100644 --- a/plugins/Filter/test.py +++ b/plugins/Filter/test.py @@ -28,6 +28,8 @@ # POSSIBILITY OF SUCH DAMAGE. ### +from __future__ import unicode_literals + from supybot.test import * import re diff --git a/plugins/Utilities/test.py b/plugins/Utilities/test.py index 339a0b57d..928f66d11 100644 --- a/plugins/Utilities/test.py +++ b/plugins/Utilities/test.py @@ -28,6 +28,8 @@ # POSSIBILITY OF SUCH DAMAGE.
### + +from supybot.minisix import u from supybot.test import * class UtilitiesTestCase(PluginTestCase): @@ -46,8 +48,8 @@ class UtilitiesTestCase(PluginTestCase): def testEcho(self): self.assertHelp('echo') self.assertResponse('echo foo', 'foo') - self.assertResponse(u'echo 好', '好') - self.assertResponse(u'echo "好"', '好') + self.assertResponse(u('echo 好'), '好') + self.assertResponse(u('echo "好"'), '好') def testEchoDollarOneRepliesDollarOne(self): self.assertResponse('echo $1', '$1') diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py index 942b031c3..3d3c050b8 100644 --- a/plugins/Web/plugin.py +++ b/plugins/Web/plugin.py @@ -78,7 +78,7 @@ class Title(HTMLParser): def handle_charref(self, name): if self.inTitle: - self.title += unichr(int(name)) + self.title += (unichr if minisix.PY2 else chr)(int(name)) class DelayedIrc: def __init__(self, irc): diff --git a/setup.py b/setup.py index b1497588a..fa542b786 100644 --- a/setup.py +++ b/setup.py @@ -160,7 +160,7 @@ try: fixer_names = ['fix_basestring', 'fix_metaclass', 'fix_numliterals', - 'fix_unicode', 'fix_xrange'] + 'fix_xrange'] fixers = list(map(lambda x:'lib2to3.fixes.'+x, fixer_names)) fixers += get_fixers_from_package('2to3') r = DistutilsRefactoringTool(fixers, options=options) diff --git a/src/minisix.py b/src/minisix.py index 7ae7cf223..dd8b3deb0 100644 --- a/src/minisix.py +++ b/src/minisix.py @@ -41,6 +41,8 @@ if sys.version_info[0] >= 3: import io import pickle import queue + + u = lambda x:x else: PY2 = True PY3 = False @@ -59,3 +61,5 @@ else: from cStringIO import StringIO as BytesIO import cPickle as pickle import Queue as queue + + u = lambda x:x.decode('utf8') diff --git a/src/utils/web.py b/src/utils/web.py index 7a6d2cf01..10e6aafa3 100644 --- a/src/utils/web.py +++ b/src/utils/web.py @@ -222,14 +222,20 @@ class HtmlToText(HTMLParser, object): self.data.append(data) def handle_entityref(self, data): - if data in name2codepoint: - self.data.append(unichr(name2codepoint[data])) - elif 
minisix.PY3 and isinstance(data, bytes): - self.data.append(data.decode()) - elif minisix.PY2 and isinstance(data, str): - self.data.append(data.decode('utf8', errors='replace')) + if minisix.PY3: + if data in name2codepoint: + self.data.append(chr(name2codepoint[data])) + elif isinstance(data, bytes): + self.data.append(data.decode()) + else: + self.data.append(data) else: - self.data.append(data) + if data in name2codepoint: + self.data.append(unichr(name2codepoint[data])) + elif isinstance(data, str): + self.data.append(data.decode('utf8', errors='replace')) + else: + self.data.append(data) def getText(self): text = ''.join(self.data).strip()