From 7e002ed2a23623589c5c600c7871f33acaafa983 Mon Sep 17 00:00:00 2001
From: Guido Tabbernuk
Date: Mon, 23 Jul 2012 18:06:24 +0300
Subject: [PATCH] Fix normalizeWhitespace to support Unicode.

---
 src/utils/str.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/utils/str.py b/src/utils/str.py
index 6e428a84d..c5ce82a3e 100644
--- a/src/utils/str.py
+++ b/src/utils/str.py
@@ -61,12 +61,16 @@ def rsplit(s, sep=None, maxsplit=-1):
 
 def normalizeWhitespace(s, removeNewline=True):
     """Normalizes the whitespace in a string; \s+ becomes one space."""
-    s = str(s)
+    replace_fn = lambda x, y, z: str.replace(x, y, z)
+    if isinstance(s, unicode):
+        replace_fn = lambda x, y, z: unicode.replace(x, y, z)
+    else:
+        s = str(s)
     if removeNewline:
-        s = str.replace(s, '\n', '')
-    s = str.replace(s, '\t', ' ')
+        s = replace_fn(s, '\n', '')
+    s = replace_fn(s, '\t', ' ')
     while '  ' in s:
-        s = str.replace(s, '  ', ' ')
+        s = replace_fn(s, '  ', ' ')
     return s
 
 def distance(s, t):