diff --git a/src/utils.py b/src/utils.py index 6535f8166..4f1bd3518 100755 --- a/src/utils.py +++ b/src/utils.py @@ -44,6 +44,10 @@ import sgmllib import textwrap import htmlentitydefs +def normalizeWhitespace(s): + """Collapses each whitespace run in s to one space; strips both ends.""" + return ' '.join(s.split()) + class HtmlToText(sgmllib.SGMLParser): """Taken from some eff-bot code on c.l.p.""" entitydefs = htmlentitydefs.entitydefs @@ -63,7 +67,7 @@ class HtmlToText(sgmllib.SGMLParser): def getText(self): text = ''.join(self.data).strip() - return ' '.join(text.split()) # normalize whitespace + return normalizeWhitespace(text) def htmlToText(s, tagReplace=' '): """Turns HTML into text. tagReplace is a string to replace HTML tags with. @@ -302,4 +306,5 @@ def be(i): else: return 'are' + # vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: diff --git a/test/test_utils.py b/test/test_utils.py index 1e2eef0c4..a2ee147ed 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -157,5 +157,11 @@ class UtilsTest(unittest.TestCase): self.assertEqual(utils.unCommaThe('foo bar, the'), 'the foo bar') self.assertEqual(utils.unCommaThe('foo bar, The'), 'The foo bar') self.assertEqual(utils.unCommaThe('foo bar,the'), 'the foo bar') + + def testNormalizeWhitespace(self): + self.assertEqual(utils.normalizeWhitespace('foo bar'), 'foo bar') + self.assertEqual(utils.normalizeWhitespace('foo\nbar'), 'foo bar') + self.assertEqual(utils.normalizeWhitespace('foo\tbar'), 'foo bar') + # vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: