Added normalizeWhitespace.

This commit is contained in:
Jeremy Fincher 2003-09-01 18:39:27 +00:00
parent ebc00fe4d7
commit 83707f81c1
2 changed files with 12 additions and 1 deletions

View File

@ -44,6 +44,10 @@ import sgmllib
import textwrap
import htmlentitydefs
def normalizeWhitespace(s):
    r"""Normalizes the whitespace in a string; \s+ becomes one space.

    Leading and trailing whitespace is removed entirely, and every interior
    run of whitespace (spaces, tabs, newlines, ...) collapses to one space.

    s is the string to normalize; the normalized string is returned.
    """
    # split() with no separator splits on runs of whitespace and discards
    # empty leading/trailing fields, so re-joining with a single space
    # yields the normalized form.
    return ' '.join(s.split())
class HtmlToText(sgmllib.SGMLParser):
"""Taken from some eff-bot code on c.l.p."""
entitydefs = htmlentitydefs.entitydefs
@ -63,7 +67,7 @@ class HtmlToText(sgmllib.SGMLParser):
def getText(self):
    """Returns the text gathered in self.data with whitespace normalized.

    The strings in self.data are concatenated, surrounding whitespace is
    stripped, and the result is passed through normalizeWhitespace so that
    this method and other callers share one normalization routine.
    """
    text = ''.join(self.data).strip()
    # A single return through the shared helper; the former inline
    # ' '.join(text.split()) duplicated it and left this call unreachable.
    return normalizeWhitespace(text)
def htmlToText(s, tagReplace=' '):
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
@ -302,4 +306,5 @@ def be(i):
else:
return 'are'
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:

View File

@ -157,5 +157,11 @@ class UtilsTest(unittest.TestCase):
self.assertEqual(utils.unCommaThe('foo bar, the'), 'the foo bar')
self.assertEqual(utils.unCommaThe('foo bar, The'), 'The foo bar')
self.assertEqual(utils.unCommaThe('foo bar,the'), 'the foo bar')
def testNormalizeWhitespace(self):
    # A single space, newline or tab between words each normalize to one
    # space.
    self.assertEqual(utils.normalizeWhitespace('foo bar'), 'foo bar')
    self.assertEqual(utils.normalizeWhitespace('foo\nbar'), 'foo bar')
    self.assertEqual(utils.normalizeWhitespace('foo\tbar'), 'foo bar')
    # Runs of several (possibly mixed) whitespace characters must collapse
    # to exactly one space; this is the case the helper exists for.
    self.assertEqual(utils.normalizeWhitespace('foo   bar'), 'foo bar')
    self.assertEqual(utils.normalizeWhitespace('foo \t\n bar'), 'foo bar')
    # Leading and trailing whitespace is dropped, and empty input stays
    # empty.
    self.assertEqual(utils.normalizeWhitespace('  foo bar\n'), 'foo bar')
    self.assertEqual(utils.normalizeWhitespace(''), '')
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: