mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-23 11:09:23 +01:00
Added normalizeWhitespace.
This commit is contained in:
parent
ebc00fe4d7
commit
83707f81c1
@ -44,6 +44,10 @@ import sgmllib
|
||||
import textwrap
|
||||
import htmlentitydefs
|
||||
|
||||
def normalizeWhitespace(s):
    r"""Collapse every run of whitespace in ``s`` into a single space.

    In regex terms, each interior \s+ becomes one space.  Leading and
    trailing whitespace is removed entirely, because ``s.split()`` with no
    arguments discards the empty fields at both ends.
    """
    # The docstring is a raw string so the literal "\s" above is not parsed
    # as an (invalid) escape sequence.
    return ' '.join(s.split())
|
||||
|
||||
class HtmlToText(sgmllib.SGMLParser):
|
||||
"""Taken from some eff-bot code on c.l.p."""
|
||||
entitydefs = htmlentitydefs.entitydefs
|
||||
@ -63,7 +67,7 @@ class HtmlToText(sgmllib.SGMLParser):
|
||||
|
||||
def getText(self):
    """Return the text gathered in ``self.data`` with whitespace normalized.

    The fragments in ``self.data`` are concatenated, stripped at both ends,
    and then passed through ``normalizeWhitespace`` so that every run of
    whitespace becomes a single space.
    """
    text = ''.join(self.data).strip()
    # Single exit point: the inline ' '.join(text.split()) form was
    # superseded by the shared helper, which performs the same collapse.
    return normalizeWhitespace(text)
|
||||
|
||||
def htmlToText(s, tagReplace=' '):
|
||||
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
||||
@ -302,4 +306,5 @@ def be(i):
|
||||
else:
|
||||
return 'are'
|
||||
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
||||
|
@ -157,5 +157,11 @@ class UtilsTest(unittest.TestCase):
|
||||
self.assertEqual(utils.unCommaThe('foo bar, the'), 'the foo bar')
|
||||
self.assertEqual(utils.unCommaThe('foo bar, The'), 'The foo bar')
|
||||
self.assertEqual(utils.unCommaThe('foo bar,the'), 'the foo bar')
|
||||
|
||||
def testNormalizeWhitespace(self):
    """Space, newline and tab separators must all normalize to one space."""
    expected = 'foo bar'
    # Inputs are checked in order; the first mismatch fails the test.
    for raw in ('foo bar', 'foo\nbar', 'foo\tbar'):
        self.assertEqual(utils.normalizeWhitespace(raw), expected)
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user