mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-27 05:09:23 +01:00
Added normalizeWhitespace.
This commit is contained in:
parent
ebc00fe4d7
commit
83707f81c1
@ -44,6 +44,10 @@ import sgmllib
|
|||||||
import textwrap
|
import textwrap
|
||||||
import htmlentitydefs
|
import htmlentitydefs
|
||||||
|
|
||||||
|
def normalizeWhitespace(s):
|
||||||
|
"""Normalizes the whitespace in a string; \s+ becomes one space."""
|
||||||
|
return ' '.join(s.split())
|
||||||
|
|
||||||
class HtmlToText(sgmllib.SGMLParser):
|
class HtmlToText(sgmllib.SGMLParser):
|
||||||
"""Taken from some eff-bot code on c.l.p."""
|
"""Taken from some eff-bot code on c.l.p."""
|
||||||
entitydefs = htmlentitydefs.entitydefs
|
entitydefs = htmlentitydefs.entitydefs
|
||||||
@ -63,7 +67,7 @@ class HtmlToText(sgmllib.SGMLParser):
|
|||||||
|
|
||||||
def getText(self):
|
def getText(self):
|
||||||
text = ''.join(self.data).strip()
|
text = ''.join(self.data).strip()
|
||||||
return ' '.join(text.split()) # normalize whitespace
|
return normalizeWhitespace(text)
|
||||||
|
|
||||||
def htmlToText(s, tagReplace=' '):
|
def htmlToText(s, tagReplace=' '):
|
||||||
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
||||||
@ -302,4 +306,5 @@ def be(i):
|
|||||||
else:
|
else:
|
||||||
return 'are'
|
return 'are'
|
||||||
|
|
||||||
|
|
||||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
||||||
|
@ -157,5 +157,11 @@ class UtilsTest(unittest.TestCase):
|
|||||||
self.assertEqual(utils.unCommaThe('foo bar, the'), 'the foo bar')
|
self.assertEqual(utils.unCommaThe('foo bar, the'), 'the foo bar')
|
||||||
self.assertEqual(utils.unCommaThe('foo bar, The'), 'The foo bar')
|
self.assertEqual(utils.unCommaThe('foo bar, The'), 'The foo bar')
|
||||||
self.assertEqual(utils.unCommaThe('foo bar,the'), 'the foo bar')
|
self.assertEqual(utils.unCommaThe('foo bar,the'), 'the foo bar')
|
||||||
|
|
||||||
|
def testNormalizeWhitespace(self):
|
||||||
|
self.assertEqual(utils.normalizeWhitespace('foo bar'), 'foo bar')
|
||||||
|
self.assertEqual(utils.normalizeWhitespace('foo\nbar'), 'foo bar')
|
||||||
|
self.assertEqual(utils.normalizeWhitespace('foo\tbar'), 'foo bar')
|
||||||
|
|
||||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user