mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-02-17 06:00:42 +01:00
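Added a `tagReplace` keyword argument (default: a single space) to htmlToText and the HtmlToText parser, which sets the string substituted for each HTML start and end tag.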
This commit is contained in:
parent
dcf7fa5924
commit
5ef1d3eaaa
11
src/utils.py
11
src/utils.py
@ -43,15 +43,16 @@ import htmlentitydefs
|
|||||||
class HtmlToText(sgmllib.SGMLParser):
|
class HtmlToText(sgmllib.SGMLParser):
|
||||||
"""Taken from some eff-bot code on c.l.p."""
|
"""Taken from some eff-bot code on c.l.p."""
|
||||||
entitydefs = htmlentitydefs.entitydefs
|
entitydefs = htmlentitydefs.entitydefs
|
||||||
def __init__(self):
|
def __init__(self, tagReplace=' '):
|
||||||
self.data = []
|
self.data = []
|
||||||
|
self.tagReplace = tagReplace
|
||||||
sgmllib.SGMLParser.__init__(self)
|
sgmllib.SGMLParser.__init__(self)
|
||||||
|
|
||||||
def unknown_starttag(self, tag, attrib):
|
def unknown_starttag(self, tag, attrib):
|
||||||
self.data.append(" ")
|
self.data.append(self.tagReplace)
|
||||||
|
|
||||||
def unknown_endtag(self, tag):
|
def unknown_endtag(self, tag):
|
||||||
self.data.append(" ")
|
self.data.append(self.tagReplace)
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
self.data.append(data)
|
self.data.append(data)
|
||||||
@ -60,8 +61,8 @@ class HtmlToText(sgmllib.SGMLParser):
|
|||||||
text = ''.join(self.data).strip()
|
text = ''.join(self.data).strip()
|
||||||
return ' '.join(text.split()) # normalize whitespace
|
return ' '.join(text.split()) # normalize whitespace
|
||||||
|
|
||||||
def htmlToText(s):
|
def htmlToText(s, tagReplace=' '):
|
||||||
x = HtmlToText()
|
x = HtmlToText(tagReplace)
|
||||||
x.feed(s)
|
x.feed(s)
|
||||||
return x.getText()
|
return x.getText()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user