mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-27 13:19:24 +01:00
Use HTMLParser instead of deprecated sgmllib in utils.web.
This commit is contained in:
parent
9ae90c3869
commit
c4dfa55d65
@ -29,13 +29,14 @@
|
||||
###
|
||||
|
||||
import re
|
||||
import sys
|
||||
import socket
|
||||
import urllib
|
||||
import urllib2
|
||||
import httplib
|
||||
import sgmllib
|
||||
import urlparse
|
||||
import htmlentitydefs
|
||||
from HTMLParser import HTMLParser
|
||||
|
||||
sockerrors = (socket.error,)
|
||||
try:
|
||||
@ -150,19 +151,19 @@ def getUrl(url, size=None, headers=None, data=None):
|
||||
def getDomain(url):
|
||||
return urlparse.urlparse(url)[1]
|
||||
|
||||
class HtmlToText(sgmllib.SGMLParser):
|
||||
class HtmlToText(HTMLParser, object):
|
||||
"""Taken from some eff-bot code on c.l.p."""
|
||||
entitydefs = htmlentitydefs.entitydefs.copy()
|
||||
entitydefs['nbsp'] = ' '
|
||||
def __init__(self, tagReplace=' '):
|
||||
self.data = []
|
||||
self.tagReplace = tagReplace
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
super(HtmlToText, self).__init__()
|
||||
|
||||
def unknown_starttag(self, tag, attr):
|
||||
def handle_starttag(self, tag, attr):
|
||||
self.data.append(self.tagReplace)
|
||||
|
||||
def unknown_endtag(self, tag):
|
||||
def handle_endtag(self, tag):
|
||||
self.data.append(self.tagReplace)
|
||||
|
||||
def handle_data(self, data):
|
||||
@ -175,6 +176,8 @@ class HtmlToText(sgmllib.SGMLParser):
|
||||
def htmlToText(s, tagReplace=' '):
|
||||
"""Turns HTML into text. tagReplace is a string to replace HTML tags with.
|
||||
"""
|
||||
if sys.version_info[0] >= 3 and isinstance(s, bytes):
|
||||
s = s.decode()
|
||||
x = HtmlToText(tagReplace)
|
||||
x.feed(s)
|
||||
return x.getText()
|
||||
|
Loading…
Reference in New Issue
Block a user