Remove need for fix_imports, fix_types, and fix_urllib.

This commit is contained in:
Valentin Lorentz 2015-08-10 17:55:25 +02:00
parent 3991faf7ee
commit c0ac84bb53
24 changed files with 140 additions and 119 deletions

View File

@ -31,7 +31,6 @@
import os
import sys
import time
from cStringIO import StringIO
import supybot.conf as conf
import supybot.world as world

View File

@ -32,7 +32,6 @@ import os
import sys
import time
import string
import urllib
import supybot.conf as conf
import supybot.ircdb as ircdb
@ -128,7 +127,7 @@ class FactoidsCallback(httpserver.SupyHTTPServerCallback):
self.end_headers()
self.write(httpserver.get_template('factoids/index.html'))
elif len(parts) == 2:
channel = urllib.unquote(parts[0])
channel = utils.web.unquote(parts[0])
if not ircutils.isChannel(channel):
self.send_response(404)
self.send_header('Content-type', 'text/html; charset=utf-8')
@ -180,7 +179,7 @@ class FactoidsCallback(httpserver.SupyHTTPServerCallback):
if 'chan' in form:
self.send_response(303)
self.send_header('Location',
'./%s/' % urllib.quote(form['chan'].value))
'./%s/' % utils.web.quote(form['chan'].value))
self.end_headers()
else:
self.send_response(400)

View File

@ -33,7 +33,6 @@ import sys
import codecs
import string
import random
from cStringIO import StringIO
import supybot.conf as conf
import supybot.utils as utils
@ -610,7 +609,7 @@ class Filter(callbacks.Plugin):
## for (c, v) in d.items():
## dd[ord(c)] = unicode(v + ' ')
## irc.reply(unicode(text).translate(dd))
out = StringIO()
out = minisix.io.StringIO()
write = out.write
for c in text:
try:

View File

@ -28,9 +28,8 @@
###
from cStringIO import StringIO
from supybot.test import *
import supybot.minisix as minisix
import supybot.gpg as gpg
@ -111,25 +110,25 @@ class GPGTestCase(PluginTestCase):
return fd
(utils.web.getUrlFd, realGetUrlFd) = (fakeGetUrlFd, utils.web.getUrlFd)
fd = StringIO()
fd = minisix.io.StringIO()
fd.write('foo')
fd.seek(0)
self.assertResponse('gpg signing auth http://foo.bar/baz.gpg',
'Error: Signature or token not found.')
fd = StringIO()
fd = minisix.io.StringIO()
fd.write(token)
fd.seek(0)
self.assertResponse('gpg signing auth http://foo.bar/baz.gpg',
'Error: Signature or token not found.')
fd = StringIO()
fd = minisix.io.StringIO()
fd.write(WRONG_TOKEN_SIGNATURE)
fd.seek(0)
self.assertRegexp('gpg signing auth http://foo.bar/baz.gpg',
'Error: Unknown token.*')
fd = StringIO()
fd = minisix.io.StringIO()
fd.write(str(gpg.keyring.sign(token)))
fd.seek(0)
self.assertResponse('gpg signing auth http://foo.bar/baz.gpg',

View File

@ -34,7 +34,6 @@ import cgi
import json
import time
import socket
import urllib
import supybot.conf as conf
import supybot.utils as utils
@ -119,7 +118,7 @@ class Google(callbacks.PluginRegexp):
opts['rsz'] = 'large'
text = utils.web.getUrl('%s?%s' % (self._gsearchUrl,
urllib.urlencode(opts)),
utils.web.urlencode(opts)),
headers=headers).decode('utf8')
data = json.loads(text)
if data['responseStatus'] != 200:
@ -255,10 +254,10 @@ class Google(callbacks.PluginRegexp):
headers['User-Agent'] = ('Mozilla/5.0 (X11; U; Linux i686) '
'Gecko/20071127 Firefox/2.0.0.11')
sourceLang = urllib.quote(sourceLang)
targetLang = urllib.quote(targetLang)
sourceLang = utils.web.urlquote(sourceLang)
targetLang = utils.web.urlquote(targetLang)
text = urllib.quote(text)
text = utils.web.urlquote(text)
result = utils.web.getUrlFd('http://translate.googleapis.com/translate_a/single'
'?client=gtx&dt=t&sl=%s&tl=%s&q='
@ -291,7 +290,7 @@ class Google(callbacks.PluginRegexp):
googleSnarfer = urlSnarfer(googleSnarfer)
def _googleUrl(self, s, channel):
s = urllib.quote_plus(s)
s = utils.web.urlquote_plus(s)
url = r'http://%s/search?q=%s' % \
(self.registryValue('baseUrl', channel), s)
return url

View File

@ -10,9 +10,10 @@
#
#****************************************************************************
import re, copy, sys, os.path, StringIO
import re, copy, sys, os.path
import supybot.conf as conf
import supybot.minisix as minisix
import supybot.registry as registry
unitData = \
@ -1059,7 +1060,7 @@ class UnitData(dict):
types = []
typeUnits = {}
try:
f = StringIO.StringIO(unitData)
f = minisix.io.StringIO(unitData)
lines = f.readlines()
f.close()
except IOError:

View File

@ -32,8 +32,6 @@ import sys
import time
import string
from cStringIO import StringIO
import supybot.conf as conf
import supybot.ircdb as ircdb
import supybot.utils as utils
@ -70,7 +68,7 @@ class OptionList(object):
ret.append(token)
def tokenize(self, s):
lexer = shlex.shlex(StringIO(s))
lexer = shlex.shlex(minisix.io.StringIO(s))
lexer.commenters = ''
lexer.quotes = ''
lexer.whitespace = ''

View File

@ -33,9 +33,7 @@ import io
import sys
import json
import shutil
import urllib
import tarfile
from cStringIO import StringIO
import supybot.log as log
import supybot.conf as conf
@ -47,8 +45,6 @@ import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring
BytesIO = StringIO if minisix.PY2 else io.BytesIO
_ = PluginInternationalization('PluginDownloader')
class Repository:
@ -81,7 +77,7 @@ class GithubRepository(GitRepository):
def _query(self, type_, uri_end, args={}):
args = dict([(x,y) for x,y in args.items() if y is not None])
url = '%s/%s/%s?%s' % (self._apiUrl, type_, uri_end,
urllib.urlencode(args))
utils.web.urlencode(args))
return json.loads(utils.web.getUrl(url).decode('utf8'))
def getPluginList(self):
@ -109,7 +105,7 @@ class GithubRepository(GitRepository):
assert response.getcode() == 200, response.getcode()
else:
assert response.status == 200, response.status
fileObject = BytesIO()
fileObject = minisix.io.BytesIO()
fileObject.write(response.read())
finally: # urllib does not handle 'with' statements :(
response.close()

View File

@ -33,10 +33,6 @@ import feedparser
from supybot.test import *
import supybot.conf as conf
import supybot.minisix as minisix
if minisix.PY3:
from io import BytesIO
else:
from cStringIO import StringIO as BytesIO
xkcd_old = """<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0"><channel><title>xkcd.com</title><link>http://xkcd.com/</link><description>xkcd.com: A webcomic of romance and math humor.</description><language>en</language><item><title>Snake Facts</title><link>http://xkcd.com/1398/</link><description>&lt;img src="http://imgs.xkcd.com/comics/snake_facts.png" title="Biologically speaking, what we call a 'snake' is actually a human digestive tract which has escaped from its host." alt="Biologically speaking, what we call a 'snake' is actually a human digestive tract which has escaped from its host." /&gt;</description><pubDate>Wed, 23 Jul 2014 04:00:00 -0000</pubDate><guid>http://xkcd.com/1398/</guid></item></channel></rss>
@ -51,7 +47,7 @@ def constant(content):
if minisix.PY3:
content = content.encode()
def f(*args, **kwargs):
return BytesIO(content)
return minisix.io.BytesIO(content)
return f
url = 'http://www.advogato.org/rss/articles.xml'

View File

@ -31,7 +31,6 @@ import time
import os
import shutil
import tempfile
import cPickle as pickle
import supybot.conf as conf
import supybot.utils as utils
@ -42,6 +41,9 @@ from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('Scheduler')
import supybot.world as world
import supybot.minisix as minisix
pickle = minisix.pickle
datadir = conf.supybot.directories.data()
filename = conf.supybot.directories.data.dirize('Scheduler.pickle')

View File

@ -32,7 +32,6 @@ import re
import sys
import time
import json
import urllib
import supybot.log as log
import supybot.conf as conf

View File

@ -32,7 +32,6 @@ import re
import random
import shutil
import tempfile
import cPickle as pickle
import supybot.conf as conf
import supybot.ircdb as ircdb
@ -48,6 +47,8 @@ _ = PluginInternationalization('Topic')
import supybot.ircdb as ircdb
import supybot.minisix as minisix
pickle = minisix.pickle
def canChangeTopic(irc, msg, args, state):
assert not state.channel

View File

@ -31,8 +31,6 @@
import re
import sys
import socket
import HTMLParser
import htmlentitydefs
import supybot.conf as conf
import supybot.utils as utils
@ -45,14 +43,21 @@ import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('Web')
class Title(HTMLParser.HTMLParser):
entitydefs = htmlentitydefs.entitydefs.copy()
if minisix.PY3:
from html.parser import HTMLParser, HTMLParseError
from html.entities import entitydefs
else:
from HTMLParser import HTMLParser, HTMLParseError
from htmlentitydefs import entitydefs
class Title(HTMLParser):
entitydefs = entitydefs.copy()
entitydefs['nbsp'] = ' '
entitydefs['apos'] = '\''
def __init__(self):
self.inTitle = False
self.title = ''
HTMLParser.HTMLParser.__init__(self)
HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
if tag == 'title':
@ -156,7 +161,7 @@ class Web(callbacks.PluginRegexp):
parser = Title()
try:
parser.feed(text)
except HTMLParser.HTMLParseError:
except HTMLParseError:
self.log.debug('Encountered a problem parsing %u. Title may '
'already be set, though', url)
if parser.title:
@ -286,7 +291,7 @@ class Web(callbacks.PluginRegexp):
'installing python-charade.)'), Raise=True)
try:
parser.feed(text)
except HTMLParser.HTMLParseError:
except HTMLParseError:
self.log.debug('Encountered a problem parsing %u. Title may '
'already be set, though', url)
if parser.title:

View File

@ -158,11 +158,9 @@ try:
log.debug(msg, *args)
fixer_names = ['fix_basestring',
'fix_imports',
'fix_metaclass', 'fix_methodattrs',
'fix_numliterals',
'fix_types',
'fix_unicode', 'fix_urllib', 'fix_xrange']
'fix_unicode', 'fix_xrange']
fixers = list(map(lambda x:'lib2to3.fixes.'+x, fixer_names))
fixers += get_fixers_from_package('2to3')
r = DistutilsRefactoringTool(fixers, options=options)

View File

@ -48,14 +48,6 @@ from .utils.iter import any, all
from .i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization()
if minisix.PY2:
# cStringIO is buggy with Python 2.6 (
# see http://paste.progval.net/show/227/ )
# and it does not handle unicode objects in Python 2.x
from StringIO import StringIO
else:
from cStringIO import StringIO
def _addressed(nick, msg, prefixChars=None, nicks=None,
prefixStrings=None, whenAddressedByNick=None,
whenAddressedByNickAtEnd=None):
@ -329,7 +321,7 @@ class Tokenizer(object):
return ret
def tokenize(self, s):
lexer = shlex.shlex(StringIO(s))
lexer = shlex.shlex(minisix.io.StringIO(s))
lexer.commenters = ''
lexer.quotes = self.quotes
lexer.separators = self.separators

View File

@ -38,9 +38,8 @@ import os
import sys
import struct
import os.path
import cPickle as pickle
from . import utils
from . import utils, minisix
def hash(s):
"""DJB's hash function for CDB."""
@ -447,14 +446,14 @@ class ReaderWriter(utils.IterableMap):
class Shelf(ReaderWriter):
"""Uses pickle to mimic the shelf module."""
def __getitem__(self, key):
return pickle.loads(ReaderWriter.__getitem__(self, key))
return minisix.pickle.loads(ReaderWriter.__getitem__(self, key))
def __setitem__(self, key, value):
ReaderWriter.__setitem__(self, key, pickle.dumps(value, True))
ReaderWriter.__setitem__(self, key, minisix.pickle.dumps(value, True))
def items(self):
for (key, value) in ReaderWriter.items(self):
yield (key, pickle.loads(value))
yield (key, minisix.pickle.loads(value))
if __name__ == '__main__':

View File

@ -33,7 +33,6 @@ Includes wrappers for commands.
"""
import time
import Queue
import getopt
import inspect
import threading
@ -124,7 +123,7 @@ def process(f, *args, **kwargs):
raise ProcessTimeoutError("%s aborted due to timeout." % (p.name,))
try:
v = q.get(block=False)
except Queue.Empty:
except minisix.queue.Empty:
return None
finally:
q.close()

View File

@ -36,7 +36,6 @@ import sys
import cgi
import socket
from threading import Thread
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
import supybot.log as log
import supybot.conf as conf
@ -45,6 +44,11 @@ import supybot.minisix as minisix
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization()
if minisix.PY2:
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
else:
from http.server import HTTPServer, BaseHTTPRequestHandler
configGroup = conf.supybot.servers.http
class RequestNotHandled(Exception):

View File

@ -45,7 +45,6 @@ import random
import string
import textwrap
import functools
from cStringIO import StringIO as sio
from . import utils
from . import minisix
@ -174,7 +173,7 @@ def _hostmaskPatternEqual(pattern, hostmask):
except KeyError:
# We make our own regexps, rather than use fnmatch, because fnmatch's
# case-insensitivity is not IRC's case-insensitivity.
fd = sio()
fd = minisix.io.StringIO()
for c in pattern:
if c == '*':
fd.write('.*')
@ -528,7 +527,7 @@ class FormatContext(object):
class FormatParser(object):
def __init__(self, s):
self.fd = sio(s)
self.fd = minisix.io.StringIO(s)
self.last = None
def getChar(self):

View File

@ -37,6 +37,10 @@ if sys.version_info[0] >= 3:
intern = sys.intern
integer_types = (int,)
long = int
import io
import pickle
import queue
else:
PY2 = True
PY3 = False
@ -46,3 +50,12 @@ else:
intern = __builtins__.intern
integer_types = (int, long)
long = long
class io:
# cStringIO is buggy with Python 2.6 (
# see http://paste.progval.net/show/227/ )
# and it does not handle unicode objects in Python 2.x
from StringIO import StringIO
from cStringIO import StringIO as BytesIO
import cPickle as pickle
import Queue as queue

View File

@ -35,13 +35,21 @@ import sys
import time
import shutil
import urllib
import httplib
import unittest
import threading
from . import (callbacks, conf, drivers, httpserver, i18n, ircdb, irclib,
ircmsgs, ircutils, log, minisix, plugin, registry, utils, world)
if minisix.PY2:
from httplib import HTTPConnection
from urllib import splithost, splituser
from urllib import URLopener
else:
from http.client import HTTPConnection
from urllib.parse import splithost, splituser
from urllib.request import URLopener
i18n.import_conf()
network = True
@ -511,15 +519,15 @@ def open_http(url, data=None):
user_passwd = None
proxy_passwd= None
if isinstance(url, str):
host, selector = urllib.splithost(url)
host, selector = splithost(url)
if host:
user_passwd, host = urllib.splituser(host)
user_passwd, host = splituser(host)
host = urllib.unquote(host)
realhost = host
else:
host, selector = url
# check whether the proxy contains authorization information
proxy_passwd, host = urllib.splituser(host)
proxy_passwd, host = splituser(host)
# now we proceed with the url we want to obtain
urltype, rest = urllib.splittype(selector)
url = rest
@ -527,9 +535,9 @@ def open_http(url, data=None):
if urltype.lower() != 'http':
realhost = None
else:
realhost, rest = urllib.splithost(rest)
realhost, rest = splithost(rest)
if realhost:
user_passwd, realhost = urllib.splituser(realhost)
user_passwd, realhost = splituser(realhost)
if user_passwd:
selector = "%s://%s%s" % (urltype, realhost, rest)
if urllib.proxy_bypass(realhost):
@ -559,15 +567,15 @@ def open_http(url, data=None):
if proxy_auth: c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
if auth: c.putheader('Authorization', 'Basic %s' % auth)
if realhost: c.putheader('Host', realhost)
for args in urllib.URLopener().addheaders: c.putheader(*args)
for args in URLopener().addheaders: c.putheader(*args)
c.endheaders()
return c
class FakeHTTPConnection(httplib.HTTPConnection):
class FakeHTTPConnection(HTTPConnection):
_data = ''
_headers = {}
def __init__(self, rfile, wfile):
httplib.HTTPConnection.__init__(self, 'localhost')
HTTPConnection.__init__(self, 'localhost')
self.rfile = rfile
self.wfile = wfile
def send(self, data):
@ -585,12 +593,8 @@ class HTTPPluginTestCase(PluginTestCase):
def request(self, url, method='GET', read=True, data={}):
assert url.startswith('/')
try:
from io import BytesIO as StringIO
except ImportError:
from StringIO import StringIO
wfile = StringIO()
rfile = StringIO()
wfile = minisix.io.StringIO()
rfile = minisix.io.StringIO()
connection = FakeHTTPConnection(wfile, rfile)
connection.putrequest(method, url)
connection.endheaders()

View File

@ -29,20 +29,20 @@
###
import sys
from .. import minisix
###
# csv.{join,split} -- useful functions that should exist.
###
import csv
import cStringIO as StringIO
def join(L):
fd = StringIO.StringIO()
fd = minisix.io.StringIO()
writer = csv.writer(fd)
writer.writerow(L)
return fd.getvalue().rstrip('\r\n')
def split(s):
fd = StringIO.StringIO(s)
fd = minisix.io.StringIO(s)
reader = csv.reader(fd)
return next(reader)
csv.join = join

View File

@ -101,7 +101,7 @@ class RingBuffer(object):
def __getitem__(self, idx):
if self.full:
oidx = idx
if isinstance(oidx, types.SliceType):
if isinstance(oidx, slice):
L = []
for i in xrange(*slice.indices(oidx, len(self))):
L.append(self[i])
@ -113,7 +113,7 @@ class RingBuffer(object):
idx = (idx + self.i) % len(self.L)
return self.L[idx]
else:
if isinstance(idx, types.SliceType):
if isinstance(idx, slice):
L = []
for i in xrange(*slice.indices(idx, len(self))):
L.append(self[i])
@ -124,7 +124,7 @@ class RingBuffer(object):
def __setitem__(self, idx, elt):
if self.full:
oidx = idx
if isinstance(oidx, types.SliceType):
if isinstance(oidx, slice):
range_ = xrange(*slice.indices(oidx, len(self)))
if len(range_) != len(elt):
raise ValueError('seq must be the same length as slice.')
@ -138,7 +138,7 @@ class RingBuffer(object):
idx = (idx + self.i) % len(self.L)
self.L[idx] = elt
else:
if isinstance(idx, types.SliceType):
if isinstance(idx, slice):
range_ = xrange(*slice.indices(idx, len(self)))
if len(range_) != len(elt):
raise ValueError('seq must be the same length as slice.')
@ -228,7 +228,7 @@ class queue(object):
def __getitem__(self, oidx):
if len(self) == 0:
raise IndexError('queue index out of range')
if isinstance(oidx, types.SliceType):
if isinstance(oidx, slice):
L = []
for i in xrange(*slice.indices(oidx, len(self))):
L.append(self[i])
@ -245,7 +245,7 @@ class queue(object):
def __setitem__(self, oidx, value):
if len(self) == 0:
raise IndexError('queue index out of range')
if isinstance(oidx, types.SliceType):
if isinstance(oidx, slice):
range_ = xrange(*slice.indices(oidx, len(self)))
if len(range_) != len(value):
raise ValueError('seq must be the same length as slice.')
@ -266,7 +266,7 @@ class queue(object):
self.back[idx-len(self.front)] = value
def __delitem__(self, oidx):
if isinstance(oidx, types.SliceType):
if isinstance(oidx, slice):
range_ = xrange(*slice.indices(oidx, len(self)))
for i in range_:
del self[i]

View File

@ -32,12 +32,6 @@ import re
import sys
import base64
import socket
import urllib
import urllib2
import httplib
import urlparse
import htmlentitydefs
from HTMLParser import HTMLParser
sockerrors = (socket.error,)
try:
@ -48,12 +42,37 @@ except AttributeError:
from .str import normalizeWhitespace
from .. import minisix
Request = urllib2.Request
urlquote = urllib.quote
urlunquote = urllib.unquote
def urlencode(*args, **kwargs):
return urllib.urlencode(*args, **kwargs).encode()
if minisix.PY2:
import urllib
import urllib2
from httplib import InvalidURL
from urlparse import urlsplit, urlunsplit, urlparse
from htmlentitydefs import entitydefs, name2codepoint
from HTMLParser import HTMLParser
Request = urllib2.Request
urlquote = urllib.quote
urlquote_plus = urllib.quote_plus
urlunquote = urllib.unquote
urlopen = urllib2.urlopen
def urlencode(*args, **kwargs):
return urllib.urlencode(*args, **kwargs).encode()
from urllib2 import HTTPError, URLError
from urllib import splithost, splituser
else:
from http.client import InvalidURL
from urllib.parse import urlsplit, urlunsplit, urlparse
from html.entities import entitydefs, name2codepoint
from html.parser import HTMLParser
import urllib.request, urllib.parse, urllib.error
Request = urllib.request.Request
urlquote = urllib.parse.quote
urlquote_plus = urllib.parse.quote_plus
urlunquote = urllib.parse.unquote
urlopen = urllib.request.urlopen
def urlencode(*args, **kwargs):
return urllib.parse.urlencode(*args, **kwargs).encode()
from urllib.error import HTTPError, URLError
from urllib.parse import splithost, splituser
class Error(Exception):
pass
@ -106,17 +125,18 @@ def getUrlFd(url, headers=None, data=None, timeout=None):
"""getUrlFd(url, headers=None, data=None, timeout=None)
Opens the given url and returns a file object. Headers and data are
a dict and string, respectively, as per urllib2.Request's arguments."""
a dict and string, respectively, as per urllib.request.Request's
arguments."""
if headers is None:
headers = defaultHeaders
if minisix.PY3 and isinstance(data, str):
data = data.encode()
try:
if not isinstance(url, urllib2.Request):
(scheme, loc, path, query, frag) = urlparse.urlsplit(url)
(user, host) = urllib.splituser(loc)
url = urlparse.urlunsplit((scheme, host, path, query, ''))
request = urllib2.Request(url, headers=headers, data=data)
if not isinstance(url, Request):
(scheme, loc, path, query, frag) = urlsplit(url)
(user, host) = splituser(loc)
url = urlunsplit((scheme, host, path, query, ''))
request = Request(url, headers=headers, data=data)
if user:
request.add_header('Authorization',
'Basic %s' % base64.b64encode(user))
@ -126,17 +146,17 @@ def getUrlFd(url, headers=None, data=None, timeout=None):
httpProxy = force(proxy)
if httpProxy:
request.set_proxy(httpProxy, 'http')
fd = urllib2.urlopen(request, timeout=timeout)
fd = urlopen(request, timeout=timeout)
return fd
except socket.timeout as e:
raise Error(TIMED_OUT)
except sockerrors as e:
raise Error(strError(e))
except httplib.InvalidURL as e:
except InvalidURL as e:
raise Error('Invalid URL: %s' % e)
except urllib2.HTTPError as e:
except HTTPError as e:
raise Error(strError(e))
except urllib2.URLError as e:
except URLError as e:
raise Error(strError(e.reason))
# Raised when urllib doesn't recognize the url type
except ValueError as e:
@ -147,7 +167,7 @@ def getUrl(url, size=None, headers=None, data=None, timeout=None):
Gets a page. Returns a string that is the page gotten. Size is an integer
number of bytes to read from the URL. Headers and data are dicts as per
urllib2.Request's arguments."""
urllib.request.Request's arguments."""
fd = getUrlFd(url, headers=headers, data=data, timeout=timeout)
try:
if size is None:
@ -160,7 +180,7 @@ def getUrl(url, size=None, headers=None, data=None, timeout=None):
return text
def getDomain(url):
return urlparse.urlparse(url)[1]
return urlparse(url)[1]
_charset_re = ('<meta[^a-z<>]+charset='
"""(?P<charset>("[^"]+"|'[^']+'))""")
@ -185,7 +205,7 @@ def getEncoding(s):
class HtmlToText(HTMLParser, object):
"""Taken from some eff-bot code on c.l.p."""
entitydefs = htmlentitydefs.entitydefs.copy()
entitydefs = entitydefs.copy()
entitydefs['nbsp'] = ' '
def __init__(self, tagReplace=' '):
self.data = []
@ -202,8 +222,8 @@ class HtmlToText(HTMLParser, object):
self.data.append(data)
def handle_entityref(self, data):
if data in htmlentitydefs.name2codepoint:
self.data.append(unichr(htmlentitydefs.name2codepoint[data]))
if data in name2codepoint:
self.data.append(unichr(name2codepoint[data]))
elif minisix.PY3 and isinstance(data, bytes):
self.data.append(data.decode())
elif minisix.PY2 and isinstance(data, str):