#!/usr/bin/env python

###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

__revision__ = "$Id$"

import supybot.fix as fix

import re
import socket
import urllib2
import urlparse

import supybot.conf as conf

Request = urllib2.Request

class WebError(Exception):
    pass

urlRe = re.compile(r"(\w+://[^\])>\s]+)", re.I)
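# Usage sketch (the URL below is illustrative only): urlRe picks URLs out of
# free-form text, stopping at whitespace, ')', ']', and '>', e.g.
#     urlRe.findall('see <http://www.example.com/faq.html> for details')
# should return ['http://www.example.com/faq.html'].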

REFUSED = 'Connection refused.'
TIMED_OUT = 'Connection timed out.'
UNKNOWN_HOST = 'Unknown host.'
RESET_BY_PEER = 'Connection reset by peer.'
FORBIDDEN = 'Client forbidden from accessing URL.'

def strError(e):
    try:
        n = e.args[0]
    except Exception:
        return str(e)
    if n == 111:
        return REFUSED
    elif n in (110, 10060):
        return TIMED_OUT
    elif n == 104:
        return RESET_BY_PEER
    elif n == 8:
        return UNKNOWN_HOST
    elif n == 403:
        return FORBIDDEN
    else:
        return str(e)
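# Rough illustration: strError maps common errno values from socket errors
# onto the friendlier strings above, so a hypothetical
#     strError(socket.error(111, 'Connection refused'))
# would return REFUSED; anything unrecognized falls back to str(e).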

def getUrlFd(url):
    """Gets a file-like object for a url."""
    try:
        if not isinstance(url, urllib2.Request):
            if '#' in url:
                url = url[:url.index('#')]
            request = urllib2.Request(url)
        else:
            request = url
        httpProxy = conf.supybot.protocols.http.proxy()
        if httpProxy:
            request.set_proxy(httpProxy, 'http')
        fd = urllib2.urlopen(request)
        return fd
    except socket.timeout, e:
        raise WebError, TIMED_OUT
    except socket.error, e:
        raise WebError, strError(e)
    # urllib2.HTTPError subclasses urllib2.URLError, so it must be caught
    # before the more general URLError clause or this branch would never run.
    except urllib2.HTTPError, e:
        raise WebError, strError(e)
    except urllib2.URLError, e:
        raise WebError, strError(e.reason[1])
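# Usage sketch (illustrative URL): getUrlFd returns whatever urllib2.urlopen
# returns, so the usual file-like interface applies:
#     fd = getUrlFd('http://www.example.com/')
#     headers = fd.info()   # response headers
#     data = fd.read(4096)
#     fd.close()
# Failures surface as WebError rather than the various urllib2/socket errors,
# and the configured supybot.protocols.http.proxy is honored automatically.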

def getUrl(url, size=None):
    """Gets a page.  Returns a string that is the page gotten."""
    fd = getUrlFd(url)
    try:
        if size is None:
            text = fd.read()
        else:
            text = fd.read(size)
    except socket.timeout, e:
        raise WebError, TIMED_OUT
    fd.close()
    return text
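# Usage sketch (illustrative URL): getUrl('http://www.example.com/', size=1024)
# reads at most the first 1024 bytes of the page; omitting size reads the
# whole document.  Callers should be prepared to catch WebError.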

def getDomain(url):
    return urlparse.urlparse(url)[1]
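# Minimal smoke-test sketch; the URLs below are illustrative and nothing here
# touches the network, so this only exercises the pure-string helpers.
if __name__ == '__main__':
    text = 'docs at http://www.example.com/faq.html (mirrored elsewhere)'
    print urlRe.findall(text)
    print getDomain('http://www.example.com/faq.html')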
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: