Limnoria/src/webutils.py

#!/usr/bin/env python

###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

__revision__ = "$Id$"

import fix

import re
import socket
import urllib2
import urlparse

class WebError(Exception):
    """Raised by this module when fetching a url fails.

    getUrlFd raises it, with a human-readable message as its argument, in
    place of the underlying socket or urllib2 exception.
    """

# Matches text of the form scheme://rest, where rest runs up to (but does not
# include) the first ']', ')', '>' or whitespace character.
urlRe = re.compile(r"(\w+://[^\])>\s]+)", re.IGNORECASE)

# Human-readable messages used as the argument of WebError.
REFUSED = 'Connection refused.'
TIMED_OUT = 'Connection timed out.'
RESET_BY_PEER = 'Connection reset by peer.'

def getUrlFd(url):
    """Gets a file-like object for a url."""
    try:
        fd = urllib2.urlopen(url)
        return fd
    except socket.timeout, e:
        raise WebError, TIMED_OUT
    except socket.error, e:
        if e.args[0] == 111:
            raise WebError, REFUSED
        elif e.args[0] in (110, 10060):
            raise WebError, TIMED_OUT
        elif e.args[0] == 104:
            raise WebError, RESET_BY_PEER
        else:
            raise WebError, str(e)
    except (urllib2.HTTPError, urllib2.URLError), e:
        raise WebError, str(e)
    
def getUrl(url, size=None):
    """Gets a page.  Returns a string that is the page gotten.

    If size is None the whole page is read; otherwise size is passed to a
    single read() call on the file-like object returned by getUrlFd.
    Exceptions raised by getUrlFd propagate to the caller.
    """
    fd = getUrlFd(url)
    try:
        if size is None:
            return fd.read()
        return fd.read(size)
    finally:
        # Close the file-like object even when read() raises, so it is not
        # leaked on a failed transfer.
        fd.close()

def getDomain(url):
    """Returns the network-location part of url.

    This is the second element of the tuple produced by urlparse.urlparse;
    it is the empty string when url has no '//' location part.
    """
    components = urlparse.urlparse(url)
    netloc = components[1]
    return netloc


# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`#!/usr/bin/env python`

			`###`
			`# Copyright (c) 2002, Jeremiah Fincher`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions, and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions, and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution.`
			`# * Neither the name of the author of this software nor the name of`
			`# contributors to this software may be used to endorse or promote products`
			`# derived from this software without specific prior written consent.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`###`

			`__revision__ = "$Id$"`

			`import fix`

Moved _urlRe to webutils as urlRe. 2004-01-04 12:18:53 +01:00			`import re`
sockets, they do a body good 2003-12-02 06:49:56 +01:00			`import socket`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`import urllib2`
Added getDomain and some tests. 2004-01-31 18:36:17 +01:00			`import urlparse`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00
Changed the name of WebException to WebError. 2003-12-03 05:57:30 +01:00			`class WebError(Exception):`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`pass`

Moved _urlRe to webutils as urlRe. 2004-01-04 12:18:53 +01:00			`urlRe = re.compile(r"(\w+://[^\])>\s]+)", re.I)`

Handled the ugly "timed out" error message. 2004-01-15 12:32:25 +01:00			`REFUSED = 'Connection refused.'`
			`TIMED_OUT = 'Connection timed out.'`
			`RESET_BY_PEER = 'Connection reset by peer.'`

Added getUrlFd, used it in Http.kernel. 2003-12-02 12:10:31 +01:00			`def getUrlFd(url):`
Changed the name of WebException to WebError. 2003-12-03 05:57:30 +01:00			`"""Gets a file-like object for a url."""`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`try:`
			`fd = urllib2.urlopen(url)`
Added getUrlFd, used it in Http.kernel. 2003-12-02 12:10:31 +01:00			`return fd`
Handled the ugly "timed out" error message. 2004-01-15 12:32:25 +01:00			`except socket.timeout, e:`
			`raise WebError, TIMED_OUT`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`except socket.error, e:`
			`if e.args[0] == 111:`
Handled the ugly "timed out" error message. 2004-01-15 12:32:25 +01:00			`raise WebError, REFUSED`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`elif e.args[0] in (110, 10060):`
Handled the ugly "timed out" error message. 2004-01-15 12:32:25 +01:00			`raise WebError, TIMED_OUT`
Added another case to except clauses for Connection Reset by Peer. 2003-12-06 15:16:02 +01:00			`elif e.args[0] == 104:`
Handled the ugly "timed out" error message. 2004-01-15 12:32:25 +01:00			`raise WebError, RESET_BY_PEER`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`else:`
Changed the name of WebException to WebError. 2003-12-03 05:57:30 +01:00			`raise WebError, str(e)`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`except (urllib2.HTTPError, urllib2.URLError), e:`
Changed the name of WebException to WebError. 2003-12-03 05:57:30 +01:00			`raise WebError, str(e)`
Added getUrlFd, used it in Http.kernel. 2003-12-02 12:10:31 +01:00
			`def getUrl(url, size=None):`
			`"""Gets a page. Returns a string that is the page gotten."""`
			`fd = getUrlFd(url)`
Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00			`if size is None:`
			`text = fd.read()`
			`else:`
			`text = fd.read(size)`
			`fd.close()`
			`return text`

Added getDomain and some tests. 2004-01-31 18:36:17 +01:00			`def getDomain(url):`
			`return urlparse.urlparse(url)[1]`

Added webutils module; used it in Http. 2003-12-02 01:12:45 +01:00
			`# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:`