utils.web: Rename getUrl to getUrlContent and add getUrlTargetAndContent.

This commit is contained in:
Valentin Lorentz 2015-10-25 16:19:49 +01:00
parent e3ff413734
commit 57b77a6725
1 changed file with 18 additions and 3 deletions

View File

@ -161,10 +161,11 @@ def getUrlFd(url, headers=None, data=None, timeout=None):
except ValueError as e: except ValueError as e:
raise Error(strError(e)) raise Error(strError(e))
def getUrl(url, size=None, headers=None, data=None, timeout=None): def getUrlTargetAndContent(url, size=None, headers=None, data=None, timeout=None):
"""getUrl(url, size=None, headers=None, data=None, timeout=None) """getUrlTargetAndContent(url, size=None, headers=None, data=None, timeout=None)
Gets a page. Returns a string that is the page gotten. Size is an integer Gets a page. Returns a (target, text) tuple: the target URL (i.e. after
redirections) and the page gotten, in that order. Size is an integer
number of bytes to read from the URL. Headers and data are dicts as per number of bytes to read from the URL. Headers and data are dicts as per
urllib.request.Request's arguments.""" urllib.request.Request's arguments."""
fd = getUrlFd(url, headers=headers, data=data, timeout=timeout) fd = getUrlFd(url, headers=headers, data=data, timeout=timeout)
@ -175,9 +176,23 @@ def getUrl(url, size=None, headers=None, data=None, timeout=None):
text = fd.read(size) text = fd.read(size)
except socket.timeout: except socket.timeout:
raise Error(TIMED_OUT) raise Error(TIMED_OUT)
target = fd.geturl()
fd.close() fd.close()
return (target, text)
def getUrlContent(*args, **kwargs):
    """getUrlContent(url, size=None, headers=None, data=None, timeout=None)

    Fetches a page and returns only the page that was gotten. Size is an
    integer number of bytes to read from the URL. Headers and data are dicts
    as per urllib.request.Request's arguments.

    All positional and keyword arguments are forwarded unchanged to
    getUrlTargetAndContent; the target URL it also returns is discarded."""
    # getUrlTargetAndContent yields a (target, text) pair; only the text
    # half is of interest to callers of this function.
    _final_url, body = getUrlTargetAndContent(*args, **kwargs)
    return body
def getUrl(*args, **kwargs):
    """Alias for getUrlContent.

    Forwards every positional and keyword argument to getUrlContent and
    returns whatever it returns."""
    content = getUrlContent(*args, **kwargs)
    return content
def getDomain(url):
    """Returns the element at index 1 of urlparse(url).

    NOTE(review): with the standard library's urlparse this is the network
    location part of the URL (it may include user info and port); the
    import of urlparse is outside the visible chunk -- confirm."""
    parsed = urlparse(url)
    return parsed[1]