Web: Use a timeout to fetch pages. Closes GH-1275.

This is required because the sandbox has not been used
since 9c57199838.
Valentin Lorentz 2016-12-08 00:48:11 +01:00
parent b9b36d4de5
commit 9fe4abec48
2 changed files with 14 additions and 6 deletions
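
For context: urllib applies no timeout by default, so once commands stopped running inside the sandbox, a server that accepts a connection but never sends a response could hang the bot indefinitely. A minimal Python 3 sketch of the failure mode and the fix (the URL is a placeholder):

    import urllib.request

    url = 'http://example.com/slow'  # placeholder

    # Without a timeout (and without socket.setdefaulttimeout()), this
    # call can block forever on an unresponsive server:
    #     urllib.request.urlopen(url)

    # With an explicit timeout, the wait is bounded and an OSError
    # (socket.timeout / URLError) is raised instead:
    try:
        fd = urllib.request.urlopen(url, timeout=5)
        print(fd.status)
    except OSError as e:
        print('fetch failed:', e)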

plugins/Web/config.py

@@ -73,6 +73,11 @@ conf.registerGlobalValue(Web, 'urlWhitelist',
     apply to all commands that retrieve data from user-supplied URLs,
     including fetch, headers, title, doctype."""))
 
+conf.registerGlobalValue(Web, 'timeout',
+    registry.NonNegativeInteger(5, """Determines the maximum number of
+    seconds the bot will wait for the site to respond, when using a command
+    in this plugin other than 'fetch'. If 0, will use socket.defaulttimeout"""))
+
 conf.registerGroup(Web, 'fetch')
 conf.registerGlobalValue(Web.fetch, 'maximum',
     registry.NonNegativeInteger(0, _("""Determines the maximum number of
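
The registered default is 5 seconds, with 0 meaning "fall back to the socket-wide default". A sketch of how a helper would presumably honor that convention (the helper name is hypothetical, not Limnoria's utils.web API):

    import urllib.request

    def open_url(url, timeout=0):
        # Hypothetical helper mirroring the registry convention above:
        # a nonzero value bounds this request; 0 defers to whatever
        # socket.setdefaulttimeout() established globally.
        if timeout:
            return urllib.request.urlopen(url, timeout=timeout)
        return urllib.request.urlopen(url)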

plugins/Web/plugin.py

@@ -140,6 +140,7 @@ def catch_web_errors(f):
 class Web(callbacks.PluginRegexp):
     """Add the help for "@help Web" here."""
     regexps = ['titleSnarfer']
+    threaded = True
 
     def noIgnore(self, irc, msg):
         return not self.registryValue('checkIgnored', msg.args[0])
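
Because a fetch now blocks the calling thread for up to the configured timeout, threaded = True tells supybot to run the plugin's commands in worker threads. A rough standard-library illustration of what that buys (not Limnoria's actual dispatcher):

    import threading
    import time

    def fetch_command():
        # Stand-in for a blocking fetch, bounded by the new timeout.
        time.sleep(5)
        print('fetch finished')

    # Run the blocking command off the main loop, as threaded = True
    # arranges, so the bot keeps handling other IRC traffic meanwhile.
    threading.Thread(target=fetch_command).start()
    print('main loop still responsive')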
@@ -227,7 +228,8 @@ class Web(callbacks.PluginRegexp):
         if not self._checkURLWhitelist(url):
             irc.error("This url is not on the whitelist.")
             return
-        fd = utils.web.getUrlFd(url)
+        timeout = self.registryValue('timeout')
+        fd = utils.web.getUrlFd(url, timeout=timeout)
         try:
             s = ', '.join([format(_('%s: %s'), k, v)
                            for (k, v) in fd.headers.items()])
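
The same pattern repeats in the hunks below: read the per-plugin value, then pass it to the utils.web helper, which presumably forwards it to the underlying urlopen call. Standalone, the headers logic amounts to roughly this (a sketch with plain urllib, not the utils.web API):

    import urllib.request

    def headers_summary(url, timeout=5):
        # Open the URL with a bounded wait, then format the response
        # headers the way the headers command replies with them.
        fd = urllib.request.urlopen(url, timeout=timeout)
        try:
            return ', '.join('%s: %s' % (k, v)
                             for (k, v) in fd.headers.items())
        finally:
            fd.close()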
@@ -249,8 +251,8 @@ class Web(callbacks.PluginRegexp):
             irc.error("This url is not on the whitelist.")
             return
         size = conf.supybot.protocols.http.peekSize()
-        s = utils.web.getUrl(url, size=size) \
-            .decode('utf8')
+        timeout = self.registryValue('timeout')
+        s = utils.web.getUrl(url, size=size, timeout=timeout).decode('utf8')
         m = self._doctypeRe.search(s)
         if m:
             s = utils.str.normalizeWhitespace(m.group(0))
@@ -270,7 +272,8 @@ class Web(callbacks.PluginRegexp):
         if not self._checkURLWhitelist(url):
             irc.error("This url is not on the whitelist.")
             return
-        fd = utils.web.getUrlFd(url)
+        timeout = self.registryValue('timeout')
+        fd = utils.web.getUrlFd(url, timeout=timeout)
         try:
             try:
                 size = fd.headers['Content-Length']
@@ -341,12 +344,12 @@ class Web(callbacks.PluginRegexp):
             irc.error("This url is not on the whitelist.")
             return
         max = self.registryValue('fetch.maximum')
+        timeout = self.registryValue('fetch.timeout')
         if not max:
             irc.error(_('This command is disabled '
                         '(supybot.plugins.Web.fetch.maximum is set to 0).'),
                       Raise=True)
-        fd = utils.web.getUrl(url, size=max) \
-            .decode('utf8')
+        fd = utils.web.getUrl(url, size=max, timeout=timeout).decode('utf8')
         irc.reply(fd)
 
 Class = Web
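
Putting the two knobs together, the fetch path now bounds both the amount read (fetch.maximum) and the wait (fetch.timeout); roughly, in plain urllib terms (a sketch, not the utils.web API):

    import urllib.request

    def fetch(url, max_bytes, timeout):
        # Bound both dimensions of the request: how long we wait for the
        # server (timeout) and how much of the body we read (max_bytes).
        fd = urllib.request.urlopen(url, timeout=timeout)
        try:
            return fd.read(max_bytes).decode('utf8')
        finally:
            fd.close()

At runtime both values can presumably be tuned through the Config plugin, e.g. config supybot.plugins.Web.fetch.timeout 10.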