Web: Use a timeout to fetch pages. Closes GH-1275.

This is required because the sandbox is not used anymore,
since 9c57199838.
This commit is contained in:
Valentin Lorentz 2016-12-08 00:48:11 +01:00
parent b9b36d4de5
commit 9fe4abec48
2 changed files with 14 additions and 6 deletions

View File

@@ -73,6 +73,11 @@ conf.registerGlobalValue(Web, 'urlWhitelist',
     apply to all commands that retrieve data from user-supplied URLs,
     including fetch, headers, title, doctype."""))

+conf.registerGlobalValue(Web, 'timeout',
+    registry.NonNegativeInteger(5, """Determines the maximum number of
+    seconds the bot will wait for the site to respond, when using a command
+    in this plugin other than 'fetch'. If 0, will use socket.defaulttimeout"""))
+
 conf.registerGroup(Web, 'fetch')
 conf.registerGlobalValue(Web.fetch, 'maximum',
     registry.NonNegativeInteger(0, _("""Determines the maximum number of

View File

@@ -140,6 +140,7 @@ def catch_web_errors(f):
 class Web(callbacks.PluginRegexp):
     """Add the help for "@help Web" here."""
     regexps = ['titleSnarfer']
+    threaded = True

     def noIgnore(self, irc, msg):
         return not self.registryValue('checkIgnored', msg.args[0])
@@ -227,7 +228,8 @@ class Web(callbacks.PluginRegexp):
         if not self._checkURLWhitelist(url):
             irc.error("This url is not on the whitelist.")
             return
-        fd = utils.web.getUrlFd(url)
+        timeout = self.registryValue('timeout')
+        fd = utils.web.getUrlFd(url, timeout=timeout)
         try:
             s = ', '.join([format(_('%s: %s'), k, v)
                            for (k, v) in fd.headers.items()])
@@ -249,8 +251,8 @@ class Web(callbacks.PluginRegexp):
             irc.error("This url is not on the whitelist.")
             return
         size = conf.supybot.protocols.http.peekSize()
-        s = utils.web.getUrl(url, size=size) \
-            .decode('utf8')
+        timeout = self.registryValue('timeout')
+        s = utils.web.getUrl(url, size=size, timeout=timeout).decode('utf8')
         m = self._doctypeRe.search(s)
         if m:
             s = utils.str.normalizeWhitespace(m.group(0))
@@ -270,7 +272,8 @@ class Web(callbacks.PluginRegexp):
         if not self._checkURLWhitelist(url):
             irc.error("This url is not on the whitelist.")
             return
-        fd = utils.web.getUrlFd(url)
+        timeout = self.registryValue('timeout')
+        fd = utils.web.getUrlFd(url, timeout=timeout)
         try:
             try:
                 size = fd.headers['Content-Length']
@@ -341,12 +344,12 @@ class Web(callbacks.PluginRegexp):
             irc.error("This url is not on the whitelist.")
             return
         max = self.registryValue('fetch.maximum')
+        timeout = self.registryValue('fetch.timeout')
         if not max:
             irc.error(_('This command is disabled '
                         '(supybot.plugins.Web.fetch.maximum is set to 0).'),
                       Raise=True)
-        fd = utils.web.getUrl(url, size=max) \
-            .decode('utf8')
+        fd = utils.web.getUrl(url, size=max, timeout=timeout).decode('utf8')
         irc.reply(fd)
Class = Web Class = Web