Web: Use a timeout to fetch pages. Closes GH-1275.

This is required because the sandbox has not been used
since 9c57199838.
Valentin Lorentz 2016-12-08 00:48:11 +01:00
parent b9b36d4de5
commit 9fe4abec48
2 changed files with 14 additions and 6 deletions
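
For context: urllib applies no timeout by default, so once commands stopped running inside the sandbox, a server that accepts a connection but never sends a response could hang the bot indefinitely. A minimal Python 3 sketch of the failure mode and the fix (the URL is a placeholder):

    import urllib.request

    url = 'http://example.com/slow'  # placeholder

    # Without a timeout (and without socket.setdefaulttimeout()), this
    # call can block forever on an unresponsive server:
    #     urllib.request.urlopen(url)

    # With an explicit timeout, the wait is bounded and an OSError
    # (socket.timeout / URLError) is raised instead:
    try:
        fd = urllib.request.urlopen(url, timeout=5)
        print(fd.status)
    except OSError as e:
        print('fetch failed:', e)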

plugins/Web/config.py

@@ -73,6 +73,11 @@ conf.registerGlobalValue(Web, 'urlWhitelist',
     apply to all commands that retrieve data from user-supplied URLs,
     including fetch, headers, title, doctype."""))
 
+conf.registerGlobalValue(Web, 'timeout',
+    registry.NonNegativeInteger(5, """Determines the maximum number of
+    seconds the bot will wait for the site to respond, when using a command
+    in this plugin other than 'fetch'. If 0, will use socket.defaulttimeout"""))
+
 conf.registerGroup(Web, 'fetch')
 conf.registerGlobalValue(Web.fetch, 'maximum',
     registry.NonNegativeInteger(0, _("""Determines the maximum number of
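
The registered default is 5 seconds, with 0 meaning "fall back to the socket-wide default". A sketch of how a helper would presumably honor that convention (the helper name is hypothetical, not Limnoria's utils.web API):

    import urllib.request

    def open_url(url, timeout=0):
        # Hypothetical helper mirroring the registry convention above:
        # a nonzero value bounds this request; 0 defers to whatever
        # socket.setdefaulttimeout() established globally.
        if timeout:
            return urllib.request.urlopen(url, timeout=timeout)
        return urllib.request.urlopen(url)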

plugins/Web/plugin.py

@@ -140,6 +140,7 @@ def catch_web_errors(f):
 class Web(callbacks.PluginRegexp):
     """Add the help for "@help Web" here."""
     regexps = ['titleSnarfer']
+    threaded = True
 
     def noIgnore(self, irc, msg):
         return not self.registryValue('checkIgnored', msg.args[0])
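
Because a fetch now blocks the calling thread for up to the configured timeout, threaded = True tells supybot to run the plugin's commands in worker threads. A rough standard-library illustration of what that buys (not Limnoria's actual dispatcher):

    import threading
    import time

    def fetch_command():
        # Stand-in for a blocking fetch, bounded by the new timeout.
        time.sleep(5)
        print('fetch finished')

    # Run the blocking command off the main loop, as threaded = True
    # arranges, so the bot keeps handling other IRC traffic meanwhile.
    threading.Thread(target=fetch_command).start()
    print('main loop still responsive')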
@@ -227,7 +228,8 @@ class Web(callbacks.PluginRegexp):
         if not self._checkURLWhitelist(url):
             irc.error("This url is not on the whitelist.")
             return
-        fd = utils.web.getUrlFd(url)
+        timeout = self.registryValue('timeout')
+        fd = utils.web.getUrlFd(url, timeout=timeout)
         try:
             s = ', '.join([format(_('%s: %s'), k, v)
                            for (k, v) in fd.headers.items()])
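
The same pattern repeats in the hunks below: read the per-plugin value, then pass it to the utils.web helper, which presumably forwards it to the underlying urlopen call. Standalone, the headers logic amounts to roughly this (a sketch with plain urllib, not the utils.web API):

    import urllib.request

    def headers_summary(url, timeout=5):
        # Open the URL with a bounded wait, then format the response
        # headers the way the headers command replies with them.
        fd = urllib.request.urlopen(url, timeout=timeout)
        try:
            return ', '.join('%s: %s' % (k, v)
                             for (k, v) in fd.headers.items())
        finally:
            fd.close()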
@@ -249,8 +251,8 @@ class Web(callbacks.PluginRegexp):
             irc.error("This url is not on the whitelist.")
             return
         size = conf.supybot.protocols.http.peekSize()
-        s = utils.web.getUrl(url, size=size) \
-            .decode('utf8')
+        timeout = self.registryValue('timeout')
+        s = utils.web.getUrl(url, size=size, timeout=timeout).decode('utf8')
         m = self._doctypeRe.search(s)
         if m:
             s = utils.str.normalizeWhitespace(m.group(0))
@@ -270,7 +272,8 @@ class Web(callbacks.PluginRegexp):
         if not self._checkURLWhitelist(url):
             irc.error("This url is not on the whitelist.")
             return
-        fd = utils.web.getUrlFd(url)
+        timeout = self.registryValue('timeout')
+        fd = utils.web.getUrlFd(url, timeout=timeout)
         try:
             try:
                 size = fd.headers['Content-Length']
@@ -341,12 +344,12 @@ class Web(callbacks.PluginRegexp):
             irc.error("This url is not on the whitelist.")
             return
         max = self.registryValue('fetch.maximum')
+        timeout = self.registryValue('fetch.timeout')
         if not max:
             irc.error(_('This command is disabled '
                         '(supybot.plugins.Web.fetch.maximum is set to 0).'),
                       Raise=True)
-        fd = utils.web.getUrl(url, size=max) \
-            .decode('utf8')
+        fd = utils.web.getUrl(url, size=max, timeout=timeout).decode('utf8')
         irc.reply(fd)
 
 Class = Web
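
Putting the two knobs together, the fetch path now bounds both the amount read (fetch.maximum) and the wait (fetch.timeout); roughly, in plain urllib terms (a sketch, not the utils.web API):

    import urllib.request

    def fetch(url, max_bytes, timeout):
        # Bound both dimensions of the request: how long we wait for the
        # server (timeout) and how much of the body we read (max_bytes).
        fd = urllib.request.urlopen(url, timeout=timeout)
        try:
            return fd.read(max_bytes).decode('utf8')
        finally:
            fd.close()

At runtime both values can presumably be tuned through the Config plugin, e.g. config supybot.plugins.Web.fetch.timeout 10.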