From 944f9c3e3fa2947c0d44a4c7d15af6af42920792 Mon Sep 17 00:00:00 2001 From: Daniel Folkinshteyn Date: Sat, 11 May 2013 14:11:57 -0400 Subject: [PATCH] Web: create a configurable url whitelist Prevent various forms of abuse that result via the Web plugin, such as fetching or titling malicious content, or revealing bot IP. Conflicts: plugins/Web/plugin.py plugins/Web/test.py --- plugins/Web/config.py | 6 ++++++ plugins/Web/plugin.py | 28 ++++++++++++++++++++++++++++ plugins/Web/test.py | 19 ++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/plugins/Web/config.py b/plugins/Web/config.py index 91a4ae2e1..bc73d685b 100644 --- a/plugins/Web/config.py +++ b/plugins/Web/config.py @@ -57,6 +57,12 @@ conf.registerChannelValue(Web, 'nonSnarfingRegexp', will not be snarfed. Give the empty string if you have no URLs that you'd like to exclude from being snarfed."""))) +conf.registerGlobalValue(Web, 'urlWhitelist', + registry.SpaceSeparatedListOfStrings([], """If set, bot will only fetch data + from urls in the whitelist, i.e. starting with http://domain/optionalpath/. 
This will + apply to all commands that retrieve data from user-supplied URLs, + including fetch, headers, title, doctype.""")) + conf.registerGroup(Web, 'fetch') conf.registerGlobalValue(Web.fetch, 'maximum', registry.NonNegativeInteger(0, _("""Determines the maximum number of diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py index 486dd3446..f289cfdfd 100644 --- a/plugins/Web/plugin.py +++ b/plugins/Web/plugin.py @@ -119,6 +119,19 @@ class Web(callbacks.PluginRegexp): titleSnarfer = urlSnarfer(titleSnarfer) titleSnarfer.__doc__ = utils.web._httpUrlRe + def _checkURLWhitelist(self, url): + if not self.registryValue('urlWhitelist'): + return True + passed = False + for wu in self.registryValue('urlWhitelist'): + if wu.endswith('/') and url.find(wu) == 0: + passed = True + break + if (not wu.endswith('/')) and (url.find(wu + '/') == 0 or url == wu): + passed = True + break + return passed + @internationalizeDocstring def headers(self, irc, msg, args, url): """ @@ -126,6 +139,9 @@ class Web(callbacks.PluginRegexp): Returns the HTTP headers of . Only HTTP urls are valid, of course. """ + if not self._checkURLWhitelist(url): + irc.error("This url is not on the whitelist.") + return fd = utils.web.getUrlFd(url) try: s = ', '.join([format(_('%s: %s'), k, v) @@ -143,6 +159,9 @@ class Web(callbacks.PluginRegexp): Returns the DOCTYPE string of . Only HTTP urls are valid, of course. """ + if not self._checkURLWhitelist(url): + irc.error("This url is not on the whitelist.") + return size = conf.supybot.protocols.http.peekSize() s = utils.web.getUrl(url, size=size) \ .decode('utf8') @@ -161,6 +180,9 @@ class Web(callbacks.PluginRegexp): Returns the Content-Length header of . Only HTTP urls are valid, of course. 
""" + if not self._checkURLWhitelist(url): + irc.error("This url is not on the whitelist.") + return fd = utils.web.getUrlFd(url) try: try: @@ -187,6 +209,9 @@ class Web(callbacks.PluginRegexp): If --no-filter is given, the bot won't strip special chars (action, DCC, ...). """ + if not self._checkURLWhitelist(url): + irc.error("This url is not on the whitelist.") + return size = conf.supybot.protocols.http.peekSize() text = utils.web.getUrl(url, size=size) try: @@ -239,6 +264,9 @@ class Web(callbacks.PluginRegexp): supybot.plugins.Web.fetch.maximum. If that configuration variable is set to 0, this command will be effectively disabled. """ + if not self._checkURLWhitelist(url): + irc.error("This url is not on the whitelist.") + return max = self.registryValue('fetch.maximum') if not max: irc.error(_('This command is disabled ' diff --git a/plugins/Web/test.py b/plugins/Web/test.py index 7a51832d8..21000583a 100644 --- a/plugins/Web/test.py +++ b/plugins/Web/test.py @@ -75,7 +75,7 @@ class WebTestCase(ChannelPluginTestCase): try: conf.supybot.plugins.Web.titleSnarfer.setValue(True) self.assertSnarfRegexp('http://microsoft.com/', - 'Title: Microsoft') + 'Microsoft') finally: conf.supybot.plugins.Web.titleSnarfer.setValue(False) @@ -102,5 +102,22 @@ class WebTestCase(ChannelPluginTestCase): finally: conf.supybot.plugins.Web.nonSnarfingRegexp.set('') + def testWhitelist(self): + fm = conf.supybot.plugins.Web.fetch.maximum() + uw = conf.supybot.plugins.Web.urlWhitelist() + try: + conf.supybot.plugins.Web.fetch.maximum.set(1024) + self.assertNotError('web fetch http://fsf.org') + conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org') + self.assertError('web fetch http://fsf.org') + self.assertError('wef title http://fsf.org') + self.assertError('web fetch http://slashdot.org.evildomain.com') + self.assertNotError('web fetch http://slashdot.org') + self.assertNotError('web fetch http://slashdot.org/recent') + 
conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org') + self.assertNotError('doctype http://fsf.org') + finally: + conf.supybot.plugins.Web.urlWhitelist.set('') + conf.supybot.plugins.Web.fetch.maximum.set(fm) # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: