Web: create a configurable URL whitelist

Prevent various forms of abuse that are possible via the Web plugin, such as
fetching or titling malicious content, or revealing the bot's IP address.
This commit is contained in:
Daniel Folkinshteyn 2013-05-11 14:11:57 -04:00
parent af1931b3db
commit 81c366a6be
3 changed files with 52 additions and 1 deletions

View File

@ -53,6 +53,12 @@ conf.registerChannelValue(Web, 'nonSnarfingRegexp',
snarfed. Give the empty string if you have no URLs that you'd like to
exclude from being snarfed."""))
# Global (not per-channel) whitelist of URL prefixes the bot is allowed to
# fetch from.  The default empty list disables the restriction entirely;
# enforcement is done by Web._checkURLWhitelist before any retrieval.
conf.registerGlobalValue(Web, 'urlWhitelist',
    registry.SpaceSeparatedListOfStrings([], """If set, bot will only fetch data
from urls in the whitelist, i.e. starting with http://domain/optionalpath/. This will
apply to all commands that retrieve data from user-supplied URLs,
including fetch, headers, title, doctype."""))
conf.registerGroup(Web, 'fetch')
conf.registerGlobalValue(Web.fetch, 'maximum',
registry.NonNegativeInteger(0, """Determines the maximum number of

View File

@ -107,12 +107,28 @@ class Web(callbacks.PluginRegexp):
titleSnarfer = urlSnarfer(titleSnarfer)
titleSnarfer.__doc__ = utils.web._httpUrlRe
def _checkURLWhitelist(self, url):
    """Return True if *url* is permitted by the urlWhitelist config value.

    An empty whitelist (the default) means no restriction.  A whitelist
    entry matches when the URL is exactly the entry, or lies under it on
    a path-segment boundary: 'http://a.org' matches 'http://a.org' and
    'http://a.org/x', but NOT 'http://a.org.evil.com/'.
    """
    whitelist = self.registryValue('urlWhitelist')
    # Unset/empty whitelist disables filtering entirely.
    if not whitelist:
        return True
    for entry in whitelist:
        # Normalize the entry to end with '/' so matching always happens
        # on a slash boundary; this is what prevents a whitelisted
        # 'http://a.org' from matching 'http://a.org.evil.com/'.
        # (startswith is a true prefix test, unlike find() == 0 which
        # scans the whole string on a miss.)
        prefix = entry if entry.endswith('/') else entry + '/'
        if url == entry or url.startswith(prefix):
            return True
    return False
def headers(self, irc, msg, args, url):
"""<url>
Returns the HTTP headers of <url>. Only HTTP urls are valid, of
course.
"""
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
fd = utils.web.getUrlFd(url)
try:
s = ', '.join([format('%s: %s', k, v)
@ -129,6 +145,9 @@ class Web(callbacks.PluginRegexp):
Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
course.
"""
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
size = conf.supybot.protocols.http.peekSize()
s = utils.web.getUrl(url, size=size)
m = self._doctypeRe.search(s)
@ -145,6 +164,9 @@ class Web(callbacks.PluginRegexp):
Returns the Content-Length header of <url>. Only HTTP urls are valid,
of course.
"""
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
fd = utils.web.getUrlFd(url)
try:
try:
@ -168,6 +190,9 @@ class Web(callbacks.PluginRegexp):
Returns the HTML <title>...</title> of a URL.
"""
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
size = conf.supybot.protocols.http.peekSize()
text = utils.web.getUrl(url, size=size)
parser = Title()
@ -231,6 +256,9 @@ class Web(callbacks.PluginRegexp):
supybot.plugins.Web.fetch.maximum. If that configuration variable is
set to 0, this command will be effectively disabled.
"""
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
max = self.registryValue('fetch.maximum')
if not max:
irc.error('This command is disabled '

View File

@ -98,7 +98,7 @@ class WebTestCase(ChannelPluginTestCase):
try:
conf.supybot.plugins.Web.titleSnarfer.setValue(True)
self.assertSnarfRegexp('http://microsoft.com/',
'Microsoft Corporation')
'Microsoft')
finally:
conf.supybot.plugins.Web.titleSnarfer.setValue(False)
@ -125,5 +125,22 @@ class WebTestCase(ChannelPluginTestCase):
finally:
conf.supybot.plugins.Web.nonSnarfingRegexp.set('')
def testWhitelist(self):
    """urlWhitelist must restrict every URL-fetching command."""
    # Save the current settings so they can be restored afterwards.
    fm = conf.supybot.plugins.Web.fetch.maximum()
    uw = conf.supybot.plugins.Web.urlWhitelist()
    try:
        conf.supybot.plugins.Web.fetch.maximum.set(1024)
        self.assertNotError('web fetch http://fsf.org')
        conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')
        self.assertError('web fetch http://fsf.org')
        # Was 'wef title ...': the typo made assertError pass because the
        # command didn't exist, not because the whitelist rejected it.
        self.assertError('web title http://fsf.org')
        # A whitelisted domain must not match as a mere string prefix of
        # another domain.
        self.assertError('web fetch http://slashdot.org.evildomain.com')
        self.assertNotError('web fetch http://slashdot.org')
        self.assertNotError('web fetch http://slashdot.org/recent')
        conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')
        self.assertNotError('doctype http://fsf.org')
    finally:
        # Restore the saved values via setValue (set() expects a string
        # to parse); the old code discarded uw and reset to ''.
        conf.supybot.plugins.Web.urlWhitelist.setValue(uw)
        conf.supybot.plugins.Web.fetch.maximum.setValue(fm)
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: