Web: create a configurable url whitelist

Prevent various forms of abuse made possible by the Web plugin, such as fetching or titling
malicious content, or revealing the bot's IP address.
This commit is contained in:
Daniel Folkinshteyn 2013-05-11 14:11:57 -04:00
parent af1931b3db
commit 81c366a6be
3 changed files with 52 additions and 1 deletions

View File

@ -53,6 +53,12 @@ conf.registerChannelValue(Web, 'nonSnarfingRegexp',
snarfed. Give the empty string if you have no URLs that you'd like to snarfed. Give the empty string if you have no URLs that you'd like to
exclude from being snarfed.""")) exclude from being snarfed."""))
# Global (not per-channel) list of URL prefixes the plugin may fetch from.
# Defaults to an empty list, which _checkURLWhitelist treats as "no
# restriction": every URL is allowed.
conf.registerGlobalValue(Web, 'urlWhitelist',
registry.SpaceSeparatedListOfStrings([], """If set, bot will only fetch data
from urls in the whitelist, i.e. starting with http://domain/optionalpath/. This will
apply to all commands that retrieve data from user-supplied URLs,
including fetch, headers, title, doctype."""))
conf.registerGroup(Web, 'fetch') conf.registerGroup(Web, 'fetch')
conf.registerGlobalValue(Web.fetch, 'maximum', conf.registerGlobalValue(Web.fetch, 'maximum',
registry.NonNegativeInteger(0, """Determines the maximum number of registry.NonNegativeInteger(0, """Determines the maximum number of

View File

@ -107,12 +107,28 @@ class Web(callbacks.PluginRegexp):
titleSnarfer = urlSnarfer(titleSnarfer) titleSnarfer = urlSnarfer(titleSnarfer)
titleSnarfer.__doc__ = utils.web._httpUrlRe titleSnarfer.__doc__ = utils.web._httpUrlRe
def _checkURLWhitelist(self, url):
if not self.registryValue('urlWhitelist'):
return True
passed = False
for wu in self.registryValue('urlWhitelist'):
if wu.endswith('/') and url.find(wu) == 0:
passed = True
break
if (not wu.endswith('/')) and (url.find(wu + '/') == 0 or url == wu):
passed = True
break
return passed
def headers(self, irc, msg, args, url): def headers(self, irc, msg, args, url):
"""<url> """<url>
Returns the HTTP headers of <url>. Only HTTP urls are valid, of Returns the HTTP headers of <url>. Only HTTP urls are valid, of
course. course.
""" """
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
fd = utils.web.getUrlFd(url) fd = utils.web.getUrlFd(url)
try: try:
s = ', '.join([format('%s: %s', k, v) s = ', '.join([format('%s: %s', k, v)
@ -129,6 +145,9 @@ class Web(callbacks.PluginRegexp):
Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
course. course.
""" """
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
size = conf.supybot.protocols.http.peekSize() size = conf.supybot.protocols.http.peekSize()
s = utils.web.getUrl(url, size=size) s = utils.web.getUrl(url, size=size)
m = self._doctypeRe.search(s) m = self._doctypeRe.search(s)
@ -145,6 +164,9 @@ class Web(callbacks.PluginRegexp):
Returns the Content-Length header of <url>. Only HTTP urls are valid, Returns the Content-Length header of <url>. Only HTTP urls are valid,
of course. of course.
""" """
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
fd = utils.web.getUrlFd(url) fd = utils.web.getUrlFd(url)
try: try:
try: try:
@ -168,6 +190,9 @@ class Web(callbacks.PluginRegexp):
Returns the HTML <title>...</title> of a URL. Returns the HTML <title>...</title> of a URL.
""" """
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
size = conf.supybot.protocols.http.peekSize() size = conf.supybot.protocols.http.peekSize()
text = utils.web.getUrl(url, size=size) text = utils.web.getUrl(url, size=size)
parser = Title() parser = Title()
@ -231,6 +256,9 @@ class Web(callbacks.PluginRegexp):
supybot.plugins.Web.fetch.maximum. If that configuration variable is supybot.plugins.Web.fetch.maximum. If that configuration variable is
set to 0, this command will be effectively disabled. set to 0, this command will be effectively disabled.
""" """
if not self._checkURLWhitelist(url):
irc.error("This url is not on the whitelist.")
return
max = self.registryValue('fetch.maximum') max = self.registryValue('fetch.maximum')
if not max: if not max:
irc.error('This command is disabled ' irc.error('This command is disabled '

View File

@ -98,7 +98,7 @@ class WebTestCase(ChannelPluginTestCase):
try: try:
conf.supybot.plugins.Web.titleSnarfer.setValue(True) conf.supybot.plugins.Web.titleSnarfer.setValue(True)
self.assertSnarfRegexp('http://microsoft.com/', self.assertSnarfRegexp('http://microsoft.com/',
'Microsoft Corporation') 'Microsoft')
finally: finally:
conf.supybot.plugins.Web.titleSnarfer.setValue(False) conf.supybot.plugins.Web.titleSnarfer.setValue(False)
@ -125,5 +125,22 @@ class WebTestCase(ChannelPluginTestCase):
finally: finally:
conf.supybot.plugins.Web.nonSnarfingRegexp.set('') conf.supybot.plugins.Web.nonSnarfingRegexp.set('')
def testWhitelist(self):
    """Check that urlWhitelist restricts which URLs the Web commands fetch."""
    # Remember the current settings so they can be restored afterwards.
    fm = conf.supybot.plugins.Web.fetch.maximum()
    uw = conf.supybot.plugins.Web.urlWhitelist()
    try:
        conf.supybot.plugins.Web.fetch.maximum.set(1024)
        # With no whitelist configured, any URL may be fetched.
        self.assertNotError('web fetch http://fsf.org')
        conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')
        # URLs outside the whitelist are refused by every command.
        self.assertError('web fetch http://fsf.org')
        self.assertError('web title http://fsf.org')
        # A whitelisted host used as a prefix of another host must not match.
        self.assertError('web fetch http://slashdot.org.evildomain.com')
        self.assertNotError('web fetch http://slashdot.org')
        self.assertNotError('web fetch http://slashdot.org/recent')
        # Several space-separated entries are all honoured.
        conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')
        self.assertNotError('doctype http://fsf.org')
    finally:
        # Restore the saved values rather than assuming the defaults.
        conf.supybot.plugins.Web.urlWhitelist.setValue(uw)
        conf.supybot.plugins.Web.fetch.maximum.set(fm)
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: