Web: create a configurable url whitelist

Prevent various forms of abuse carried out via the Web plugin, such as fetching
or titling malicious content, or revealing the bot's IP address.

Conflicts:
	plugins/Web/plugin.py
	plugins/Web/test.py
commit 944f9c3e3f
parent c27070895b
Author: Daniel Folkinshteyn
Date: 2013-05-11 14:11:57 -04:00
Committed by: Valentin Lorentz

3 changed files with 52 additions and 1 deletion

plugins/Web/config.py

@@ -57,6 +57,12 @@ conf.registerChannelValue(Web, 'nonSnarfingRegexp',
     will not be snarfed. Give the empty string if you have no URLs that you'd
     like to exclude from being snarfed.""")))
 
+conf.registerGlobalValue(Web, 'urlWhitelist',
+    registry.SpaceSeparatedListOfStrings([], """If set, bot will only fetch data
+    from urls in the whitelist, i.e. starting with http://domain/optionalpath/. This will
+    apply to all commands that retrieve data from user-supplied URLs,
+    including fetch, headers, title, doctype."""))
+
 conf.registerGroup(Web, 'fetch')
 conf.registerGlobalValue(Web.fetch, 'maximum',
     registry.NonNegativeInteger(0, _("""Determines the maximum number of
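Being a global (rather than per-channel) value, the whitelist applies bot-wide. As a rough usage sketch, an owner could restrict fetching to two sites at runtime through the Config plugin (assuming '@' as the bot's command prefix; the quotes keep the space-separated list together as one argument):

    @config supybot.plugins.Web.urlWhitelist "http://slashdot.org http://fsf.org"
    @config supybot.plugins.Web.urlWhitelist ""

The second line clears the list, which disables filtering again.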

plugins/Web/plugin.py

@@ -119,6 +119,19 @@ class Web(callbacks.PluginRegexp):
     titleSnarfer = urlSnarfer(titleSnarfer)
     titleSnarfer.__doc__ = utils.web._httpUrlRe
 
+    def _checkURLWhitelist(self, url):
+        if not self.registryValue('urlWhitelist'):
+            return True
+        passed = False
+        for wu in self.registryValue('urlWhitelist'):
+            if wu.endswith('/') and url.find(wu) == 0:
+                passed = True
+                break
+            if (not wu.endswith('/')) and (url.find(wu + '/') == 0 or url == wu):
+                passed = True
+                break
+        return passed
+
     @internationalizeDocstring
     def headers(self, irc, msg, args, url):
         """<url>
@@ -126,6 +139,9 @@ class Web(callbacks.PluginRegexp):
         Returns the HTTP headers of <url>. Only HTTP urls are valid, of
         course.
         """
+        if not self._checkURLWhitelist(url):
+            irc.error("This url is not on the whitelist.")
+            return
         fd = utils.web.getUrlFd(url)
         try:
             s = ', '.join([format(_('%s: %s'), k, v)
@@ -143,6 +159,9 @@ class Web(callbacks.PluginRegexp):
         Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of
         course.
         """
+        if not self._checkURLWhitelist(url):
+            irc.error("This url is not on the whitelist.")
+            return
         size = conf.supybot.protocols.http.peekSize()
         s = utils.web.getUrl(url, size=size) \
                      .decode('utf8')
@@ -161,6 +180,9 @@ class Web(callbacks.PluginRegexp):
         Returns the Content-Length header of <url>. Only HTTP urls are valid,
         of course.
         """
+        if not self._checkURLWhitelist(url):
+            irc.error("This url is not on the whitelist.")
+            return
         fd = utils.web.getUrlFd(url)
         try:
             try:
@@ -187,6 +209,9 @@ class Web(callbacks.PluginRegexp):
         If --no-filter is given, the bot won't strip special chars (action,
         DCC, ...).
         """
+        if not self._checkURLWhitelist(url):
+            irc.error("This url is not on the whitelist.")
+            return
         size = conf.supybot.protocols.http.peekSize()
         text = utils.web.getUrl(url, size=size)
         try:
@@ -239,6 +264,9 @@ class Web(callbacks.PluginRegexp):
         supybot.plugins.Web.fetch.maximum. If that configuration variable is
         set to 0, this command will be effectively disabled.
         """
+        if not self._checkURLWhitelist(url):
+            irc.error("This url is not on the whitelist.")
+            return
         max = self.registryValue('fetch.maximum')
         if not max:
             irc.error(_('This command is disabled '
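The identical three-line guard is prepended to every command that fetches a user-supplied URL (headers, doctype, size, title, fetch). A hypothetical new command added to this plugin would follow the same pattern; 'snippet' below is illustrative only and not part of this commit:

    def snippet(self, irc, msg, args, url):
        """<url>

        Hypothetical example: replies with how many bytes <url> returns,
        subject to the same whitelist check.
        """
        if not self._checkURLWhitelist(url):
            irc.error("This url is not on the whitelist.")
            return
        data = utils.web.getUrl(url, size=conf.supybot.protocols.http.peekSize())
        irc.reply(format('%i bytes retrieved.', len(data)))
    snippet = wrap(snippet, ['httpUrl'])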

plugins/Web/test.py

@@ -75,7 +75,7 @@ class WebTestCase(ChannelPluginTestCase):
         try:
             conf.supybot.plugins.Web.titleSnarfer.setValue(True)
             self.assertSnarfRegexp('http://microsoft.com/',
-                                   'Title: Microsoft')
+                                   'Microsoft')
         finally:
             conf.supybot.plugins.Web.titleSnarfer.setValue(False)
@@ -102,5 +102,22 @@ class WebTestCase(ChannelPluginTestCase):
         finally:
             conf.supybot.plugins.Web.nonSnarfingRegexp.set('')
 
+    def testWhitelist(self):
+        fm = conf.supybot.plugins.Web.fetch.maximum()
+        uw = conf.supybot.plugins.Web.urlWhitelist()
+        try:
+            conf.supybot.plugins.Web.fetch.maximum.set(1024)
+            self.assertNotError('web fetch http://fsf.org')
+            conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')
+            self.assertError('web fetch http://fsf.org')
+            self.assertError('web title http://fsf.org')
+            self.assertError('web fetch http://slashdot.org.evildomain.com')
+            self.assertNotError('web fetch http://slashdot.org')
+            self.assertNotError('web fetch http://slashdot.org/recent')
+            conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')
+            self.assertNotError('doctype http://fsf.org')
+        finally:
+            conf.supybot.plugins.Web.urlWhitelist.set('')
+            conf.supybot.plugins.Web.fetch.maximum.set(fm)
 # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
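The new test can be run from a source checkout with Supybot's test runner, e.g. supybot-test plugins/Web (the exact invocation may differ between Supybot and Limnoria versions). Note that testWhitelist performs real HTTP fetches against fsf.org and slashdot.org, so it requires outbound network access.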