Limnoria/plugins/Web/test.py

###
# Copyright (c) 2005, Jeremiah Fincher
# Copyright (c) 2010-2021, The Limnoria Contributors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

from supybot.test import *

class WebTestCase(ChannelPluginTestCase):
    """Tests for the Web plugin's commands and its URL title snarfer.

    The tests defined under ``if network:`` only exist when the ``network``
    flag is true, since they request real pages from remote servers.  The
    remaining tests are always defined.
    """
    # Admin is loaded for the 'admin ignore add/remove' commands used by the
    # snarfer-ignore tests.
    plugins = ('Web', 'Admin',)
    # Raised timeout: most tests here wait on responses from remote servers.
    timeout = 10
    if network:
        def testHeaders(self):
            # Non-HTTP schemes must be rejected; a plain HTTP URL must work.
            self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('headers http://www.slashdot.org/')

        def testLocation(self):
            # 'location' must report the final URL after following redirects.
            self.assertError('location ftp://ftp.cdrom.com/pub/linux')
            self.assertResponse(
                'location http://limnoria.net/', 'https://limnoria.net/')
            self.assertResponse(
                'location https://www.limnoria.net/', 'https://limnoria.net/')

        def testDoctype(self):
            self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('doctype http://www.slashdot.org/')
            # The reply is expected to end with the closing '>' of the
            # doctype declaration.
            m = self.getMsg('doctype http://moobot.sf.net/')
            self.assertTrue(m.args[1].endswith('>'))

        def testSize(self):
            self.assertError('size ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('size http://supybot.sf.net/')
            self.assertNotError('size http://www.slashdot.org/')

        def testTitle(self):
            # Checks for @title not-working correctly
            self.assertResponse('title '
                'http://www.catb.org/~esr/jargon/html/F/foo.html',
                'foo')
            # Checks for only grabbing the real title tags instead of title
            # tags inside, for example, script tags. Bug #1190350
            self.assertNotRegexp('title '
                'http://www.irinnews.org/report.asp?ReportID=45910&'
                'SelectRegion=West_Africa&SelectCountry=CHAD',
                r'document\.write\(')
            # Checks that title parser grabs the full title instead of just
            # part of it.
            self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D')
            # Checks that the parser doesn't hang on invalid tags
            self.assertNotError(
                        'title http://www.youtube.com/watch?v=x4BtiqPN4u8')
            # Checks that character references (%27 / apostrophe) come out
            # as plain text in the title.
            self.assertResponse(
                    'title http://www.thefreedictionary.com/don%27t',
                    "Don't - definition of don't by The Free Dictionary")
            # Checks that markup-looking text inside the title is reported
            # literally (HTML is unescaped once, not twice).
            self.assertRegexp(
                    'title '
                    'https://twitter.com/rlbarnes/status/656554266744586240',
                    '"PSA: In Firefox 44 Nightly, "http:" pages with '
                    '<input type="password"> are now marked insecure. '
                    'https://t.co/qS9LxuRPdm"$')

        def testTitleSnarfer(self):
            # With the snarfer enabled, a bare URL in the channel must
            # trigger a title reply.
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                self.assertSnarfRegexp('http://microsoft.com/',
                                         'Microsoft')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)

        def testMultipleTitleSnarfer(self):
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                conf.supybot.plugins.Web.snarfMultipleUrls.setValue(True)
                self.feedMsg(
                        'https://microsoft.com/ https://google.com/')
                m1 = self.getMsg(' ')
                m2 = self.getMsg(' ')
                # The two replies may arrive in either order; XOR asserts
                # that each title shows up in exactly one of them.
                self.assertTrue(('Microsoft' in m1.args[1]) ^
                        ('Microsoft' in m2.args[1]))
                self.assertTrue(('Google' in m1.args[1]) ^
                        ('Google' in m2.args[1]))
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
                conf.supybot.plugins.Web.snarfMultipleUrls.setValue(False)

        def testNonSnarfing(self):
            # Remember the current settings so they can be restored exactly.
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/fr/')
                try:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                    # URL matching the regexp: no reply expected.
                    self.assertSnarfNoResponse('https://www.google.fr/', 2)
                    # URL not matching the regexp: still snarfed.
                    self.assertSnarfRegexp('https://www.google.com/',
                                           r'Google')
                finally:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(title)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)

        def testSnarferIgnore(self):
            # Run the test as a throwaway hostmask so that ignoring it does
            # not affect the default test user.
            (oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                self.assertSnarfRegexp('http://google.com/', 'Google')
                self.assertNotError('admin ignore add %s' % self.prefix)
                # Once ignored, neither the snarfer nor the command replies.
                self.assertSnarfNoResponse('http://google.com/')
                self.assertNoResponse('title http://www.google.com/')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
                # Swap back: self.prefix is the original user again and
                # oldprefix now holds the temporary (ignored) hostmask.
                (self.prefix, oldprefix) = (oldprefix, self.prefix)
                self.assertNotError('admin ignore remove %s' % oldprefix)

        def testSnarferNotIgnore(self):
            # Same temporary-hostmask trick as in testSnarferIgnore.
            (oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                conf.supybot.plugins.Web.checkIgnored.setValue(False)
                self.assertSnarfRegexp('https://google.it/', 'Google')
                self.assertNotError('admin ignore add %s' % self.prefix)
                # With checkIgnored off the snarfer still answers an ignored
                # user, while the explicit command stays silent.
                self.assertSnarfRegexp('https://www.google.it/', 'Google')
                self.assertNoResponse('title http://www.google.it/')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
                conf.supybot.plugins.Web.checkIgnored.setValue(True)
                # Swap back: oldprefix now holds the temporary hostmask.
                (self.prefix, oldprefix) = (oldprefix, self.prefix)
                self.assertNotError('admin ignore remove %s' % oldprefix)

        def testWhitelist(self):
            # Remember the current settings so they can be restored exactly.
            fm = conf.supybot.plugins.Web.fetch.maximum()
            uw = conf.supybot.plugins.Web.urlWhitelist()
            try:
                conf.supybot.plugins.Web.fetch.maximum.set(1024)
                self.assertNotError('web fetch http://fsf.org')
                conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')
                # URLs outside the whitelist must be refused by every command.
                self.assertError('web fetch http://fsf.org')
                self.assertError('web title http://fsf.org')
                # A whitelisted host used as a prefix of another domain must
                # not slip through.
                self.assertError('web fetch http://slashdot.org.evildomain.com')
                self.assertNotError('web fetch http://slashdot.org')
                self.assertNotError('web fetch http://slashdot.org/recent')
                conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')
                self.assertNotError('doctype http://fsf.org')
            finally:
                # Restore the saved values rather than assuming the defaults.
                conf.supybot.plugins.Web.urlWhitelist.setValue(uw)
                conf.supybot.plugins.Web.fetch.maximum.set(fm)

    def testNonSnarfingRegexpConfigurable(self):
        # No reply is expected either before or after setting the regexp;
        # the point is that changing the setting is accepted.
        self.assertSnarfNoResponse('http://foo.bar.baz/', 2)
        try:
            conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')
            self.assertSnarfNoResponse('http://biff.bar.baz/', 2)
        finally:
            conf.supybot.plugins.Web.nonSnarfingRegexp.set('')

    def testFetchIri(self):
        # Non-ASCII characters in either the host or the path must produce
        # a clean validation error.
        self.assertRegexp('fetch http://café.example.org/',
            'Error: .*is not a valid')
        self.assertRegexp('fetch http://example.org/café',
            'Error: .*is not a valid')


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`###`
			`# Copyright (c) 2005, Jeremiah Fincher`
all: Add generic 'The Limnoria Contributors' to copyright notices. No need to bother with details (that are all outdated / out of sync anyway), just look up the git history. 2021-08-01 21:47:37 +02:00			`# Copyright (c) 2010-2021, The Limnoria Contributors`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions, and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions, and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution.`
			`# * Neither the name of the author of this software nor the name of`
			`# contributors to this software may be used to endorse or promote products`
			`# derived from this software without specific prior written consent.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`###`

			`from supybot.test import *`

Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`class WebTestCase(ChannelPluginTestCase):`
Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`plugins = ('Web', 'Admin',)`
plugins/Web/test.py: Bump the timeout since we're fetching a lot of information from other sources, which may take a while. 2006-05-03 15:30:27 +02:00			`timeout = 10`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`if network:`
			`def testHeaders(self):`
			`self.assertError('headers ftp://ftp.cdrom.com/pub/linux')`
			`self.assertNotError('headers http://www.slashdot.org/')`

Web: Add new @location command, to follow HTTP redirects. Useful to un-tinify URLs. 2020-10-13 22:28:52 +02:00			`def testLocation(self):`
			`self.assertError('location ftp://ftp.cdrom.com/pub/linux')`
			`self.assertResponse(`
			`'location http://limnoria.net/', 'https://limnoria.net/')`
			`self.assertResponse(`
			`'location https://www.limnoria.net/', 'https://limnoria.net/')`

Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def testDoctype(self):`
			`self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')`
			`self.assertNotError('doctype http://www.slashdot.org/')`
			`m = self.getMsg('doctype http://moobot.sf.net/')`
Replace failUnless/failIf with assertTrue/assertFalse. The old names are deprecated. 2020-01-26 11:13:56 +01:00			`self.assertTrue(m.args[1].endswith('>'))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
			`def testSize(self):`
			`self.assertError('size ftp://ftp.cdrom.com/pub/linux')`
			`self.assertNotError('size http://supybot.sf.net/')`
			`self.assertNotError('size http://www.slashdot.org/')`

			`def testTitle(self):`
			`# Checks for @title not-working correctly`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`self.assertResponse('title '`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`'http://www.catb.org/~esr/jargon/html/F/foo.html',`
			`'foo')`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`# Checks for only grabbing the real title tags instead of title`
			`# tags inside, for example, script tags. Bug #1190350`
			`self.assertNotRegexp('title '`
			`'http://www.irinnews.org/report.asp?ReportID=45910&'`
			`'SelectRegion=West_Africa&SelectCountry=CHAD',`
			`r'document\.write\(')`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`# Checks that title parser grabs the full title instead of just`
			`# part of it.`
			`self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D')`
plugins/Web: Switch the title parser back to HTMLParser since sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`# Checks that the parser doesn't hang on invalid tags`
			`self.assertNotError(`
			`'title http://www.youtube.com/watch?v=x4BtiqPN4u8')`
Web: Add support for charrefs. Closes GH-923. 2014-12-11 09:59:08 +01:00			`self.assertResponse(`
			`'title http://www.thefreedictionary.com/don%27t',`
Web: Correct capitalization in testTitle. 2015-10-22 16:19:03 +02:00			`"Don't - definition of don't by The Free Dictionary")`
Web & core: Merge features of Web's title parser and utils.web.HtmlToText + don't unescape HTML twice. Closes GH-1176. 2015-10-23 07:41:36 +02:00			`self.assertRegexp(`
			`'title '`
			`'https://twitter.com/rlbarnes/status/656554266744586240',`
			`'"PSA: In Firefox 44 Nightly, "http:" pages with '`
			`'<input type="password"> are now marked insecure. '`
			`'https://t.co/qS9LxuRPdm"$')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
			`def testTitleSnarfer(self):`
			`try:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
Web: Remove/update non-functional tests Signed-off-by: James McCoy <jamessan@users.sourceforge.net> 2012-09-06 03:35:56 +02:00			`self.assertSnarfRegexp('http://microsoft.com/',`
Web: create a configurable url whitelist. Prevent various forms of abuse that result via the Web plugin, such as fetching or titling malicious content, or revealing bot IP. Conflicts: plugins/Web/plugin.py plugins/Web/test.py 2013-05-11 20:11:57 +02:00			`'Microsoft')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`

Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`def testMultipleTitleSnarfer(self):`
			`try:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
			`conf.supybot.plugins.Web.snarfMultipleUrls.setValue(True)`
			`self.feedMsg(`
			`'https://microsoft.com/ https://google.com/')`
			`m1 = self.getMsg(' ')`
			`m2 = self.getMsg(' ')`
			`self.assertTrue(('Microsoft' in m1.args[1]) ^`
			`('Microsoft' in m2.args[1]))`
			`self.assertTrue(('Google' in m1.args[1]) ^`
			`('Google' in m2.args[1]))`
			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`
			`conf.supybot.plugins.Web.snarfMultipleUrls.setValue(False)`

Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def testNonSnarfing(self):`
			`snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()`
			`title = conf.supybot.plugins.Web.titleSnarfer()`
			`try:`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/fr/')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`try:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`self.assertSnarfNoResponse('https://www.google.fr/', 2)`
			`self.assertSnarfRegexp('https://www.google.com/',`
			`r'Google')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(title)`
			`finally:`
			`conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)`

Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`def testSnarferIgnore(self):`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
			`(oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')`
			`try:`
			`self.assertSnarfRegexp('http://google.com/', 'Google')`
			`self.assertNotError('admin ignore add %s' % self.prefix)`
			`self.assertSnarfNoResponse('http://google.com/')`
			`self.assertNoResponse('title http://www.google.com/')`
			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`
			`(self.prefix, oldprefix) = (oldprefix, self.prefix)`
			`self.assertNotError('admin ignore remove %s' % oldprefix)`

			`def testSnarferNotIgnore(self):`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
			`conf.supybot.plugins.Web.checkIgnored.setValue(False)`
			`(oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')`
			`try:`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`self.assertSnarfRegexp('https://google.it/', 'Google')`
Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`self.assertNotError('admin ignore add %s' % self.prefix)`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`self.assertSnarfRegexp('https://www.google.it/', 'Google')`
			`self.assertNoResponse('title http://www.google.it/')`
Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`
			`conf.supybot.plugins.Web.checkIgnored.setValue(True)`
			`(self.prefix, oldprefix) = (oldprefix, self.prefix)`
			`self.assertNotError('admin ignore remove %s' % oldprefix)`

Web: Disable testWhitelist if --no-network is given. 2013-07-09 09:18:33 +02:00			`def testWhitelist(self):`
			`fm = conf.supybot.plugins.Web.fetch.maximum()`
			`uw = conf.supybot.plugins.Web.urlWhitelist()`
			`try:`
			`conf.supybot.plugins.Web.fetch.maximum.set(1024)`
			`self.assertNotError('web fetch http://fsf.org')`
			`conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')`
			`self.assertError('web fetch http://fsf.org')`
			`self.assertError('wef title http://fsf.org')`
			`self.assertError('web fetch http://slashdot.org.evildomain.com')`
			`self.assertNotError('web fetch http://slashdot.org')`
			`self.assertNotError('web fetch http://slashdot.org/recent')`
			`conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')`
			`self.assertNotError('doctype http://fsf.org')`
			`finally:`
			`conf.supybot.plugins.Web.urlWhitelist.set('')`
			`conf.supybot.plugins.Web.fetch.maximum.set(fm)`

Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def testNonSnarfingRegexpConfigurable(self):`
			`self.assertSnarfNoResponse('http://foo.bar.baz/', 2)`
			`try:`
			`conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')`
			`self.assertSnarfNoResponse('http://biff.bar.baz/', 2)`
			`finally:`
			`conf.supybot.plugins.Web.nonSnarfingRegexp.set('')`

commands: Disallow IRIs from 'url' and 'httpUrl' converters. urllib doesn't support IRIs, and gives out a cryptic 'UnicodeEncodeError: 'ascii' codec can't encode character ...' if we don't validate it. 2021-08-25 21:53:05 +02:00			`def testFetchIri(self):`
			`self.assertRegexp('fetch http://café.example.org/',`
			`'Error: .*is not a valid')`
commands: fix _checkUrl 2021-08-25 23:28:25 +02:00			`self.assertRegexp('fetch http://example.org/café',`
			`'Error: .*is not a valid')`
commands: Disallow IRIs from 'url' and 'httpUrl' converters. urllib doesn't support IRIs, and gives out a cryptic 'UnicodeEncodeError: 'ascii' codec can't encode character ...' if we don't validate it. 2021-08-25 21:53:05 +02:00
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
Change the modeline to use softtabstop instead of tabstop. 2006-02-11 16:52:51 +01:00			`# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:`