Limnoria/plugins/Web/test.py

###
# Copyright (c) 2005, Jeremiah Fincher
# Copyright (c) 2010-2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

from supybot.test import *

class WebTestCase(ChannelPluginTestCase):
    plugins = ('Web', 'Admin',)
    timeout = 10
    if network:
        def testHeaders(self):
            self.assertError('headers ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('headers http://www.slashdot.org/')

        def testLocation(self):
            self.assertError('location ftp://ftp.cdrom.com/pub/linux')
            self.assertResponse(
                'location http://limnoria.net/', 'https://limnoria.net/')
            self.assertResponse(
                'location https://www.limnoria.net/', 'https://limnoria.net/')

        def testDoctype(self):
            self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('doctype http://www.slashdot.org/')
            m = self.getMsg('doctype http://moobot.sf.net/')
            self.assertTrue(m.args[1].endswith('>'))

        def testSize(self):
            self.assertError('size ftp://ftp.cdrom.com/pub/linux')
            self.assertNotError('size http://supybot.sf.net/')
            self.assertNotError('size http://www.slashdot.org/')

        def testTitle(self):
            # Checks for @title not-working correctly
            self.assertResponse('title '
                'http://www.catb.org/~esr/jargon/html/F/foo.html',
                'foo')
            # Checks for only grabbing the real title tags instead of title
            # tags inside, for example, script tags. Bug #1190350
            self.assertNotRegexp('title '
                'http://www.irinnews.org/report.asp?ReportID=45910&'
                'SelectRegion=West_Africa&SelectCountry=CHAD',
                r'document\.write\(')
            # Checks that title parser grabs the full title instead of just
            # part of it.
            self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D')
            # Checks that the parser doesn't hang on invalid tags
            self.assertNotError(
                        'title http://www.youtube.com/watch?v=x4BtiqPN4u8')
            self.assertResponse(
                    'title http://www.thefreedictionary.com/don%27t',
                    "Don't - definition of don't by The Free Dictionary")
            self.assertRegexp(
                    'title '
                    'https://twitter.com/rlbarnes/status/656554266744586240',
                    '"PSA: In Firefox 44 Nightly, "http:" pages with '
                    '<input type="password"> are now marked insecure. '
                    'https://t.co/qS9LxuRPdm"$')

        def testTitleSnarfer(self):
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                self.assertSnarfRegexp('http://microsoft.com/',
                                         'Microsoft')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)

        def testMultipleTitleSnarfer(self):
            try:
                conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                conf.supybot.plugins.Web.snarfMultipleUrls.setValue(True)
                self.feedMsg(
                        'https://microsoft.com/ https://google.com/')
                m1 = self.getMsg(' ')
                m2 = self.getMsg(' ')
                self.assertTrue(('Microsoft' in m1.args[1]) ^
                        ('Microsoft' in m2.args[1]))
                self.assertTrue(('Google' in m1.args[1]) ^
                        ('Google' in m2.args[1]))
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
                conf.supybot.plugins.Web.snarfMultipleUrls.setValue(False)

        def testNonSnarfing(self):
            snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()
            title = conf.supybot.plugins.Web.titleSnarfer()
            try:
                conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/fr/')
                try:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(True)
                    self.assertSnarfNoResponse('https://www.google.fr/', 2)
                    self.assertSnarfRegexp('https://www.google.com/',
                                           r'Google')
                finally:
                    conf.supybot.plugins.Web.titleSnarfer.setValue(title)
            finally:
                conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)

        def testSnarferIgnore(self):
            conf.supybot.plugins.Web.titleSnarfer.setValue(True)
            (oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')
            try:
                self.assertSnarfRegexp('http://google.com/', 'Google')
                self.assertNotError('admin ignore add %s' % self.prefix)
                self.assertSnarfNoResponse('http://google.com/')
                self.assertNoResponse('title http://www.google.com/')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
                (self.prefix, oldprefix) = (oldprefix, self.prefix)
                self.assertNotError('admin ignore remove %s' % oldprefix)

        def testSnarferNotIgnore(self):
            conf.supybot.plugins.Web.titleSnarfer.setValue(True)
            conf.supybot.plugins.Web.checkIgnored.setValue(False)
            (oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')
            try:
                self.assertSnarfRegexp('https://google.it/', 'Google')
                self.assertNotError('admin ignore add %s' % self.prefix)
                self.assertSnarfRegexp('https://www.google.it/', 'Google')
                self.assertNoResponse('title http://www.google.it/')
            finally:
                conf.supybot.plugins.Web.titleSnarfer.setValue(False)
                conf.supybot.plugins.Web.checkIgnored.setValue(True)
                (self.prefix, oldprefix) = (oldprefix, self.prefix)
                self.assertNotError('admin ignore remove %s' % oldprefix)

        def testWhitelist(self):
            fm = conf.supybot.plugins.Web.fetch.maximum()
            uw = conf.supybot.plugins.Web.urlWhitelist()
            try:
                conf.supybot.plugins.Web.fetch.maximum.set(1024)
                self.assertNotError('web fetch http://fsf.org')
                conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')
                self.assertError('web fetch http://fsf.org')
                self.assertError('wef title http://fsf.org')
                self.assertError('web fetch http://slashdot.org.evildomain.com')
                self.assertNotError('web fetch http://slashdot.org')
                self.assertNotError('web fetch http://slashdot.org/recent')
                conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')
                self.assertNotError('doctype http://fsf.org')
            finally:
                conf.supybot.plugins.Web.urlWhitelist.set('')
                conf.supybot.plugins.Web.fetch.maximum.set(fm)

    def testNonSnarfingRegexpConfigurable(self):
        self.assertSnarfNoResponse('http://foo.bar.baz/', 2)
        try:
            conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')
            self.assertSnarfNoResponse('http://biff.bar.baz/', 2)
        finally:
            conf.supybot.plugins.Web.nonSnarfingRegexp.set('')

    def testFetchIri(self):
        self.assertRegexp('fetch http://café.example.org/',
            'Error: .*is not a valid')
        self.assertRegexp('fetch http://example.org/café',
            'Error: .*is not a valid')


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`###`
			`# Copyright (c) 2005, Jeremiah Fincher`
Revert generic 'The Limnoria Contributors' in copyright notices This commit reverts db7ef3f02517f9f2a3c56829a22b9fad3c36e374 (though it keeps the year updates) After discussion with several people, it seems better to mention copyright owners explicitly. eg. https://reuse.software/faq/#vcs-copyright explains the issue of using VCSs to track copyright. As db7ef3f02517f9f2a3c56829a22b9fad3c36e374 only replaced mentions of my name with 'The Limnoria Contributors', this commit only needs to undo that + add one person who contributed to setup.py. 2021-10-17 09:54:06 +02:00			`# Copyright (c) 2010-2021, Valentin Lorentz`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions, and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions, and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution.`
			`# * Neither the name of the author of this software nor the name of`
			`# contributors to this software may be used to endorse or promote products`
			`# derived from this software without specific prior written consent.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`###`

			`from supybot.test import *`

Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`class WebTestCase(ChannelPluginTestCase):`
Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`plugins = ('Web', 'Admin',)`
plugins/Web/test.py: Bump the timeout since we're fetching a lot of information from other sources, which may take a while. 2006-05-03 15:30:27 +02:00			`timeout = 10`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`if network:`
			`def testHeaders(self):`
			`self.assertError('headers ftp://ftp.cdrom.com/pub/linux')`
			`self.assertNotError('headers http://www.slashdot.org/')`

Web: Add new @location command, to follow HTTP redirects. Useful to un-tinify URLs. 2020-10-13 22:28:52 +02:00			`def testLocation(self):`
			`self.assertError('location ftp://ftp.cdrom.com/pub/linux')`
			`self.assertResponse(`
			`'location http://limnoria.net/', 'https://limnoria.net/')`
			`self.assertResponse(`
			`'location https://www.limnoria.net/', 'https://limnoria.net/')`

Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def testDoctype(self):`
			`self.assertError('doctype ftp://ftp.cdrom.com/pub/linux')`
			`self.assertNotError('doctype http://www.slashdot.org/')`
			`m = self.getMsg('doctype http://moobot.sf.net/')`
Replace failUnless/failIf with assertTrue/assertFalse. The old names are deprecated. 2020-01-26 11:13:56 +01:00			`self.assertTrue(m.args[1].endswith('>'))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
			`def testSize(self):`
			`self.assertError('size ftp://ftp.cdrom.com/pub/linux')`
			`self.assertNotError('size http://supybot.sf.net/')`
			`self.assertNotError('size http://www.slashdot.org/')`

			`def testTitle(self):`
			`# Checks for @title not-working correctly`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`self.assertResponse('title '`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`'http://www.catb.org/~esr/jargon/html/F/foo.html',`
			`'foo')`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`# Checks for only grabbing the real title tags instead of title`
			`# tags inside, for example, script tags. Bug #1190350`
			`self.assertNotRegexp('title '`
			`'http://www.irinnews.org/report.asp?ReportID=45910&'`
			`'SelectRegion=West_Africa&SelectCountry=CHAD',`
			`r'document\.write\(')`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`# Checks that title parser grabs the full title instead of just`
			`# part of it.`
			`self.assertRegexp('title http://www.n-e-r-d.com/', 'N.E.R.*D')`
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`# Checks that the parser doesn't hang on invalid tags`
			`self.assertNotError(`
			`'title http://www.youtube.com/watch?v=x4BtiqPN4u8')`
Web: Add support for charrefs. Closes GH-923. 2014-12-11 09:59:08 +01:00			`self.assertResponse(`
			`'title http://www.thefreedictionary.com/don%27t',`
Web: Correct capitalization in testTitle. 2015-10-22 16:19:03 +02:00			`"Don't - definition of don't by The Free Dictionary")`
Web & core: Merge features of Web's title parser and utils.web.HtmlToText + don't unescape HTML twice. Closes GH-1176. 2015-10-23 07:41:36 +02:00			`self.assertRegexp(`
			`'title '`
			`'https://twitter.com/rlbarnes/status/656554266744586240',`
			`'"PSA: In Firefox 44 Nightly, "http:" pages with '`
			`'<input type="password"> are now marked insecure. '`
			`'https://t.co/qS9LxuRPdm"$')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
			`def testTitleSnarfer(self):`
			`try:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
Web: Remove/update non-functional tests Signed-off-by: James McCoy <jamessan@users.sourceforge.net> 2012-09-06 03:35:56 +02:00			`self.assertSnarfRegexp('http://microsoft.com/',`
Web: create a cofigurable url whitelist Prevent various forms of abuse that result via the Web plugin, such as fetching or titling malicious content, or revealing bot IP. Conflicts: plugins/Web/plugin.py plugins/Web/test.py 2013-05-11 20:11:57 +02:00			`'Microsoft')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`

Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`def testMultipleTitleSnarfer(self):`
			`try:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
			`conf.supybot.plugins.Web.snarfMultipleUrls.setValue(True)`
			`self.feedMsg(`
			`'https://microsoft.com/ https://google.com/')`
			`m1 = self.getMsg(' ')`
			`m2 = self.getMsg(' ')`
			`self.assertTrue(('Microsoft' in m1.args[1]) ^`
			`('Microsoft' in m2.args[1]))`
			`self.assertTrue(('Google' in m1.args[1]) ^`
			`('Google' in m2.args[1]))`
			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`
			`conf.supybot.plugins.Web.snarfMultipleUrls.setValue(False)`

Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def testNonSnarfing(self):`
			`snarf = conf.supybot.plugins.Web.nonSnarfingRegexp()`
			`title = conf.supybot.plugins.Web.titleSnarfer()`
			`try:`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/fr/')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`try:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`self.assertSnarfNoResponse('https://www.google.fr/', 2)`
			`self.assertSnarfRegexp('https://www.google.com/',`
			`r'Google')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(title)`
			`finally:`
			`conf.supybot.plugins.Web.nonSnarfingRegexp.setValue(snarf)`

Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`def testSnarferIgnore(self):`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
			`(oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')`
			`try:`
			`self.assertSnarfRegexp('http://google.com/', 'Google')`
			`self.assertNotError('admin ignore add %s' % self.prefix)`
			`self.assertSnarfNoResponse('http://google.com/')`
			`self.assertNoResponse('title http://www.google.com/')`
			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`
			`(self.prefix, oldprefix) = (oldprefix, self.prefix)`
			`self.assertNotError('admin ignore remove %s' % oldprefix)`

			`def testSnarferNotIgnore(self):`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(True)`
			`conf.supybot.plugins.Web.checkIgnored.setValue(False)`
			`(oldprefix, self.prefix) = (self.prefix, 'foo!bar@baz')`
			`try:`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`self.assertSnarfRegexp('https://google.it/', 'Google')`
Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`self.assertNotError('admin ignore add %s' % self.prefix)`
Web: Add supybot.plugins.snarfMultipleUrls. Also, fix Web's test cases. 2018-04-14 21:50:32 +02:00			`self.assertSnarfRegexp('https://www.google.it/', 'Google')`
			`self.assertNoResponse('title http://www.google.it/')`
Web: add option for having titlesnarfer immune to defaultignore. Closes GH-1101 2015-05-15 12:38:56 +02:00			`finally:`
			`conf.supybot.plugins.Web.titleSnarfer.setValue(False)`
			`conf.supybot.plugins.Web.checkIgnored.setValue(True)`
			`(self.prefix, oldprefix) = (oldprefix, self.prefix)`
			`self.assertNotError('admin ignore remove %s' % oldprefix)`

Web: Disable testWhitelist if --no-network is given. 2013-07-09 09:18:33 +02:00			`def testWhitelist(self):`
			`fm = conf.supybot.plugins.Web.fetch.maximum()`
			`uw = conf.supybot.plugins.Web.urlWhitelist()`
			`try:`
			`conf.supybot.plugins.Web.fetch.maximum.set(1024)`
			`self.assertNotError('web fetch http://fsf.org')`
			`conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org')`
			`self.assertError('web fetch http://fsf.org')`
			`self.assertError('wef title http://fsf.org')`
			`self.assertError('web fetch http://slashdot.org.evildomain.com')`
			`self.assertNotError('web fetch http://slashdot.org')`
			`self.assertNotError('web fetch http://slashdot.org/recent')`
			`conf.supybot.plugins.Web.urlWhitelist.set('http://slashdot.org http://fsf.org')`
			`self.assertNotError('doctype http://fsf.org')`
			`finally:`
			`conf.supybot.plugins.Web.urlWhitelist.set('')`
			`conf.supybot.plugins.Web.fetch.maximum.set(fm)`

Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def testNonSnarfingRegexpConfigurable(self):`
			`self.assertSnarfNoResponse('http://foo.bar.baz/', 2)`
			`try:`
			`conf.supybot.plugins.Web.nonSnarfingRegexp.set('m/biff/')`
			`self.assertSnarfNoResponse('http://biff.bar.baz/', 2)`
			`finally:`
			`conf.supybot.plugins.Web.nonSnarfingRegexp.set('')`

commands: Disallow IRIs from 'url' and 'httpUrl' converters. urllib doesn't support IRIs, and gives out a cryptic 'UnicodeEncodeError: 'ascii' codec can't encode character ...' if we don't validate it. 2021-08-25 21:53:05 +02:00			`def testFetchIri(self):`
			`self.assertRegexp('fetch http://café.example.org/',`
			`'Error: .*is not a valid')`
commands: fix _checkUrl 2021-08-25 23:28:25 +02:00			`self.assertRegexp('fetch http://example.org/café',`
			`'Error: .*is not a valid')`
commands: Disallow IRIs from 'url' and 'httpUrl' converters. urllib doesn't support IRIs, and gives out a cryptic 'UnicodeEncodeError: 'ascii' codec can't encode character ...' if we don't validate it. 2021-08-25 21:53:05 +02:00
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
Change the modeline to use softtabstop instead of tabstop. 2006-02-11 16:52:51 +01:00			`# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:`