Limnoria/plugins/Web/plugin.py

###
# Copyright (c) 2005, Jeremiah Fincher
# Copyright (c) 2009, James Vega
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

import re
import HTMLParser
import htmlentitydefs

import supybot.conf as conf
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('Web')

class Title(HTMLParser.HTMLParser):
    """HTML parser that collects the text of a document's <title> element.

    Feed markup to the instance with feed(); afterwards the ``title``
    attribute holds the text found inside the first <title>...</title> pair
    (or '' if none was seen).  Only the first title element is captured, so
    that later <title> tags (for instance inside inline SVG) are not glued
    onto the real page title.
    """
    # Named entities that can be expanded inside the title.  The stock table
    # is copied so the overrides below do not leak into htmlentitydefs.
    entitydefs = htmlentitydefs.entitydefs.copy()
    entitydefs['nbsp'] = ' '
    entitydefs['apos'] = '\''
    def __init__(self):
        # True while the parser is between <title> and </title>.
        self.inTitle = False
        # True once the first title element has been closed; any further
        # <title> tags are ignored from then on.
        self.titleSeen = False
        # Text accumulated from the title element so far.
        self.title = ''
        HTMLParser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start capturing when the first <title> tag opens."""
        if tag == 'title' and not self.titleSeen:
            self.inTitle = True

    def handle_endtag(self, tag):
        """Stop capturing at </title> and remember that a title was read."""
        if tag == 'title' and self.inTitle:
            self.inTitle = False
            self.titleSeen = True

    def handle_data(self, data):
        """Append plain text to the title while inside the title element."""
        if self.inTitle:
            self.title += data

    def handle_entityref(self, name):
        """Expand a named entity (``&name;``) found inside the title.

        Entities missing from ``entitydefs`` are dropped.
        """
        if self.inTitle:
            if name in self.entitydefs:
                self.title += self.entitydefs[name]

    def handle_charref(self, name):
        """Expand a numeric reference (``&#39;`` / ``&#x27;``) in the title.

        Only printable ASCII code points are expanded.  The encoding of the
        text being fed is not known here, so other code points (and malformed
        references) are skipped rather than guessed at.
        """
        if not self.inTitle:
            return
        try:
            if name[:1] in ('x', 'X'):
                code = int(name[1:], 16)
            else:
                code = int(name)
        except ValueError:
            return
        if 32 <= code < 127:
            self.title += chr(code)

class Web(callbacks.PluginRegexp):
    """Add the help for "@help Web" here."""
    threaded = True
    regexps = ['titleSnarfer']
    def callCommand(self, command, irc, msg, *args, **kwargs):
        try:
            super(Web, self).callCommand(command, irc, msg, *args, **kwargs)
        except utils.web.Error, e:
            irc.reply(str(e))

    def titleSnarfer(self, irc, msg, match):
        channel = msg.args[0]
        if not irc.isChannel(channel):
            return
        if callbacks.addressed(irc.nick, msg):
            return
        if self.registryValue('titleSnarfer', channel):
            url = match.group(0)
            r = self.registryValue('nonSnarfingRegexp', channel)
            if r and r.search(url):
                self.log.debug('Not titleSnarfing %q.', url)
                return
            try:
                size = conf.supybot.protocols.http.peekSize()
                text = utils.web.getUrl(url, size=size)
            except utils.web.Error, e:
                self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
                return
            parser = Title()
            try:
                parser.feed(text)
            except HTMLParser.HTMLParseError:
                self.log.debug('Encountered a problem parsing %u.  Title may '
                               'already be set, though', url)
            if parser.title:
                domain = utils.web.getDomain(url)
                title = utils.web.htmlToText(parser.title.strip())
                s = format(_('Title: %s (at %s)'), title, domain)
                irc.reply(s, prefixNick=False)
    titleSnarfer = urlSnarfer(titleSnarfer)
    titleSnarfer.__doc__ = utils.web._httpUrlRe

    @internationalizeDocstring
    def headers(self, irc, msg, args, url):
        """<url>

        Returns the HTTP headers of <url>.  Only HTTP urls are valid, of
        course.
        """
        fd = utils.web.getUrlFd(url)
        try:
            # Render every header as "name: value" and reply with the
            # comma-separated list.
            rendered = []
            for (name, value) in fd.headers.items():
                rendered.append(format(_('%s: %s'), name, value))
            irc.reply(', '.join(rendered))
        finally:
            # Release the connection whether or not the reply succeeded.
            fd.close()
    headers = wrap(headers, ['httpUrl'])

    # Matches a complete "<!DOCTYPE ...>" declaration.  The keyword is
    # case-insensitive in HTML (e.g. the common "<!doctype html>"), hence
    # re.I.
    _doctypeRe = re.compile(r'(<!DOCTYPE[^>]+>)', re.I)
    @internationalizeDocstring
    def doctype(self, irc, msg, args, url):
        """<url>

        Returns the DOCTYPE string of <url>.  Only HTTP urls are valid, of
        course.
        """
        # Only the first peekSize worth of the page is fetched and searched.
        size = conf.supybot.protocols.http.peekSize()
        s = utils.web.getUrl(url, size=size)
        m = self._doctypeRe.search(s)
        if m:
            # The declaration may span several lines; collapse the whitespace
            # before replying.
            s = utils.str.normalizeWhitespace(m.group(0))
            irc.reply(s)
        else:
            irc.reply(_('That URL has no specified doctype.'))
    doctype = wrap(doctype, ['httpUrl'])

    @internationalizeDocstring
    def size(self, irc, msg, args, url):
        """<url>

        Returns the Content-Length header of <url>.  Only HTTP urls are valid,
        of course.
        """
        fd = utils.web.getUrlFd(url)
        try:
            try:
                # Preferred path: the server declared the length itself.
                declared = fd.headers['Content-Length']
                irc.reply(format(_('%u is %S long.'), url, int(declared)))
            except KeyError:
                # No Content-Length header: read up to peekSize and measure
                # what came back.
                limit = conf.supybot.protocols.http.peekSize()
                body = fd.read(limit)
                if len(body) == limit:
                    # The read filled the whole peek window, so only a lower
                    # bound can be reported.
                    irc.reply(format(_('The server didn\'t tell me how long %u '
                                     'is but it\'s longer than %S.'),
                                     url, limit))
                else:
                    irc.reply(format(_('%u is %S long.'), url, len(body)))
        finally:
            fd.close()
    size = wrap(size, ['httpUrl'])

    @internationalizeDocstring
    def title(self, irc, msg, args, url):
        """<url>

        Returns the HTML <title>...</title> of a URL.
        """
        # Only the first peekSize worth of the page is fetched and parsed.
        peek = conf.supybot.protocols.http.peekSize()
        page = utils.web.getUrl(url, size=peek)
        parser = Title()
        try:
            parser.feed(page)
        except HTMLParser.HTMLParseError:
            # The title may have been collected before the parser gave up.
            self.log.debug('Encountered a problem parsing %u.  Title may '
                           'already be set, though', url)
        if parser.title:
            irc.reply(utils.web.htmlToText(parser.title.strip()))
            return
        if len(page) >= peek:
            # The peek window was filled, so a title may exist further on.
            irc.reply(format(_('That URL appears to have no HTML title '
                             'within the first %S.'), peek))
        else:
            # Fewer bytes than the window came back and no title was found.
            irc.reply(_('That URL appears to have no HTML title.'))
    title = wrap(title, ['httpUrl'])

    # Captures the markup between the first link inside a left-aligned table
    # cell and the next "<a href" on the Netcraft result page.
    _netcraftre = re.compile(r'td align="left">\s+<a[^>]+>(.*?)<a href',
                             re.S | re.I)
    @internationalizeDocstring
    def netcraft(self, irc, msg, args, hostname):
        """<hostname|ip>

        Returns Netcraft.com's determination of what operating system and
        webserver is running on the host given.
        """
        # The argument is free-form text, so quote it before placing it in
        # the query string; otherwise spaces break the URL and characters
        # such as '&' or '#' would alter the request.
        url = ('http://uptime.netcraft.com/up/graph/?host=' +
               utils.web.urlquote(hostname))
        html = utils.web.getUrl(url)
        m = self._netcraftre.search(html)
        if m:
            html = m.group(1)
            s = utils.web.htmlToText(html, tagReplace='').strip()
            # Snip off "the site"
            s = s.rstrip('-').strip()
            irc.reply(s)
        elif 'We could not get any results' in html:
            irc.reply(_('No results found for %s.') % hostname)
        else:
            # NOTE(review): the wording typo is kept on purpose; the string
            # is passed to _() and presumably serves as the translation key.
            irc.error(_('The format of page the was odd.'))
    netcraft = wrap(netcraft, ['text'])

    @internationalizeDocstring
    def urlquote(self, irc, msg, args, text):
        """<text>

        Returns the URL quoted form of the text.
        """
        # Thin command wrapper around utils.web.urlquote.
        quoted = utils.web.urlquote(text)
        irc.reply(quoted)
    urlquote = wrap(urlquote, ['text'])

    @internationalizeDocstring
    def urlunquote(self, irc, msg, args, text):
        """<text>

        Returns the text un-URL quoted.
        """
        # Thin command wrapper around utils.web.urlunquote.
        irc.reply(utils.web.urlunquote(text))
    urlunquote = wrap(urlunquote, ['text'])

    @internationalizeDocstring
    def fetch(self, irc, msg, args, url):
        """<url>

        Returns the contents of <url>, or as much as is configured in
        supybot.plugins.Web.fetch.maximum.  If that configuration variable is
        set to 0, this command will be effectively disabled.
        """
        # Upper bound on how much of the resource is read; a false value
        # (0) disables the command.
        limit = self.registryValue('fetch.maximum')
        if not limit:
            irc.error(_('This command is disabled '
                      '(supybot.plugins.Web.fetch.maximum is set to 0).'),
                      Raise=True)
        fd = utils.web.getUrlFd(url)
        try:
            irc.reply(fd.read(limit))
        finally:
            # Always release the connection, as headers and size do.
            fd.close()
    fetch = wrap(fetch, ['url'])

Class = Web

# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`###`
			`# Copyright (c) 2005, Jeremiah Fincher`
Use utils.web.httpUrlRe for the Web/ShrinkUrl snarfer regexes. Signed-off-by: James Vega <jamessan@users.sourceforge.net> 2009-10-05 03:41:05 +02:00			`# Copyright (c) 2009, James Vega`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions, and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions, and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution.`
			`# * Neither the name of the author of this software nor the name of`
			`# contributors to this software may be used to endorse or promote products`
			`# derived from this software without specific prior written consent.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`###`

			`import re`
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`import HTMLParser`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`import htmlentitydefs`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`import supybot.conf as conf`
			`import supybot.utils as utils`
			`from supybot.commands import *`
			`import supybot.plugins as plugins`
			`import supybot.ircutils as ircutils`
			`import supybot.callbacks as callbacks`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`from supybot.i18n import PluginInternationalization, internationalizeDocstring`
			`_ = PluginInternationalization('Web')`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`class Title(HTMLParser.HTMLParser):`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`entitydefs = htmlentitydefs.entitydefs.copy()`
			`entitydefs['nbsp'] = ' '`
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`entitydefs['apos'] = '\''`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`def __init__(self):`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`self.inTitle = False`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`self.title = ''`
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`HTMLParser.HTMLParser.__init__(self)`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`def handle_starttag(self, tag, attrs):`
			`if tag == 'title':`
			`self.inTitle = True`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`def handle_endtag(self, tag):`
			`if tag == 'title':`
			`self.inTitle = False`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00
			`def handle_data(self, data):`
			`if self.inTitle:`
			`self.title += data`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`def handle_entityref(self, name):`
			`if self.inTitle:`
			`if name in self.entitydefs:`
			`self.title += self.entitydefs[name]`

Changed callbacks.Privmsg to be callbacks.Plugin, and callbacks.PrivmsgCommandAndRegexp to be callbacks.Plugin. 2005-02-09 08:04:04 +01:00			`class Web(callbacks.PluginRegexp):`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`"""Add the help for "@help Web" here."""`
			`threaded = True`
Added the Web.fetch command. 2005-03-14 03:44:55 +01:00			`regexps = ['titleSnarfer']`
Added a callCommand to the Web plugin to catch utils.web.Error. 2005-03-09 08:26:32 +01:00			`def callCommand(self, command, irc, msg, args, *kwargs):`
			`try:`
			`super(Web, self).callCommand(command, irc, msg, args, *kwargs)`
			`except utils.web.Error, e:`
			`irc.reply(str(e))`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def titleSnarfer(self, irc, msg, match):`
			`channel = msg.args[0]`
			`if not irc.isChannel(channel):`
			`return`
			`if callbacks.addressed(irc.nick, msg):`
			`return`
			`if self.registryValue('titleSnarfer', channel):`
			`url = match.group(0)`
			`r = self.registryValue('nonSnarfingRegexp', channel)`
			`if r and r.search(url):`
			`self.log.debug('Not titleSnarfing %q.', url)`
			`return`
			`try:`
			`size = conf.supybot.protocols.http.peekSize()`
			`text = utils.web.getUrl(url, size=size)`
			`except utils.web.Error, e:`
			`self.log.info('Couldn\'t snarf title of %u: %s.', url, e)`
			`return`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`parser = Title()`
Catch HTMLParserErrors when we're trying to grab the <title>. 2005-05-07 05:24:10 +02:00			`try:`
Revert "Web: Fix unicode problems for titleSnarfer." This reverts commit 3579701d2d0461ea9532201f0cb25937e1728c56. 2012-05-11 18:10:23 +02:00			`parser.feed(text)`
Catch the proper exception when parsing the title fails. 2007-10-04 14:57:00 +02:00			`except HTMLParser.HTMLParseError:`
plugins/Web: Encountering an HTMLParser exception doesn't mean the title hasn't already been snarfed. Don't bail right away. 2005-06-29 21:05:20 +02:00			`self.log.debug('Encountered a problem parsing %u. Title may '`
			`'already be set, though', url)`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`if parser.title:`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`domain = utils.web.getDomain(url)`
Revert "Web: Fix unicode problems for titleSnarfer." This reverts commit 3579701d2d0461ea9532201f0cb25937e1728c56. 2012-05-11 18:10:23 +02:00			`title = utils.web.htmlToText(parser.title.strip())`
Web: localized 'Title:' in titleSnarfer 2011-10-26 11:25:51 +02:00			`s = format(_('Title: %s (at %s)'), title, domain)`
Changed prefixName to prefixNick, which is more appropriate, and has always bothered me. Better now than later. 2005-06-01 23:08:30 +02:00			`irc.reply(s, prefixNick=False)`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`titleSnarfer = urlSnarfer(titleSnarfer)`
utils.web: Provide access to the raw httpUrlRe/urlRe strings Using the compiled regexps for a PluginRegexp method's __doc__ doesn't work. Closes Sourceforge #2879862 Signed-off-by: James Vega <jamessan@users.sourceforge.net> 2009-10-16 03:56:26 +02:00			`titleSnarfer.__doc__ = utils.web._httpUrlRe`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def headers(self, irc, msg, args, url):`
			`"""<url>`

			`Returns the HTTP headers of <url>. Only HTTP urls are valid, of`
			`course.`
			`"""`
			`fd = utils.web.getUrlFd(url)`
			`try:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`s = ', '.join([format(_('%s: %s'), k, v)`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`for (k, v) in fd.headers.items()])`
			`irc.reply(s)`
			`finally:`
			`fd.close()`
			`headers = wrap(headers, ['httpUrl'])`

			`_doctypeRe = re.compile(r'(<!DOCTYPE[^>]+>)', re.M)`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def doctype(self, irc, msg, args, url):`
			`"""<url>`

			`Returns the DOCTYPE string of <url>. Only HTTP urls are valid, of`
			`course.`
			`"""`
			`size = conf.supybot.protocols.http.peekSize()`
			`s = utils.web.getUrl(url, size=size)`
			`m = self._doctypeRe.search(s)`
			`if m:`
			`s = utils.str.normalizeWhitespace(m.group(0))`
			`irc.reply(s)`
			`else:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.reply(_('That URL has no specified doctype.'))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`doctype = wrap(doctype, ['httpUrl'])`

Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def size(self, irc, msg, args, url):`
			`"""<url>`

			`Returns the Content-Length header of <url>. Only HTTP urls are valid,`
			`of course.`
			`"""`
			`fd = utils.web.getUrlFd(url)`
			`try:`
			`try:`
			`size = fd.headers['Content-Length']`
Web: Fix use of %S converter. 2011-07-01 17:07:54 +02:00			`irc.reply(format(_('%u is %S long.'), url, int(size)))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`except KeyError:`
			`size = conf.supybot.protocols.http.peekSize()`
			`s = fd.read(size)`
			`if len(s) != size:`
Merge branch 'readability' 2010-10-23 10:38:52 +02:00			`irc.reply(format(_('%u is %S long.'), url, len(s)))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`else:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.reply(format(_('The server didn\'t tell me how long %u '`
Merge branch 'readability' 2010-10-23 10:38:52 +02:00			`'is but it\'s longer than %S.'),`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`url, size))`
			`finally:`
			`fd.close()`
			`size = wrap(size, ['httpUrl'])`

Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def title(self, irc, msg, args, url):`
			`"""<url>`

			`Returns the HTML <title>...</title> of a URL.`
			`"""`
			`size = conf.supybot.protocols.http.peekSize()`
			`text = utils.web.getUrl(url, size=size)`
Bug #1190350, Don't grab fake title. 2005-04-30 14:53:42 +02:00			`parser = Title()`
Catch HTMLParserErrors when we're trying to grab the <title>. 2005-05-07 05:24:10 +02:00			`try:`
Revert "Web: Fix unicode problems for titleSnarfer." This reverts commit 3579701d2d0461ea9532201f0cb25937e1728c56. 2012-05-11 18:10:23 +02:00			`parser.feed(text)`
plugins/Web: Swtich the title parser back to HTMLParser sing sgmllib's parser spins on invalid input. 2006-09-13 21:40:51 +02:00			`except HTMLParser.HTMLParseError:`
plugins/Web: Encountering an HTMLParser exception doesn't mean the title hasn't already been snarfed. Don't bail right away. 2005-06-29 21:05:20 +02:00			`self.log.debug('Encountered a problem parsing %u. Title may '`
			`'already be set, though', url)`
plugins/Web: Fixed the title-retrieval parser to actually retrieve the entire title. 2005-07-19 15:55:37 +02:00			`if parser.title:`
Revert "Web: Fix unicode problems for titleSnarfer." This reverts commit 3579701d2d0461ea9532201f0cb25937e1728c56. 2012-05-11 18:10:23 +02:00			`irc.reply(utils.web.htmlToText(parser.title.strip()))`
Use a more appropriate message if the URL definitely has no title. 2008-09-24 18:30:31 +02:00			`elif len(text) < size:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.reply(_('That URL appears to have no HTML title.'))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`else:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.reply(format(_('That URL appears to have no HTML title '`
Merge branch 'readability' 2010-10-23 10:38:52 +02:00			`'within the first %S.'), size))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`title = wrap(title, ['httpUrl'])`

			`_netcraftre = re.compile(r'td align="left">\s+<a[^>]+>(.*?)<a href',`
			`re.S \| re.I)`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def netcraft(self, irc, msg, args, hostname):`
			`"""<hostname\|ip>`

			`Returns Netcraft.com's determination of what operating system and`
			`webserver is running on the host given.`
			`"""`
			`url = 'http://uptime.netcraft.com/up/graph/?host=' + hostname`
			`html = utils.web.getUrl(url)`
			`m = self._netcraftre.search(html)`
			`if m:`
			`html = m.group(1)`
			`s = utils.web.htmlToText(html, tagReplace='').strip()`
			`s = s.rstrip('-').strip()`
			`irc.reply(s) # Snip off "the site"`
			`elif 'We could not get any results' in html:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.reply(_('No results found for %s.') % hostname)`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`else:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.error(_('The format of page the was odd.'))`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`netcraft = wrap(netcraft, ['text'])`

Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def urlquote(self, irc, msg, args, text):`
			`"""<text>`

			`Returns the URL quoted form of the text.`
			`"""`
			`irc.reply(utils.web.urlquote(text))`
			`urlquote = wrap(urlquote, ['text'])`

Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00			`def urlunquote(self, irc, msg, args, text):`
			`"""<text>`

			`Returns the text un-URL quoted.`
			`"""`
			`s = utils.web.urlunquote(text)`
			`irc.reply(s)`
			`urlunquote = wrap(urlunquote, ['text'])`

Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`@internationalizeDocstring`
Added the Web.fetch command. 2005-03-14 03:44:55 +01:00			`def fetch(self, irc, msg, args, url):`
			`"""<url>`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
Added the Web.fetch command. 2005-03-14 03:44:55 +01:00			`Returns the contents of <url>, or as much as is configured in`
			`supybot.plugins.Web.fetch.maximum. If that configuration variable is`
			`set to 0, this command will be effectively disabled.`
			`"""`
			`max = self.registryValue('fetch.maximum')`
			`if not max:`
Internationalize Time, Todo, Topic, URL, Unix, Utilities, and Web 2010-10-20 09:39:44 +02:00			`irc.error(_('This command is disabled '`
			`'(supybot.plugins.Web.fetch.maximum is set to 0).'),`
Added the Web.fetch command. 2005-03-14 03:44:55 +01:00			`Raise=True)`
			`fd = utils.web.getUrlFd(url)`
			`irc.reply(fd.read(max))`
			`fetch = wrap(fetch, ['url'])`
Added the Web plugin (from pieces of Http, Fun, and URL) in the new plugin format. 2005-02-01 10:41:54 +01:00
			`Class = Web`

Change the modeline to use softtabstop instead of tabstop. 2006-02-11 16:52:51 +01:00			`# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:`
No results found.