###
# Copyright (c) 2002-2004, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Keeps track of URLs posted to a channel, along with relevant context.  Allows
searching for URLs and returning random URLs.  Also provides statistics on the
URLs in the database.
"""

__revision__ = "$Id$"

import supybot.plugins as plugins

import re

import supybot.dbi as dbi
import supybot.conf as conf
import supybot.utils as utils
from supybot.commands import *
import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils
import supybot.webutils as webutils
import supybot.registry as registry
import supybot.callbacks as callbacks


def configure(advanced):
    """Register the plugin and ask whether the title snarfer should be on.

    ``advanced`` is accepted for the caller's sake but is not consulted here.
    """
    from supybot.questions import output, expect, anything, something, yn
    conf.registerPlugin('URL', True)
    if yn("This plugin also offers a snarfer that will try to fetch the "
          "title of URLs that it sees in the channel. Would like you this "
          "snarfer to be enabled?", default=False):
        conf.supybot.plugins.URL.titleSnarfer.setValue(True)


# Per-channel registry values read by the URL callback below.
conf.registerPlugin('URL')
conf.registerChannelValue(conf.supybot.plugins.URL, 'titleSnarfer',
    registry.Boolean(False, "Determines whether the bot will output the "
    "HTML title of URLs it sees in the channel."))
conf.registerChannelValue(conf.supybot.plugins.URL, 'nonSnarfingRegexp',
    registry.Regexp(None, "Determines what URLs are to be snarfed and "
    "stored in the database in the channel; URLs matching the regexp given "
    "will not be snarfed. Give the empty string if you have no URLs that "
    "you'd like to exclude from being snarfed."))


class UrlRecord(dbi.Record):
    """One stored URL: the URL, who posted it, the message text, and when."""
    # NOTE(review): each field is paired with eval, presumably the converter
    # dbi applies when reading a stored value back.  eval() executes whatever
    # expression is in the database file, so a tampered file means arbitrary
    # code execution -- confirm the file is trusted or swap in a safe parser.
    __fields__ = [
        ('url', eval),
        ('by', eval),
        ('near', eval),
        ('at', eval),
        ]


class DbiUrlDB(plugins.DbiChannelDB):
    """Channel-keyed URL database backed by dbi."""

    class DB(dbi.DB):
        Record = UrlRecord

        def add(self, url, msg):
            """Store ``url`` together with the context taken from ``msg``."""
            record = self.Record(url=url, by=msg.nick,
                                 near=msg.args[1], at=msg.receivedAt)
            # Name the class explicitly: super(self.__class__, self) would
            # recurse forever as soon as this class is subclassed.
            super(DbiUrlDB.DB, self).add(record)

        def urls(self, p):
            """Return the records accepted by predicate ``p``, in the reverse
            of the order ``select`` yields them."""
            L = list(self.select(p))
            L.reverse()
            return L


URLDB = plugins.DB('URL', {'flat': DbiUrlDB})


class URL(callbacks.PrivmsgCommandAndRegexp):
    """Records URLs seen in channels and offers commands to query them."""
    priority = 100 # lower than 99, the normal priority.
    regexps = ['titleSnarfer']
    # Captures the page title; DOTALL because titles may span several lines.
    _titleRe = re.compile('<title>(.*?)</title>', re.I | re.S)

    def __init__(self):
        self.__parent = super(URL, self)
        self.__parent.__init__()
        self.db = URLDB()

    def doPrivmsg(self, irc, msg):
        """Add every URL found in a channel message to the database, then
        hand the message to the parent class."""
        channel = msg.args[0]
        if ircutils.isChannel(channel):
            if ircmsgs.isAction(msg):
                text = ircmsgs.unAction(msg)
            else:
                text = msg.args[1]
            # The exclusion regexp does not depend on the URL; look it up once.
            r = self.registryValue('nonSnarfingRegexp', channel)
            for url in webutils.urlRe.findall(text):
                if r and r.search(url):
                    self.log.debug('Skipping adding %s to db.',
                                   utils.quoted(url))
                    continue
                self.log.debug('Adding %s to db.', utils.quoted(url))
                self.db.add(channel, url, msg)
        # NOTE(review): placed outside the channel check so that non-channel
        # messages still reach the parent's dispatch -- confirm against the
        # original layout.
        self.__parent.doPrivmsg(irc, msg)

    def titleSnarfer(self, irc, msg, match):
        r"https?://[^\])>\s]+"
        # The docstring above is a regexp, not prose: this method is listed in
        # ``regexps`` and receives a match object, so leave it untouched.
        channel = msg.args[0]
        if not ircutils.isChannel(channel):
            return
        if callbacks.addressed(irc.nick, msg):
            return
        if self.registryValue('titleSnarfer', channel):
            url = match.group(0)
            r = self.registryValue('nonSnarfingRegexp', channel)
            if r and r.search(url):
                self.log.debug('Not titleSnarfing %s.', utils.quoted(url))
                return
            try:
                size = conf.supybot.protocols.http.peekSize()
                text = webutils.getUrl(url, size=size)
            except webutils.WebError as e:
                self.log.info('Couldn\'t snarf title of %s, %s.', url, e)
                return
            m = self._titleRe.search(text)
            if m is not None:
                domain = webutils.getDomain(url)
                title = utils.htmlToText(m.group(1).strip())
                s = 'Title: %s (at %s)' % (title, domain)
                irc.reply(s, prefixName=False)
    titleSnarfer = urlSnarfer(titleSnarfer)

    def stats(self, irc, msg, args, channel):
        """[<channel>]

        Returns the number of URLs in the URL database.  <channel> is only
        required if the message isn't sent in the channel itself.
        """
        self.db.vacuum(channel)
        count = self.db.size(channel)
        irc.reply('I have %s in my database.' % utils.nItems('URL', count))
    stats = wrap(stats, ['channeldb'])

    def last(self, irc, msg, args, channel, optlist):
        """[<channel>] [--{from,with,without,near,proto}=<value>] --nolimit

        Gives the last URL matching the given criteria.  --from is from whom
        the URL came; --proto is the protocol the URL used; --with is something
        inside the URL; --without is something that should not be in the URL;
        --near is something in the same message as the URL.  If --nolimit is
        given, returns all the URLs that are found instead of just the last
        one.  <channel> is only necessary if the message isn't sent in the
        channel itself.
        """
        predicates = []
        nolimit = False
        for (option, arg) in optlist:
            # Start fresh for each option so a filter built for an earlier
            # option is not appended a second time.
            f = None
            if option == 'nolimit':
                nolimit = True
            elif option == 'from':
                def f(record, arg=arg):
                    return ircutils.strEqual(record.by, arg)
            elif option == 'with':
                def f(record, arg=arg):
                    return arg in record.url
            elif option == 'without':
                def f(record, arg=arg):
                    return arg not in record.url
            elif option == 'proto':
                def f(record, arg=arg):
                    return record.url.startswith(arg)
            elif option == 'near':
                def f(record, arg=arg):
                    return arg in record.near
            if f is not None:
                predicates.append(f)
        def predicate(record):
            # A record qualifies only if every requested filter accepts it.
            for check in predicates:
                if not check(record):
                    return False
            return True
        urls = [record.url for record in self.db.urls(channel, predicate)]
        if not urls:
            irc.reply('No URLs matched that criteria.')
        else:
            if nolimit:
                urls = ['<%s>' % url for url in urls]
                s = ', '.join(urls)
            else:
                # We should optimize this with another URLDB method eventually.
                s = urls[0]
            irc.reply(s)
    last = wrap(last, ['channeldb',
                       getopts({'from': 'text', 'with': 'text',
                                'near': 'text', 'proto': 'text',
                                'nolimit': '', 'without': 'text',})])


Class = URL

# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: