Limnoria/plugins/URLSnarfer.py

#!/usr/bin/env python

###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Keeps track of URLs posted to a channel, along with relevant context.  Allows
searching for URLs and returning random URLs.  Also provides statistics on the
URLs in the database.
"""

from baseplugin import *

import re
import time
import getopt
import urlparse

import sqlite

import privmsgs
import callbacks


def configure(onStart, afterConnect, advanced):
    """Register the commands that load this plugin when the bot starts.

    This is called by setup.py to configure this module.  onStart and
    afterConnect are both lists: commands appended to onStart are run when
    the bot is started; commands appended to afterConnect are run when the
    bot has finished connecting.  Neither afterConnect nor advanced is used
    here.
    """
    # No questions need to be asked of the user; loading the plugin is the
    # only setup step.
    onStart.append('load URLSnarfer')

class URLSnarfer(callbacks.Privmsg, ChannelDBHandler):
    """Plugin that records URLs said in a channel and answers queries on them.

    URLs are stored in the database returned by self.getDb(channel).  Each
    row keeps the URL, who posted it and when, the parsed parts of the URL,
    and the poster's previous, current and next messages.
    """
    def __init__(self):
        # Maps (nick, channel) to a list of (url, added) pairs whose rows
        # still need next_msg filled in with that nick's next message.
        self.nextMsgs = {}
        callbacks.Privmsg.__init__(self)
        ChannelDBHandler.__init__(self)

    def makeDb(self, filename):
        """Open the sqlite database at filename.

        When the file does not exist yet, the urls table is created and
        committed before the connection is returned.
        """
        # NOTE(review): os is not imported by this module; presumably it
        # arrives through `from baseplugin import *` -- confirm.
        if os.path.exists(filename):
            return sqlite.connect(filename)
        db = sqlite.connect(filename)
        cursor = db.cursor()
        cursor.execute("""CREATE TABLE urls (
                          id INTEGER PRIMARY KEY,
                          url TEXT,
                          added TIMESTAMP,
                          added_by TEXT,
                          previous_msg TEXT,
                          current_msg TEXT,
                          next_msg TEXT,
                          protocol TEXT,
                          site TEXT,
                          filename TEXT
                          )""")
        db.commit()
        return db

    def _findPreviousMsg(self, history, msg, channel):
        """Return the text of msg.nick's latest earlier PRIVMSG in channel.

        history is walked newest-first and the first match wins.  An empty
        string is returned when there is no such message.
        """
        for oldMsg in reviter(history):
            # The history may already contain the message being handled;
            # it must not be reported as its own predecessor.
            if oldMsg is msg:
                continue
            if oldMsg.command != 'PRIVMSG':
                continue
            if oldMsg.nick == msg.nick and oldMsg.args[0] == channel:
                return oldMsg.args[1]
        return ''

    def _wildcard(self, argument):
        """Wrap argument in LIKE wildcards unless it already contains one."""
        if '%' not in argument:
            argument = '%%%s%%' % argument
        return argument

    _urlRe = re.compile(r"(\S+://\S+)", re.I)
    def doPrivmsg(self, irc, msg):
        """Store every URL found in msg along with its surrounding context.

        Rows added for this nick's earlier message get their next_msg set to
        the text of this message; then one row is inserted per URL found.
        """
        callbacks.Privmsg.doPrivmsg(self, irc, msg)
        # NOTE(review): msg.args[0] is used as the channel without checking
        # that it is one -- confirm private messages are handled upstream.
        channel = msg.args[0]
        text = msg.args[1]
        db = self.getDb(channel)
        cursor = db.cursor()
        key = (msg.nick, channel)
        if key in self.nextMsgs:
            for (url, added) in self.nextMsgs.pop(key):
                cursor.execute("""UPDATE urls SET next_msg=%s
                                  WHERE url=%s AND added=%s""",
                               text, url, added)
        urls = self._urlRe.findall(text)
        if urls:
            # Both values are the same for every URL in one message, so they
            # are computed once rather than per URL.
            previousMsg = self._findPreviousMsg(irc.state.history, msg,
                                                channel)
            added = int(time.time())
            addedBy = msg.nick
            for url in urls:
                (protocol, site, filename, _, _, _) = urlparse.urlparse(url)
                # Values follow the column order of the urls table:
                # id, url, added, added_by, previous_msg, current_msg,
                # next_msg, protocol, site, filename.
                cursor.execute("""INSERT INTO urls VALUES
                                  (NULL, %s, %s, %s, %s, %s, '', %s, %s, %s)""",
                               url, added, addedBy, previousMsg, text,
                               protocol, site, filename)
                self.nextMsgs.setdefault(key, []).append((url, added))
        db.commit()

    def randomurl(self, irc, msg, args):
        """[<channel>]

        Returns a random URL from the URL database.  <channel> is only required
        if the message isn't sent in the channel itself.
        """
        channel = privmsgs.getChannel(msg, args)
        db = self.getDb(channel)
        cursor = db.cursor()
        cursor.execute("""SELECT id, url, added, added_by FROM urls
                          ORDER BY random() LIMIT 1""")
        row = cursor.fetchone()
        if row is None:
            # An empty table yields no row; say so instead of failing on
            # the unpacking below.
            irc.reply(msg, 'I have no URLs in my database.')
            return
        (urlId, url, added, addedBy) = row
        when = time.ctime(int(added))
        s = '%s: <%s> (added by %s on %s)' % (urlId, url, addedBy, when)
        irc.reply(msg, s)

    def numurls(self, irc, msg, args):
        """[<channel>]

        Returns the number of URLs in the URL database.  <channel> is only
        required if the message isn't sent in the channel itself.
        """
        channel = privmsgs.getChannel(msg, args)
        db = self.getDb(channel)
        cursor = db.cursor()
        cursor.execute("""SELECT COUNT(*) FROM urls""")
        (count,) = cursor.fetchone()
        # The and/or form is safe here because 'URL' is always true.
        noun = int(count) == 1 and 'URL' or 'URLs'
        irc.reply(msg, 'I have %s %s in my database.' % (count, noun))

    def lasturl(self, irc, msg, args):
        """[<channel>] [--{from,with,at,proto,near}=<value>]

        Gives the last URL matching the given criteria.  --from is from whom
        the URL came; --at is the site of the URL; --proto is the protocol the
        URL used; --with is something inside the URL; --near is a string in the
        messages before and after the link.  <channel> is only necessary if the
        message isn't sent in the channel itself.
        """
        channel = privmsgs.getChannel(msg, args)
        # getopt.GetoptError is deliberately not caught here; it propagates
        # to the caller exactly as before.
        (optlist, rest) = getopt.getopt(args, '', ['from=', 'with=', 'at=',
                                                   'proto=', 'near='])
        # '1=1' keeps the WHERE clause valid when no option is given.
        criteria = ['1=1']
        formats = []
        for (option, argument) in optlist:
            option = option[2:] # Strip off the --.
            if option == 'from':
                criteria.append('added_by LIKE %s')
                formats.append(argument)
            elif option == 'with':
                # --with is documented as "something inside the URL", so it
                # is a substring match, the same as --near.
                criteria.append('url LIKE %s')
                formats.append(self._wildcard(argument))
            elif option == 'at':
                criteria.append('site LIKE %s')
                formats.append(argument)
            elif option == 'proto':
                criteria.append('protocol=%s')
                formats.append(argument)
            elif option == 'near':
                criteria.append("""(previous_msg LIKE %s OR
                                    next_msg LIKE %s OR
                                    current_msg LIKE %s)""")
                # One value per placeholder in the clause above.
                formats.extend([self._wildcard(argument)] * 3)
        db = self.getDb(channel)
        cursor = db.cursor()
        criterion = ' AND '.join(criteria)
        # Only the newest match is reported, so only one row is requested.
        sql = 'SELECT url FROM urls WHERE %s ORDER BY id DESC LIMIT 1' % \
              criterion
        cursor.execute(sql, *formats)
        # fetchone() returns None when nothing matched; this does not rely
        # on the driver reporting a rowcount for SELECT statements.
        row = cursor.fetchone()
        if row is None:
            irc.reply(msg, 'No URLs matched that criteria.')
        else:
            (url,) = row
            irc.reply(msg, url)
            
    
# Module-level alias for the URLSnarfer plugin class.
# NOTE(review): presumably the plugin loader looks this name up -- confirm.
Class = URLSnarfer

# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00			`#!/usr/bin/env python`

			`###`
			`# Copyright (c) 2002, Jeremiah Fincher`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions, and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions, and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution.`
			`# * Neither the name of the author of this software nor the name of`
			`# contributors to this software may be used to endorse or promote products`
			`# derived from this software without specific prior written consent.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`###`

			`"""`
			`Keeps track of URLs posted to a channel, along with relevant context. Allows`
			`searching for URLs and returning random URLs. Also provides statistics on the`
			`URLs in the database.`
			`"""`

			`from baseplugin import *`

			`import re`
			`import time`
Added lasturl command. 2003-08-19 11:10:41 +02:00			`import getopt`
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00			`import urlparse`

			`import sqlite`

			`import privmsgs`
			`import callbacks`


			`def configure(onStart, afterConnect, advanced):`
			`# This will be called by setup.py to configure this module. onStart and`
			`# afterConnect are both lists. Append to onStart the commands you would`
			`# like to be run when the bot is started; append to afterConnect the`
			`# commands you would like to be run when the bot has finished connecting.`
			`from questions import expect, anything, something, yn`
			`onStart.append('load URLSnarfer')`

			`class URLSnarfer(callbacks.Privmsg, ChannelDBHandler):`
			`def __init__(self):`
			`self.nextMsgs = {}`
			`callbacks.Privmsg.__init__(self)`
			`ChannelDBHandler.__init__(self)`

			`def makeDb(self, filename):`
			`if os.path.exists(filename):`
			`return sqlite.connect(filename)`
			`db = sqlite.connect(filename)`
			`cursor = db.cursor()`
			`cursor.execute("""CREATE TABLE urls (`
			`id INTEGER PRIMARY KEY,`
			`url TEXT,`
			`added TIMESTAMP,`
			`added_by TEXT,`
			`previous_msg TEXT,`
			`current_msg TEXT,`
			`next_msg TEXT,`
			`protocol TEXT,`
			`site TEXT,`
			`filename TEXT`
			`)""")`
			`db.commit()`
			`return db`

Made it snarf all URLs, not just ftp/http. 2003-08-19 19:23:06 +02:00			`_urlRe = re.compile(r"(\S+://\S+)", re.I)`
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00			`def doPrivmsg(self, irc, msg):`
			`callbacks.Privmsg.doPrivmsg(self, irc, msg)`
			`channel = msg.args[0]`
			`db = self.getDb(channel)`
			`cursor = db.cursor()`
			`if (msg.nick, channel) in self.nextMsgs:`
			`L = self.nextMsgs.pop((msg.nick, msg.args[0]))`
			`for (url, added) in L:`
			`cursor.execute("""UPDATE urls SET next_msg=%s`
			`WHERE url=%s AND added=%s""",`
			`msg.args[1], url, added)`
			`for url in self._urlRe.findall(msg.args[1]):`
			`(protocol, site, filename, _, _, _) = urlparse.urlparse(url)`
			`previousMsg = ''`
			`for oldMsg in reviter(irc.state.history):`
			`if oldMsg.command == 'PRIVMSG':`
			`if oldMsg.nick == msg.nick and oldMsg.args[0] == channel:`
			`previousMsg = oldMsg.args[1]`
			`addedBy = msg.nick`
			`added = int(time.time())`
			`cursor.execute("""INSERT INTO urls VALUES`
			`(NULL, %s, %s, %s, %s, %s, '', %s, %s, %s)""",`
			`url, added, addedBy, msg.args[1], previousMsg,`
			`protocol, site, filename)`
			`key = (msg.nick, channel)`
			`self.nextMsgs.setdefault(key, []).append((url, added))`
			`db.commit()`

			`def randomurl(self, irc, msg, args):`
			`"""[<channel>]`

			`Returns a random URL from the URL database. <channel> is only required`
			`if the message isn't sent in the channel itself.`
			`"""`
Added lasturl command. 2003-08-19 11:10:41 +02:00			`channel = privmsgs.getChannel(msg, args)`
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00			`db = self.getDb(channel)`
			`cursor = db.cursor()`
			`cursor.execute("""SELECT * FROM urls ORDER BY random() LIMIT 1""")`
			`(id, url, added, addedBy, _, _, _, _, _, _) = cursor.fetchone()`
			`when = time.ctime(int(added))`
Added lasturl command. 2003-08-19 11:10:41 +02:00			`s = '%s: <%s> (added by %s on %s)' % (id, url, addedBy, when)`
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00			`irc.reply(msg, s)`

			`def numurls(self, irc, msg, args):`
			`"""[<channel>]`

			`Returns the number of URLs in the URL database. <channel> is only`
			`required if the message isn't sent in the channel itself.`
			`"""`
Added lasturl command. 2003-08-19 11:10:41 +02:00			`channel = privmsgs.getChannel(msg, args)`
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00			`db = self.getDb(channel)`
			`cursor = db.cursor()`
			`cursor.execute("""SELECT COUNT(*) FROM urls""")`
			`(count,) = cursor.fetchone()`
			`irc.reply(msg, 'I have %s %s in my database.' % \`
			`(count, int(count) == 1 and 'URL' or 'URLs'))`
Added lasturl command. 2003-08-19 11:10:41 +02:00
			`def lasturl(self, irc, msg, args):`
			`"""[<channel>] [--{from,with,at,proto,near}=<value>]`

			`Gives the last URL matching the given criteria. --from is from whom`
			`the URL came; --at is the site of the URL; --proto is the protocol the`
			`URL used; --with is something inside the URL; --near is a string in the`
			`messages before and after the link. <channel> is only necessary if the`
			`message isn't sent in the channel itself.`
			`"""`
			`channel = privmsgs.getChannel(msg, args)`
			`(optlist, rest) = getopt.getopt(args, '', ['from=', 'with=', 'at=',`
			`'proto=', 'near='])`
			`criteria = ['1=1']`
			`formats = []`
			`for (option, argument) in optlist:`
			`option = option[2:] # Strip off the --.`
			`if option == 'from':`
			`criteria.append('added_by LIKE %s')`
			`formats.append(argument)`
			`elif option == 'with':`
			`criteria.append('url LIKE %s')`
			`formats.append(argument)`
			`elif option == 'at':`
			`criteria.append('site LIKE %s')`
			`formats.append(argument)`
			`elif option == 'proto':`
			`criteria.append('protocol=%s')`
			`formats.append(argument)`
			`elif option == 'near':`
			`criteria.append("""(previous_msg LIKE %s OR`
			`next_msg LIKE %s OR`
			`current_msg LIKE %s)""")`
			`if '%' not in argument:`
			`argument = '%%%s%%' % argument`
			`formats.append(argument)`
			`formats.append(argument)`
			`formats.append(argument)`
			`db = self.getDb(channel)`
			`cursor = db.cursor()`
			`criterion = ' AND '.join(criteria)`
			`sql = 'SELECT url FROM urls WHERE %s ORDER BY id DESC' % criterion`
			`cursor.execute(sql, *formats)`
			`if cursor.rowcount == 0:`
			`irc.reply(msg, 'No URLs matched that criteria.')`
			`else:`
			`(url,) = cursor.fetchone()`
			`irc.reply(msg, url)`
Basic snarfing and random URL returning is done. 2003-08-17 11:31:04 +02:00

			`Class = URLSnarfer`

			`# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:`