2003-08-17 11:31:04 +02:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
###
|
|
|
|
# Copyright (c) 2002, Jeremiah Fincher
|
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
#
|
|
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer.
|
|
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer in the
|
|
|
|
# documentation and/or other materials provided with the distribution.
|
|
|
|
# * Neither the name of the author of this software nor the name of
|
|
|
|
# contributors to this software may be used to endorse or promote products
|
|
|
|
# derived from this software without specific prior written consent.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
###
|
|
|
|
|
|
|
|
"""
|
|
|
|
Keeps track of URLs posted to a channel, along with relevant context. Allows
|
|
|
|
searching for URLs and returning random URLs. Also provides statistics on the
|
|
|
|
URLs in the database.
|
|
|
|
"""
|
|
|
|
|
|
|
|
from baseplugin import *
|
|
|
|
|
|
|
|
import re
|
|
|
|
import time
|
2003-08-19 11:10:41 +02:00
|
|
|
import getopt
|
2003-08-17 11:31:04 +02:00
|
|
|
import urlparse
|
|
|
|
|
|
|
|
import sqlite
|
|
|
|
|
2003-08-27 18:25:43 +02:00
|
|
|
import utils
|
2003-08-17 11:31:04 +02:00
|
|
|
import privmsgs
|
|
|
|
import callbacks
|
|
|
|
|
2003-08-27 18:25:43 +02:00
|
|
|
example = utils.wrapLines("""
|
2003-08-26 13:39:18 +02:00
|
|
|
<supybot> lasturl, numurls, randomurl
|
|
|
|
<jemfinch> @numurls
|
|
|
|
<supybot> Error: Command must be sent in a channel or include a channel in its arguments.
|
|
|
|
<jemfinch> (each channel has its own URL database, so these commands need to know which channel database to access)
|
|
|
|
<jemfinch> @numurls #sourcereview
|
|
|
|
<supybot> I have 93 URLs in my database.
|
|
|
|
<jemfinch> @randomurl #sourcereview
|
2003-08-26 13:54:24 +02:00
|
|
|
<supybot> <http://gameknot.com/chess.pl?bd=1022045&r=735> (added by Strike at 06:25 PM, August 17, 2003)
|
2003-08-26 13:39:18 +02:00
|
|
|
<jemfinch> @lasturl
|
|
|
|
<supybot> Error: Command must be sent in a channel or include a channel in its arguments.
|
|
|
|
<jemfinch> @lasturl #sourcereview
|
|
|
|
<supybot> <http://lambda.weblogs.com/xml/rss.xml>, added by Strike at 12:12 AM, August 26, 2003.
|
|
|
|
<jemfinch> @lasturl #sourcereview --proto ftp
|
|
|
|
<supybot> <ftp://ftp.us.debian.org/debian/dists/unstable/Contents-i386.gz>, added by Strike|work at 07:27 PM, August 21, 2003.
|
|
|
|
<jemfinch> @lasturl #sourcereview --near Stuka
|
|
|
|
<supybot> <http://slashdot.org/comments.pl?sid=75020&cid=6720123>, added by jemfinch at 03:16 PM, August 25, 2003.
|
|
|
|
<jemfinch> @lasturl #sourcereview --at coderforums.com
|
|
|
|
<supybot> <http://coderforums.com/showthread.php?s=&postid=15798#post15798>, added by Strike|work at 02:41 PM, August 25, 2003.
|
2003-08-27 18:25:43 +02:00
|
|
|
""")
|
2003-08-17 11:31:04 +02:00
|
|
|
|
|
|
|
def configure(onStart, afterConnect, advanced):
|
|
|
|
# This will be called by setup.py to configure this module. onStart and
|
|
|
|
# afterConnect are both lists. Append to onStart the commands you would
|
|
|
|
# like to be run when the bot is started; append to afterConnect the
|
|
|
|
# commands you would like to be run when the bot has finished connecting.
|
|
|
|
from questions import expect, anything, something, yn
|
|
|
|
onStart.append('load URLSnarfer')
|
|
|
|
|
|
|
|
class URLSnarfer(callbacks.Privmsg, ChannelDBHandler):
|
|
|
|
def __init__(self):
|
|
|
|
self.nextMsgs = {}
|
|
|
|
callbacks.Privmsg.__init__(self)
|
|
|
|
ChannelDBHandler.__init__(self)
|
|
|
|
|
|
|
|
def makeDb(self, filename):
|
|
|
|
if os.path.exists(filename):
|
|
|
|
return sqlite.connect(filename)
|
|
|
|
db = sqlite.connect(filename)
|
|
|
|
cursor = db.cursor()
|
|
|
|
cursor.execute("""CREATE TABLE urls (
|
|
|
|
id INTEGER PRIMARY KEY,
|
|
|
|
url TEXT,
|
|
|
|
added TIMESTAMP,
|
|
|
|
added_by TEXT,
|
|
|
|
previous_msg TEXT,
|
|
|
|
current_msg TEXT,
|
|
|
|
next_msg TEXT,
|
|
|
|
protocol TEXT,
|
|
|
|
site TEXT,
|
|
|
|
filename TEXT
|
|
|
|
)""")
|
|
|
|
db.commit()
|
|
|
|
return db
|
|
|
|
|
2003-08-26 13:39:18 +02:00
|
|
|
_urlRe = re.compile(r"([^<\s]+://[^>\s]+)", re.I)
|
2003-08-17 11:31:04 +02:00
|
|
|
def doPrivmsg(self, irc, msg):
|
|
|
|
callbacks.Privmsg.doPrivmsg(self, irc, msg)
|
|
|
|
channel = msg.args[0]
|
|
|
|
db = self.getDb(channel)
|
|
|
|
cursor = db.cursor()
|
|
|
|
if (msg.nick, channel) in self.nextMsgs:
|
|
|
|
L = self.nextMsgs.pop((msg.nick, msg.args[0]))
|
|
|
|
for (url, added) in L:
|
|
|
|
cursor.execute("""UPDATE urls SET next_msg=%s
|
|
|
|
WHERE url=%s AND added=%s""",
|
|
|
|
msg.args[1], url, added)
|
|
|
|
for url in self._urlRe.findall(msg.args[1]):
|
|
|
|
(protocol, site, filename, _, _, _) = urlparse.urlparse(url)
|
|
|
|
previousMsg = ''
|
|
|
|
for oldMsg in reviter(irc.state.history):
|
|
|
|
if oldMsg.command == 'PRIVMSG':
|
|
|
|
if oldMsg.nick == msg.nick and oldMsg.args[0] == channel:
|
|
|
|
previousMsg = oldMsg.args[1]
|
|
|
|
addedBy = msg.nick
|
|
|
|
added = int(time.time())
|
|
|
|
cursor.execute("""INSERT INTO urls VALUES
|
|
|
|
(NULL, %s, %s, %s, %s, %s, '', %s, %s, %s)""",
|
|
|
|
url, added, addedBy, msg.args[1], previousMsg,
|
|
|
|
protocol, site, filename)
|
|
|
|
key = (msg.nick, channel)
|
|
|
|
self.nextMsgs.setdefault(key, []).append((url, added))
|
|
|
|
db.commit()
|
|
|
|
|
2003-08-29 02:40:28 +02:00
|
|
|
def _formatUrl(self, url, added, addedBy):
|
|
|
|
#debug.printf((url, added, addedBy))
|
|
|
|
when = time.strftime(conf.humanTimestampFormat,
|
|
|
|
time.localtime(int(added)))
|
|
|
|
return '<%s> (added by %s at %s)' % (url, addedBy, when)
|
|
|
|
|
|
|
|
def _formatUrlWithId(self, id, url, added, addedBy):
|
|
|
|
#debug.printf((id, url, added, addedBy))
|
|
|
|
return '#%s: %s' % (id, self._formatUrl(url, added, addedBy))
|
|
|
|
|
2003-08-17 11:31:04 +02:00
|
|
|
def randomurl(self, irc, msg, args):
|
|
|
|
"""[<channel>]
|
|
|
|
|
|
|
|
Returns a random URL from the URL database. <channel> is only required
|
|
|
|
if the message isn't sent in the channel itself.
|
|
|
|
"""
|
2003-08-19 11:10:41 +02:00
|
|
|
channel = privmsgs.getChannel(msg, args)
|
2003-08-17 11:31:04 +02:00
|
|
|
db = self.getDb(channel)
|
|
|
|
cursor = db.cursor()
|
2003-08-29 01:59:03 +02:00
|
|
|
cursor.execute("""SELECT id, url, added, added_by
|
|
|
|
FROM urls
|
|
|
|
ORDER BY random()
|
|
|
|
LIMIT 1""")
|
|
|
|
if cursor.rowcount == 0:
|
|
|
|
irc.reply(msg, 'I have no URLs in my database for %s' % channel)
|
|
|
|
else:
|
2003-08-29 02:40:28 +02:00
|
|
|
irc.reply(msg, self._formatUrlWithId(*cursor.fetchone()))
|
|
|
|
|
|
|
|
def geturl(self, irc, msg, args):
|
|
|
|
"""[<channel>] <id>
|
|
|
|
|
|
|
|
Gets the URL with id <id> from the URL database for <channel>.
|
|
|
|
<channel> is only necessary if not sent in the channel itself.
|
|
|
|
"""
|
|
|
|
channel = privmsgs.getChannel(msg, args)
|
|
|
|
db = self.getDb(channel)
|
|
|
|
cursor = db.cursor()
|
|
|
|
id = privmsgs.getArgs(args)
|
|
|
|
cursor.execute("""SELECT url, added, added_by
|
|
|
|
FROM urls
|
|
|
|
WHERE id=%s""", id)
|
|
|
|
if cursor.rowcount == 0:
|
|
|
|
irc.reply(msg, 'No URL was found with that id.')
|
|
|
|
else:
|
|
|
|
irc.reply(msg, self._formatUrl(*cursor.fetchone()))
|
2003-08-17 11:31:04 +02:00
|
|
|
|
|
|
|
def numurls(self, irc, msg, args):
|
|
|
|
"""[<channel>]
|
|
|
|
|
|
|
|
Returns the number of URLs in the URL database. <channel> is only
|
|
|
|
required if the message isn't sent in the channel itself.
|
|
|
|
"""
|
2003-08-19 11:10:41 +02:00
|
|
|
channel = privmsgs.getChannel(msg, args)
|
2003-08-17 11:31:04 +02:00
|
|
|
db = self.getDb(channel)
|
|
|
|
cursor = db.cursor()
|
|
|
|
cursor.execute("""SELECT COUNT(*) FROM urls""")
|
|
|
|
(count,) = cursor.fetchone()
|
|
|
|
irc.reply(msg, 'I have %s %s in my database.' % \
|
|
|
|
(count, int(count) == 1 and 'URL' or 'URLs'))
|
2003-08-19 11:10:41 +02:00
|
|
|
|
2003-08-29 02:40:28 +02:00
|
|
|
def lasturls(self, irc, msg, args):
|
2003-08-30 04:40:03 +02:00
|
|
|
"""[<channel>] [--{from,with,at,proto,near}=<value>]
|
|
|
|
|
|
|
|
Uses arguments in the same way as lasturl; acts as if lasturl was given
|
|
|
|
the --nolimit option.
|
|
|
|
"""
|
|
|
|
if '--nolimit' not in args:
|
|
|
|
args.append('--nolimit')
|
2003-08-29 02:40:28 +02:00
|
|
|
self.lasturl(irc, msg, args)
|
|
|
|
|
2003-08-19 11:10:41 +02:00
|
|
|
def lasturl(self, irc, msg, args):
|
2003-08-29 02:40:28 +02:00
|
|
|
"""[<channel>] [--{from,with,at,proto,near}=<value>] [--nolimit]
|
2003-08-19 11:10:41 +02:00
|
|
|
|
|
|
|
Gives the last URL matching the given criteria. --from is from whom
|
|
|
|
the URL came; --at is the site of the URL; --proto is the protocol the
|
|
|
|
URL used; --with is something inside the URL; --near is a string in the
|
2003-08-29 02:40:28 +02:00
|
|
|
messages before and after the link. If --nolimit is given, returns as
|
|
|
|
many URLs as can fit in the message. <channel> is only necessary if the
|
2003-08-19 11:10:41 +02:00
|
|
|
message isn't sent in the channel itself.
|
|
|
|
"""
|
|
|
|
channel = privmsgs.getChannel(msg, args)
|
|
|
|
(optlist, rest) = getopt.getopt(args, '', ['from=', 'with=', 'at=',
|
2003-08-29 02:40:28 +02:00
|
|
|
'proto=', 'near=',
|
|
|
|
'nolimit'])
|
2003-08-19 11:10:41 +02:00
|
|
|
criteria = ['1=1']
|
|
|
|
formats = []
|
2003-08-29 02:40:28 +02:00
|
|
|
nolimit = False
|
2003-08-19 11:10:41 +02:00
|
|
|
for (option, argument) in optlist:
|
|
|
|
option = option[2:] # Strip off the --.
|
2003-08-29 02:40:28 +02:00
|
|
|
if option == 'nolimit':
|
|
|
|
nolimit = True
|
2003-08-19 11:10:41 +02:00
|
|
|
if option == 'from':
|
|
|
|
criteria.append('added_by LIKE %s')
|
|
|
|
formats.append(argument)
|
|
|
|
elif option == 'with':
|
2003-08-29 01:59:03 +02:00
|
|
|
if '%' not in argument and '_' not in argument:
|
|
|
|
argument = '%%%s%%' % argument
|
2003-08-19 11:10:41 +02:00
|
|
|
criteria.append('url LIKE %s')
|
|
|
|
formats.append(argument)
|
|
|
|
elif option == 'at':
|
2003-08-26 20:14:13 +02:00
|
|
|
if '%' not in argument and '_' not in argument:
|
|
|
|
argument = '%' + argument
|
2003-08-19 11:10:41 +02:00
|
|
|
criteria.append('site LIKE %s')
|
|
|
|
formats.append(argument)
|
|
|
|
elif option == 'proto':
|
|
|
|
criteria.append('protocol=%s')
|
|
|
|
formats.append(argument)
|
|
|
|
elif option == 'near':
|
|
|
|
criteria.append("""(previous_msg LIKE %s OR
|
|
|
|
next_msg LIKE %s OR
|
|
|
|
current_msg LIKE %s)""")
|
|
|
|
if '%' not in argument:
|
|
|
|
argument = '%%%s%%' % argument
|
|
|
|
formats.append(argument)
|
|
|
|
formats.append(argument)
|
|
|
|
formats.append(argument)
|
|
|
|
db = self.getDb(channel)
|
|
|
|
cursor = db.cursor()
|
|
|
|
criterion = ' AND '.join(criteria)
|
2003-08-26 13:15:15 +02:00
|
|
|
sql = """SELECT url, added, added_by
|
|
|
|
FROM urls
|
2003-08-29 02:40:28 +02:00
|
|
|
WHERE %s ORDER BY id DESC
|
|
|
|
LIMIT 10""" % criterion
|
2003-08-19 11:10:41 +02:00
|
|
|
cursor.execute(sql, *formats)
|
|
|
|
if cursor.rowcount == 0:
|
|
|
|
irc.reply(msg, 'No URLs matched that criteria.')
|
|
|
|
else:
|
2003-08-29 02:40:28 +02:00
|
|
|
if nolimit:
|
|
|
|
urls = ['<%s>' % t[0] for t in cursor.fetchall()]
|
|
|
|
s = ircutils.privmsgPayload(urls, ', ', 400)
|
|
|
|
else:
|
|
|
|
(url, added, added_by) = cursor.fetchone()
|
|
|
|
timestamp = time.strftime('%I:%M %p, %B %d, %Y',
|
|
|
|
time.localtime(int(added)))
|
|
|
|
s = '<%s>, added by %s at %s.' % (url, added_by, timestamp)
|
2003-08-26 13:15:15 +02:00
|
|
|
irc.reply(msg, s)
|
2003-08-20 18:26:23 +02:00
|
|
|
|
|
|
|
|
2003-08-17 11:31:04 +02:00
|
|
|
Class = URLSnarfer
|
|
|
|
|
|
|
|
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|