Limnoria/plugins/URLSnarfer.py

348 lines
14 KiB
Python
Raw Normal View History

#!/usr/bin/env python
###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Keeps track of URLs posted to a channel, along with relevant context. Allows
searching for URLs and returning random URLs. Also provides statistics on the
URLs in the database.
"""
import plugins
import os
import re
import time
2003-08-19 11:10:41 +02:00
import getopt
2003-11-03 06:39:14 +01:00
import urllib2
import urlparse
import sqlite
import conf
2003-11-03 06:39:14 +01:00
import debug
import utils
2003-09-09 09:36:41 +02:00
import ircmsgs
import privmsgs
import callbacks
example = utils.wrapLines("""
<supybot> lasturl, numurls, randomurl
<jemfinch> @numurls
<supybot> Error: Command must be sent in a channel or include a channel in its arguments.
<jemfinch> (each channel has its own URL database, so these commands need to know which channel database to access)
<jemfinch> @numurls #sourcereview
<supybot> I have 93 URLs in my database.
<jemfinch> @randomurl #sourcereview
<supybot> <http://gameknot.com/chess.pl?bd=1022045&r=735> (added by Strike at 06:25 PM, August 17, 2003)
<jemfinch> @lasturl
<supybot> Error: Command must be sent in a channel or include a channel in its arguments.
<jemfinch> @lasturl #sourcereview
<supybot> <http://lambda.weblogs.com/xml/rss.xml>, added by Strike at 12:12 AM, August 26, 2003.
<jemfinch> @lasturl #sourcereview --proto ftp
<supybot> <ftp://ftp.us.debian.org/debian/dists/unstable/Contents-i386.gz>, added by Strike|work at 07:27 PM, August 21, 2003.
<jemfinch> @lasturl #sourcereview --near Stuka
<supybot> <http://slashdot.org/comments.pl?sid=75020&cid=6720123>, added by jemfinch at 03:16 PM, August 25, 2003.
<jemfinch> @lasturl #sourcereview --at coderforums.com
<supybot> <http://coderforums.com/showthread.php?s=&postid=15798#post15798>, added by Strike|work at 02:41 PM, August 25, 2003.
""")
def configure(onStart, afterConnect, advanced):
# This will be called by setup.py to configure this module. onStart and
# afterConnect are both lists. Append to onStart the commands you would
# like to be run when the bot is started; append to afterConnect the
# commands you would like to be run when the bot has finished connecting.
from questions import expect, anything, something, yn
onStart.append('load URLSnarfer')
2003-11-03 06:39:14 +01:00
class URLSnarfer(plugins.ChannelDBHandler, callbacks.Privmsg,
plugins.Toggleable):
toggles = plugins.ToggleDictionary({'tinysnarf':True,
'tinyreply':True})
_maxUrlLen = 46
def __init__(self):
self.nextMsgs = {}
callbacks.Privmsg.__init__(self)
plugins.ChannelDBHandler.__init__(self)
2003-11-03 06:39:14 +01:00
plugins.Toggleable.__init__(self)
def makeDb(self, filename):
if os.path.exists(filename):
return sqlite.connect(filename)
db = sqlite.connect(filename)
cursor = db.cursor()
cursor.execute("""CREATE TABLE urls (
id INTEGER PRIMARY KEY,
url TEXT,
added TIMESTAMP,
added_by TEXT,
previous_msg TEXT,
current_msg TEXT,
next_msg TEXT,
protocol TEXT,
site TEXT,
filename TEXT
)""")
2003-11-03 06:39:14 +01:00
cursor.execute("""CREATE TABLE tinyurls (
id INTEGER PRIMARY KEY,
url_id INTEGER,
tinyurl TEXT
)""")
db.commit()
return db
_urlRe = re.compile(r"([^\[<(\s]+://[^\])>\s]+)", re.I)
def doPrivmsg(self, irc, msg):
channel = msg.args[0]
db = self.getDb(channel)
cursor = db.cursor()
if (msg.nick, channel) in self.nextMsgs:
L = self.nextMsgs.pop((msg.nick, msg.args[0]))
for (url, added) in L:
cursor.execute("""UPDATE urls SET next_msg=%s
WHERE url=%s AND added=%s""",
msg.args[1], url, added)
2003-09-09 09:36:41 +02:00
if ircmsgs.isAction(msg):
text = ircmsgs.unAction(msg)
else:
text = msg.args[1]
for url in self._urlRe.findall(text):
(protocol, site, filename, _, _, _) = urlparse.urlparse(url)
previousMsg = ''
for oldMsg in reviter(irc.state.history):
if oldMsg.command == 'PRIVMSG':
if oldMsg.nick == msg.nick and oldMsg.args[0] == channel:
previousMsg = oldMsg.args[1]
addedBy = msg.nick
added = int(time.time())
cursor.execute("""INSERT INTO urls VALUES
(NULL, %s, %s, %s, %s, %s, '', %s, %s, %s)""",
url, added, addedBy, msg.args[1], previousMsg,
protocol, site, filename)
2003-11-03 06:39:14 +01:00
if self.toggles.get('tinysnarf', channel=msg.args[0]) and\
len(url) > self._maxUrlLen:
cursor.execute("""SELECT id FROM urls WHERE url=%s AND
added=%s AND added_by=%s""", url, added, addedBy)
if cursor.rowcount != 0:
#debug.printf(url)
tinyurl = self._getTinyUrl(url)
if tinyurl:
id = int(cursor.fetchone()[0])
cursor.execute("""INSERT INTO tinyurls VALUES
(NULL, %s, %s)""", id, tinyurl)
if self.toggles.get('tinyreply', channel=msg.args[0]):
irc.queueMsg(callbacks.reply(msg, 'TinyURL: %s' %
tinyurl, prefixName=False))
key = (msg.nick, channel)
self.nextMsgs.setdefault(key, []).append((url, added))
db.commit()
2003-11-03 06:39:14 +01:00
_tinyRe = re.compile(r'23 characters:\n<blockquote>(http://tinyurl.com/\w{4})'\
'</blockquote>')
def _getTinyUrl(self, url, cmd=False):
try:
fd = urllib2.urlopen('http://tinyurl.com/create.php?url=%s' % url)
s = fd.read()
fd.close()
m = self._tinyRe.search(s)
if m is None:
return None
return m.group(1)
except urllib2.HTTPError, e:
if cmd:
raise callbacks.Error, e.msg()
else:
debug.msg(e.msg())
def _formatUrl(self, url, added, addedBy):
#debug.printf((url, added, addedBy))
when = time.strftime(conf.humanTimestampFormat,
time.localtime(int(added)))
return '<%s> (added by %s at %s)' % (url, addedBy, when)
def _formatUrlWithId(self, id, url, added, addedBy):
#debug.printf((id, url, added, addedBy))
return '#%s: %s' % (id, self._formatUrl(url, added, addedBy))
def randomurl(self, irc, msg, args):
"""[<channel>]
Returns a random URL from the URL database. <channel> is only required
if the message isn't sent in the channel itself.
"""
2003-08-19 11:10:41 +02:00
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
cursor.execute("""SELECT id, url, added, added_by
FROM urls
ORDER BY random()
LIMIT 1""")
if cursor.rowcount == 0:
irc.reply(msg, 'I have no URLs in my database for %s' % channel)
else:
irc.reply(msg, self._formatUrlWithId(*cursor.fetchone()))
2003-11-03 06:39:14 +01:00
def tinyurl(self, irc, msg, args):
"""<url>
Returns a TinyURL.com version of <url>
"""
url = privmsgs.getArgs(args)
if self.toggles.get('tinysnarf', channel=msg.args[0]) and\
self.toggles.get('tinyreply', channel=msg.args[0]):
return
url = self._getTinyUrl(url)
if not url:
irc.error(msg, 'Could not parse the TinyURL.com results page. '\
'(%s)' % conf.replyPossibleBug)
else:
irc.reply(msg, url)
def geturl(self, irc, msg, args):
"""[<channel>] <id>
Gets the URL with id <id> from the URL database for <channel>.
<channel> is only necessary if not sent in the channel itself.
"""
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
id = privmsgs.getArgs(args)
cursor.execute("""SELECT url, added, added_by
FROM urls
WHERE id=%s""", id)
if cursor.rowcount == 0:
irc.reply(msg, 'No URL was found with that id.')
else:
irc.reply(msg, self._formatUrl(*cursor.fetchone()))
def numurls(self, irc, msg, args):
"""[<channel>]
Returns the number of URLs in the URL database. <channel> is only
required if the message isn't sent in the channel itself.
"""
2003-08-19 11:10:41 +02:00
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
cursor.execute("""SELECT COUNT(*) FROM urls""")
(count,) = cursor.fetchone()
irc.reply(msg, 'I have %s %s in my database.' % \
(count, int(count) == 1 and 'URL' or 'URLs'))
2003-08-19 11:10:41 +02:00
def lasturls(self, irc, msg, args):
"""[<channel>] [--{from,with,at,proto,near}=<value>]
Uses arguments in the same way as lasturl; acts as if lasturl was given
the --nolimit option.
"""
if '--nolimit' not in args:
args.append('--nolimit')
self.lasturl(irc, msg, args)
2003-08-19 11:10:41 +02:00
def lasturl(self, irc, msg, args):
"""[<channel>] [--{from,with,at,proto,near}=<value>] --{nolimit,fancy}
2003-08-19 11:10:41 +02:00
Gives the last URL matching the given criteria. --from is from whom
the URL came; --at is the site of the URL; --proto is the protocol the
URL used; --with is something inside the URL; --near is a string in the
messages before and after the link. If --nolimit is given, returns as
many URLs as can fit in the message. --fancy returns information in
addition to just the URL. <channel> is only necessary if the
2003-08-19 11:10:41 +02:00
message isn't sent in the channel itself.
"""
channel = privmsgs.getChannel(msg, args)
(optlist, rest) = getopt.getopt(args, '', ['from=', 'with=', 'at=',
'proto=', 'near=',
'nolimit', 'fancy'])
2003-08-19 11:10:41 +02:00
criteria = ['1=1']
formats = []
simple = True
nolimit = False
2003-08-19 11:10:41 +02:00
for (option, argument) in optlist:
2003-09-05 21:37:58 +02:00
option = option.lstrip('-') # Strip off the --.
if option == 'nolimit':
nolimit = True
if option == 'fancy':
simple = False
2003-09-03 19:03:38 +02:00
elif option == 'from':
2003-08-19 11:10:41 +02:00
criteria.append('added_by LIKE %s')
formats.append(argument)
elif option == 'with':
if '%' not in argument and '_' not in argument:
argument = '%%%s%%' % argument
2003-08-19 11:10:41 +02:00
criteria.append('url LIKE %s')
formats.append(argument)
elif option == 'at':
if '%' not in argument and '_' not in argument:
argument = '%' + argument
2003-08-19 11:10:41 +02:00
criteria.append('site LIKE %s')
formats.append(argument)
elif option == 'proto':
criteria.append('protocol=%s')
formats.append(argument)
elif option == 'near':
criteria.append("""(previous_msg LIKE %s OR
next_msg LIKE %s OR
current_msg LIKE %s)""")
if '%' not in argument:
argument = '%%%s%%' % argument
formats.append(argument)
formats.append(argument)
formats.append(argument)
db = self.getDb(channel)
cursor = db.cursor()
criterion = ' AND '.join(criteria)
2003-09-08 21:44:09 +02:00
sql = """SELECT id, url, added, added_by
FROM urls
WHERE %s ORDER BY id DESC
2003-09-07 08:23:32 +02:00
LIMIT 100""" % criterion
2003-08-19 11:10:41 +02:00
cursor.execute(sql, *formats)
if cursor.rowcount == 0:
irc.reply(msg, 'No URLs matched that criteria.')
else:
if nolimit:
2003-09-08 21:44:09 +02:00
urls = ['<%s>' % t[1] for t in cursor.fetchall()]
s = ', '.join(urls)
2003-09-05 21:37:58 +02:00
elif simple:
s = cursor.fetchone()[1]
else:
2003-09-08 21:44:09 +02:00
(id, url, added, added_by) = cursor.fetchone()
timestamp = time.strftime('%I:%M %p, %B %d, %Y',
time.localtime(int(added)))
2003-09-08 21:44:09 +02:00
s = '#%s: <%s>, added by %s at %s.' % \
(id, url, added_by, timestamp)
irc.reply(msg, s)
2003-08-20 18:26:23 +02:00
Class = URLSnarfer
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: