Re-added near functionality and re-abstracted the plugin.

This commit is contained in:
Jeremy Fincher 2004-09-28 19:58:32 +00:00
parent f9ae666f3b
commit 3139ffe909

View File

@ -45,6 +45,7 @@ import getopt
import urlparse import urlparse
import itertools import itertools
import supybot.dbi as dbi
import supybot.conf as conf import supybot.conf as conf
import supybot.utils as utils import supybot.utils as utils
import supybot.ircmsgs as ircmsgs import supybot.ircmsgs as ircmsgs
@ -86,97 +87,40 @@ conf.registerChannelValue(conf.supybot.plugins.URL, 'nonSnarfingRegexp',
snarfed. Give the empty string if you have no URLs that you'd like to snarfed. Give the empty string if you have no URLs that you'd like to
exclude from being snarfed.""")) exclude from being snarfed."""))
class URLDB(object): class UrlRecord(dbi.Record):
def __init__(self, channel, log): __fields__ = [
self.log = log 'url',
self.filename = plugins.makeChannelFilename('URL.db', channel) 'by',
'near',
'at',
]
def _getFile(self): class DbiUrlDB(plugins.DbiChannelDB):
try: class DB(dbi.DB):
fd = file(self.filename) Record = UrlRecord
return fd def add(self, url, msg):
except EnvironmentError, e: record = self.Record(url=url, by=msg.nick,
self.log.warning('Couldn\'t open %s: %s', near=msg.args[1], at=msg.receivedAt)
self.filename, utils.exnToString(e)) super(self.__class__, self).add(record)
return None def urls(self, p):
L = list(self.select(p))
def _formatRecord(self, url, nick):
return '%s %s\n' % (url, nick)
def addUrl(self, url, nick):
fd = file(self.filename, 'a')
fd.write(self._formatRecord(url, nick))
fd.close()
def numUrls(self):
fd = self._getFile()
if fd is None:
return 0
try:
return itertools.ilen(fd)
finally:
fd.close()
def getUrlsAndNicks(self, p=None):
L = []
fd = self._getFile()
if fd is None:
return []
try:
for line in fd:
line = line.strip()
try:
(url, nick) = line.split()
except ValueError: # unpack list of wrong size.
self.log.warning('Invalid line in URLDB: %r.', line)
continue
if p(url, nick):
L.append((url, nick))
seen = sets.Set()
L.reverse() L.reverse()
for (i, (url, nick)) in enumerate(L):
if url in seen:
L[i] = None
else:
seen.add(url)
L = filter(None, L)
return L return L
finally:
fd.close()
def getUrls(self, p): URLDB = plugins.DB('URL', {'flat': DbiUrlDB})
return [url for (url, nick) in self.getUrlsAndNicks(p)]
def vacuum(self):
out = utils.transactionalFile(self.filename)
notAdded = 0
urls = self.getUrlsAndNicks(lambda *args: True)
seen = sets.Set()
for (i, (url, nick)) in enumerate(urls):
if url not in seen:
seen.add(url)
else:
urls[i] = None
notAdded += 1
urls.reverse()
for urlNick in urls:
if urlNick is not None:
out.write(self._formatRecord(*urlNick))
out.close()
self.log.info('Vacuumed %s, removed %s records.',
self.filename, notAdded)
class URL(callbacks.PrivmsgCommandAndRegexp): class URL(callbacks.PrivmsgCommandAndRegexp):
priority = 100 # lower than 99, the normal priority. priority = 100 # lower than 99, the normal priority.
regexps = ['titleSnarfer', 'tinyurlSnarfer'] regexps = ['titleSnarfer', 'tinyurlSnarfer']
_titleRe = re.compile('<title>(.*?)</title>', re.I | re.S) _titleRe = re.compile('<title>(.*?)</title>', re.I | re.S)
def getDb(self, channel): def __init__(self):
return URLDB(channel, self.log) self.__parent = super(URL, self)
self.__parent.__init__()
self.db = URLDB()
def doPrivmsg(self, irc, msg): def doPrivmsg(self, irc, msg):
channel = msg.args[0] channel = msg.args[0]
if ircutils.isChannel(channel): if ircutils.isChannel(channel):
db = self.getDb(channel)
if ircmsgs.isAction(msg): if ircmsgs.isAction(msg):
text = ircmsgs.unAction(msg) text = ircmsgs.unAction(msg)
else: else:
@ -187,8 +131,8 @@ class URL(callbacks.PrivmsgCommandAndRegexp):
self.log.debug('Skipping adding %r to db.', url) self.log.debug('Skipping adding %r to db.', url)
continue continue
self.log.debug('Adding %r to db.', url) self.log.debug('Adding %r to db.', url)
db.addUrl(url, msg.nick) self.db.add(channel, url, msg)
callbacks.PrivmsgCommandAndRegexp.doPrivmsg(self, irc, msg) self.__parent.doPrivmsg(irc, msg)
def tinyurlSnarfer(self, irc, msg, match): def tinyurlSnarfer(self, irc, msg, match):
r"https?://[^\])>\s]{13,}" r"https?://[^\])>\s]{13,}"
@ -292,47 +236,49 @@ class URL(callbacks.PrivmsgCommandAndRegexp):
required if the message isn't sent in the channel itself. required if the message isn't sent in the channel itself.
""" """
channel = privmsgs.getChannel(msg, args) channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel) self.db.vacuum(channel)
db.vacuum() count = self.db.size(channel)
count = db.numUrls()
irc.reply('I have %s in my database.' % utils.nItems('URL', count)) irc.reply('I have %s in my database.' % utils.nItems('URL', count))
def last(self, irc, msg, args): def last(self, irc, msg, args):
"""[<channel>] [--{from,with,proto}=<value>] --{nolimit} """[<channel>] [--{from,with,near,proto}=<value>] --{nolimit}
Gives the last URL matching the given criteria. --from is from whom Gives the last URL matching the given criteria. --from is from whom
the URL came; --proto is the protocol the URL used; --with is something the URL came; --proto is the protocol the URL used; --with is something
inside the URL; If --nolimit is given, returns all the URLs that are inside the URL; --near is something in the same message as the URL; If
found. to just the URL. <channel> is only necessary if the message --nolimit is given, returns all the URLs that are found. to just the
isn't sent in the channel itself. URL. <channel> is only necessary if the message isn't sent in the
channel itself.
""" """
channel = privmsgs.getChannel(msg, args) channel = privmsgs.getChannel(msg, args)
(optlist, rest) = getopt.getopt(args, '', ['from=', 'with=', (optlist, rest) = getopt.getopt(args, '', ['from=', 'with=', 'near=',
'proto=', 'nolimit',]) 'proto=', 'nolimit',])
predicates = [] predicates = []
f = None
nolimit = False nolimit = False
for (option, arg) in optlist: for (option, arg) in optlist:
if option == '--nolimit': if option == '--nolimit':
nolimit = True nolimit = True
elif option == '--from': elif option == '--from':
def from_(url, nick, arg=arg): def f(record, arg=arg):
return ircutils.strEqual(nick, arg) return ircutils.strEqual(record.by, arg)
predicates.append(from_)
elif option == '--with': elif option == '--with':
def with(url, nick, arg=arg): def f(record, arg=arg):
return arg in url return arg in record.url
predicates.append(with)
elif option == '--proto': elif option == '--proto':
def proto(url, nick, arg=arg): def f(record, arg=arg):
return url.startswith(arg) return record.url.startswith(arg)
predicates.append(proto) elif option == '--near':
db = self.getDb(channel) def f(record, arg=arg):
def predicate(url, nick): return arg in record.near
if f is not None:
predicates.append(f)
def predicate(record):
for predicate in predicates: for predicate in predicates:
if not predicate(url, nick): if not predicate(record):
return False return False
return True return True
urls = db.getUrls(predicate) urls = [record.url for record in self.db.urls(channel, predicate)]
if not urls: if not urls:
irc.reply('No URLs matched that criteria.') irc.reply('No URLs matched that criteria.')
else: else: