Limnoria/plugins/WordStats.py

330 lines
12 KiB
Python
Raw Normal View History

###
# Copyright (c) 2002-2004, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Keeps statistics on who says what words in a channel.
"""
# Version-control keyword placeholder; presumably expanded by the VCS on
# checkout -- confirm which system the repository relies on.
__revision__ = "$Id$"
2004-02-09 05:40:30 +01:00
import os
2004-02-08 04:23:30 +01:00
import csv
import string
2004-07-24 07:18:26 +02:00
import supybot.log as log
import supybot.conf as conf
import supybot.utils as utils
import supybot.world as world
import supybot.ircdb as ircdb
2004-10-26 23:07:53 +02:00
from supybot.commands import *
2004-07-24 07:18:26 +02:00
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.registry as registry
import supybot.callbacks as callbacks
# Register the plugin with the configuration system before any values are
# attached under conf.supybot.plugins.WordStats below.
conf.registerPlugin('WordStats')

# Channel-specific setting: how many of the top users a word query lists.
conf.registerChannelValue(
    conf.supybot.plugins.WordStats,
    'rankingDisplay',
    registry.PositiveInteger(3, """Determines the maximum number of top users
to show for a given wordstat when someone requests the wordstats for a
particular word."""))

# Channel-specific setting: whether messages that were themselves commands
# are left out of the counts.
conf.registerChannelValue(
    conf.supybot.plugins.WordStats,
    'ignoreQueries',
    registry.Boolean(False, """Determines whether the bot will ignore words
said in a channel if they're in a wordstats query (command)."""))

# Every non-alphanumeric character of string.ascii; passed to str.strip()
# elsewhere in this module to trim punctuation from the ends of words.
# NOTE(review): string.ascii is not a standard-library attribute; presumably
# the project provides it -- confirm.
nonAlphaNumeric = filter(lambda s: not s.isalnum(), string.ascii)

# Dictionary type used for word -> count mappings.
WordDict = utils.InsensitivePreservingDict
2004-02-08 04:23:30 +01:00
class WordStatsDB(plugins.ChannelUserDB):
    """Word counters keyed by (channel, user id), plus per-channel totals.

    Every ``self[channel, id]`` value maps a tracked word to the number of
    times that user has said it in that channel.  ``self.channelWords`` maps
    a channel to a ``word -> total count`` dictionary covering all users.
    Both kinds of dictionary are created as ``WordDict`` instances
    (presumably case-insensitive, going by the underlying class name), so a
    word behaves the same way whether it was loaded from disk or added at
    run time.
    """
    def __init__(self, *args, **kwargs):
        # channelWords has to exist before the parent constructor runs,
        # because deserialize() writes to it (presumably the parent loads
        # the database file during construction -- confirm).
        self.channelWords = ircutils.IrcDict()
        plugins.ChannelUserDB.__init__(self, *args, **kwargs)

    def close(self):
        """Close the parent database, but only when channelWords is
        non-empty."""
        if self.channelWords:
            plugins.ChannelUserDB.close(self)

    def _getChannelWords(self, channel):
        """Return the word -> total mapping for channel, creating an empty
        WordDict for it on first use."""
        if channel not in self.channelWords:
            self.channelWords[channel] = WordDict()
        return self.channelWords[channel]

    def serialize(self, v):
        """Return the counts in v as a list of 'word:count' strings."""
        return ['%s:%s' % (word, count) for (word, count) in v.items()]

    def deserialize(self, channel, id, L):
        """Rebuild one user's word counts from 'word:count' strings.

        Each count is also added to the channel-wide total for its word.
        Returns the WordDict holding the user's counts.
        """
        d = WordDict()
        for s in L:
            # Split on the last colon only: the count never contains one,
            # but a tracked word may (only its ends are validated on add).
            (word, count) = s.rsplit(':', 1)
            count = int(count)
            d[word] = count
            totals = self._getChannelWords(channel)
            totals.setdefault(word, 0)
            totals[word] += count
        return d

    def getWordCount(self, channel, id, word):
        """Return how often user id said word in channel.

        Raises KeyError when the user or the word has no entry.
        """
        return self[channel, id][word]

    def getUserWordCounts(self, channel, id):
        """Return the (word, count) pairs recorded for user id in channel.

        Raises KeyError when the user has no entry.
        """
        return self[channel, id].items()

    def getWords(self, channel):
        """Return the sorted list of words tracked in channel.

        An empty mapping is created for a channel seen for the first time.
        """
        words = list(self._getChannelWords(channel).keys())
        words.sort()
        return words

    def getTotalWordCount(self, channel, word):
        """Return the channel-wide total for word.

        Raises KeyError when the channel or the word is unknown.
        """
        return self.channelWords[channel][word]

    def getNumUsers(self, channel):
        """Return the number of users that have an entry for channel."""
        i = 0
        for ((chan, _), _) in self.iteritems():
            if ircutils.nickEqual(chan, channel):
                i += 1
        return i

    def getTopUsers(self, channel, word, n):
        """Return up to n (id, count) pairs for word, highest count first.

        Passing n == 0 returns every user with an entry for the word,
        because the slice L[-0:] is the whole list; getRankAndNumber()
        depends on that.
        """
        L = [(userId, d[word]) for ((chan, userId), d) in self.iteritems()
             if ircutils.nickEqual(channel, chan) and word in d]
        utils.sortBy(lambda pair: pair[1], L)
        L = L[-n:]
        L.reverse()
        return L

    def getRankAndNumber(self, channel, id, word):
        """Return (rank, count) for user id on word; rank 1 is the top user.

        Raises KeyError when the user has no entry for the word.
        """
        rank = 0
        for (someId, count) in self.getTopUsers(channel, word, 0):
            rank += 1
            if id == someId:
                return (rank, count)
        raise KeyError(id)

    def addWord(self, channel, word):
        """Start tracking word in channel.

        Adding a word that is already tracked keeps its existing total, so
        the total stays in step with the per-user counts.
        """
        self._getChannelWords(channel).setdefault(word, 0)
        for ((chan, _), d) in self.iteritems():
            if ircutils.nickEqual(chan, channel):
                if word not in d:
                    d[word] = 0

    def delWord(self, channel, word):
        """Stop tracking word in channel and drop every user's count for it.

        Unknown channels and untracked words are ignored.
        """
        if channel in self.channelWords:
            if word in self.channelWords[channel]:
                del self.channelWords[channel][word]
        for ((chan, _), d) in self.iteritems():
            if ircutils.nickEqual(chan, channel):
                if word in d:
                    del d[word]

    def addMsg(self, msg):
        """Count the tracked words occurring in a channel PRIVMSG.

        Messages sent outside a channel, empty messages, and messages from
        senders for which ircdb.users.getUserId() raises KeyError are
        ignored.
        """
        assert msg.command == 'PRIVMSG'
        (channel, text) = msg.args
        if not ircutils.isChannel(channel):
            return
        text = text.strip().lower()
        if not text:
            return
        try:
            userId = ircdb.users.getUserId(msg.prefix)
        except KeyError:
            return
        msgwords = [s.strip(nonAlphaNumeric) for s in text.split()]
        totals = self._getChannelWords(channel)
        for tracked in totals.keys():
            # The message was lower-cased above, so compare against the
            # lower-cased form but keep indexing with the registered
            # spelling; that works whatever the dictionary's case rules are.
            hits = msgwords.count(tracked.lower())
            if not hits:
                continue
            totals[tracked] += hits
            if (channel, userId) not in self:
                self[channel, userId] = WordDict()
            counts = self[channel, userId]
            if tracked not in counts:
                counts[tracked] = 0
            counts[tracked] += hits
2004-07-21 21:36:35 +02:00
2004-02-08 04:23:30 +01:00
# Path of the word statistics database file: 'WordStats.db' inside the
# directory returned by conf.supybot.directories.data().
filename=os.path.join(conf.supybot.directories.data(), 'WordStats.db')
class WordStats(callbacks.Privmsg):
    # Plugin that counts tracked words said in channels and answers queries
    # about them.  Docstrings on the command methods (add, remove,
    # wordstats) are kept word-for-word because they read as user-facing
    # help text; developer notes therefore live in comments.
    noIgnore = True

    def __init__(self):
        self.__parent = super(WordStats, self)
        self.__parent.__init__()
        self.db = WordStatsDB(filename)
        # Set by callCommand() and cleared again by doPrivmsg().
        self.queried = False
        world.flushers.append(self.db.flush)

    def die(self):
        # Unregister the flusher added in __init__ before closing the
        # database it belongs to.
        if self.db.flush in world.flushers:
            world.flushers.remove(self.db.flush)
        self.db.close()
        self.__parent.die()

    def callCommand(self, *args, **kwargs):
        # Remember that the current message invoked a command so that
        # doPrivmsg() can skip counting it when 'ignoreQueries' is set.
        self.queried = True
        return self.__parent.callCommand(*args, **kwargs)

    def doPrivmsg(self, irc, msg):
        # This depends on the fact that it's called after the command.
        try:
            channel = msg.args[0]
            if ircutils.isChannel(channel):
                ignoring = (self.queried and
                            self.registryValue('ignoreQueries', channel))
                if ignoring:
                    self.log.debug('Queried and ignoring.')
                else:
                    self.db.addMsg(msg)
        finally:
            # Always reset, even if counting raised.
            self.queried = False

    def add(self, irc, msg, args, channel, word):
        """[<channel>] <word>
        Keeps stats on <word> in <channel>. <channel> is only necessary if the
        message isn't sent in the channel itself.
        """
        word = word.strip()
        # Only the ends of the word are checked here.
        if word.strip(nonAlphaNumeric) != word:
            irc.error('<word> must not contain non-alphanumeric chars.')
            return
        self.db.addWord(channel, word)
        irc.replySuccess()
    add = wrap(add, ['channel', 'somethingWithoutSpaces'])

    def remove(self, irc, msg, args, channel, word):
        """[<channel>] <word>
        Removes <word> from the list of words being tracked. If <channel> is
        not specified, uses current channel.
        """
        words = self.db.getWords(channel)
        if not words:
            irc.error('I am not currently keeping any word stats.')
        elif word in words:
            self.db.delWord(channel, word)
            irc.replySuccess()
        else:
            irc.error('%s doesn\'t look like a word I am keeping stats '
                      'on.' % utils.quoted(word))
    remove = wrap(remove, ['channel', 'somethingWithoutSpaces'])

    def wordstats(self, irc, msg, args, channel, user, word):
        """[<channel>] [<user>] [<word>]
        With no arguments, returns the list of words that are being monitored
        for stats. With <user> alone, returns all the stats for that user.
        With <word> alone, returns the top users for that word. With <user>
        and <word>, returns that user's stat for that word. <channel> is only
        needed if not said in the channel. (Note: if only one of <user> or
        <word> is given, <word> is assumed first and only if no stats are
        available for that word, do we assume it's <user>.)
        """
        if not user and not word:
            # No arguments: list the tracked words.
            words = self.db.getWords(channel)
            if words:
                commaAndify = utils.commaAndify
                s = 'I am currently keeping stats for %s.' % commaAndify(words)
                irc.reply(s)
            else:
                irc.reply('I am not currently keeping any word stats.')
            return
        elif user and word:
            # One user's count for one word.
            try:
                count = self.db.getWordCount(channel, user.id, word)
            except KeyError:
                irc.error('I\'m not keeping stats on %s.' %
                          utils.quoted(word))
                return
            if count:
                s = '%s has said %s %s.' % \
                    (user.name, utils.quoted(word),
                     utils.nItems('time', count))
                irc.reply(s)
            else:
                # Use the user's name, as the success message above does,
                # rather than formatting the user object itself.
                irc.error('%s has never said %s.' %
                          (user.name, utils.quoted(word)))
        elif word in WordDict.fromkeys(self.db.getWords(channel)):
            # Ranking of the top users for a tracked word.
            total = self.db.getTotalWordCount(channel, word)
            if total == 0:
                irc.reply('I\'m keeping stats on %s, but I haven\'t seen it '
                          'in this channel.' % word)
                return
            n = self.registryValue('rankingDisplay', channel)
            try:
                callerId = ircdb.users.getUserId(msg.prefix)
                (rank, number) = self.db.getRankAndNumber(channel, callerId,
                                                          word)
            except (KeyError, ValueError):
                callerId = None
                rank = None
                number = None
            ers = '%ser' % utils.quoted(word)
            L = []
            for (userid, count) in self.db.getTopUsers(channel, word, n):
                if userid == callerId:
                    # The caller already appears in the displayed list, so
                    # the separate "You are ranked" sentence is dropped.
                    rank = None
                try:
                    username = ircdb.users.getUser(userid).name
                except KeyError:
                    username = 'unregistered user'
                L.append('%s: %s' % (username, count))
            ret = 'Top %s (out of a total of %s seen):' % \
                  (utils.nItems(ers, len(L)), utils.nItems(repr(word), total))
            users = self.db.getNumUsers(channel)
            if rank is not None:
                s = ' You are ranked %s out of %s with %s.' % \
                    (rank, utils.nItems(ers, users),
                     utils.nItems(repr(word), number))
            else:
                s = ''
            ret = '%s %s.%s' % (ret, utils.commaAndify(L), s)
            irc.reply(ret)
        elif word:
            irc.error('%r doesn\'t look like a word I\'m keeping stats '
                      'on or a user in my database.' % word)
        else:
            # Only a user was given: list all of that user's counts.  The
            # try covers just the lookup that can raise KeyError.
            try:
                counts = self.db.getUserWordCounts(channel, user.id)
            except KeyError:
                irc.error('I have no wordstats for %s.' % user.name)
            else:
                L = ['%s: %s' % (utils.quoted(w), c) for (w, c) in counts]
                L.sort()
                irc.reply(utils.commaAndify(L))
    wordstats = wrap(wordstats,
                     ['channel',
                      optional('otherUser'),
                      additional('somethingWithoutSpaces')])
# Module-level alias for the plugin class; presumably the name the bot's
# plugin loader looks up after importing this module -- confirm.
Class = WordStats
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: