Limnoria/plugins/WordStats.py

323 lines
12 KiB
Python
Raw Normal View History

#!/usr/bin/python
###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Keeps statistics on who says what words in a channel.
"""
# Revision identifier for this module.  "$Id$" looks like an unexpanded
# version-control keyword placeholder -- NOTE(review): confirm whether
# anything still expands or reads it.
__revision__ = "$Id$"
2004-02-09 05:40:30 +01:00
import os
2004-02-08 04:23:30 +01:00
import csv
import string
2004-02-08 04:23:30 +01:00
import log
import conf
import utils
2004-02-08 04:23:30 +01:00
import world
import ircdb
import plugins
import ircutils
import privmsgs
import registry
import callbacks
# Register this plugin's configuration group, then its per-channel
# 'rankingDisplay' setting (a positive integer defaulting to 3).
conf.registerPlugin('WordStats')
conf.registerChannelValue(
    conf.supybot.plugins.WordStats,
    'rankingDisplay',
    registry.PositiveInteger(3, """Determines the maximum number of top users
to show for a given wordstat when someone requests the wordstats for a
particular word."""),
)
2004-02-08 04:23:30 +01:00
# Every single-byte character that is not alphanumeric, as one string, so it
# can be handed straight to str.strip() to trim punctuation off a word.
# Built explicitly rather than via filter() over `string.ascii`: that
# attribute is not part of the standard `string` module (presumably patched
# in elsewhere in the project -- confirm), and filter() only returns a str
# on Python 2.
nonAlphaNumeric = ''.join([c for c in map(chr, range(256))
                           if not c.isalnum()])
2004-04-09 18:29:16 +02:00
# Mapping type used for word -> count tables.  Judging by its name, keys are
# matched case-insensitively while their original spelling is preserved --
# NOTE(review): confirm against utils.InsensitivePreservingDict.
WordDict = utils.InsensitivePreservingDict
2004-02-08 04:23:30 +01:00
class WordStatsDB(plugins.ChannelUserDB):
    """Per-channel, per-user counts of tracked words.

    Entries of the database itself are keyed by ``(channel, id)`` and hold a
    word -> count mapping for that user.  ``self.channelWords`` additionally
    maps each channel to a word -> total-count mapping across all users; its
    keys are the words being tracked in that channel.

    All word mappings are ``WordDict`` instances so that lookups behave the
    same whether the data was loaded by deserialize() or created at runtime.
    """
    def __init__(self, *args, **kwargs):
        # channelWords is created before the base initializer runs because
        # deserialize() writes to it (presumably the base class calls
        # deserialize() while loading -- confirm in plugins.ChannelUserDB).
        self.channelWords = ircutils.IrcDict()
        plugins.ChannelUserDB.__init__(self, *args, **kwargs)

    def _getChannelWords(self, channel):
        """Return the word -> total mapping for channel, creating it if
        it does not exist yet."""
        if channel not in self.channelWords:
            self.channelWords[channel] = WordDict()
        return self.channelWords[channel]

    def serialize(self, v):
        """Return the (word, count) pairs of mapping v as a list of
        'word:count' strings."""
        return ['%s:%s' % (word, count) for (word, count) in v.iteritems()]

    def deserialize(self, channel, id, L):
        """Rebuild one user's word -> count mapping from 'word:count' strings.

        Each count is also added to the channel-wide total for that word.
        Raises ValueError for an item without a ':' or with a non-integer
        count.
        """
        d = WordDict()
        totals = self._getChannelWords(channel)
        for s in L:
            # Split on the LAST colon: the count never contains one, but a
            # tracked word may (e.g. 'a:b').
            i = s.rindex(':')
            word = s[:i]
            count = int(s[i+1:])
            d[word] = count
            totals.setdefault(word, 0)
            totals[word] += count
        return d

    def getWordCount(self, channel, id, word):
        """Return how often user id said word in channel.

        Raises KeyError if there is no entry for (channel, id) or word.
        """
        return self[channel, id][word]

    def getUserWordCounts(self, channel, id):
        """Return the (word, count) pairs recorded for user id in channel.

        Raises KeyError if there is no entry for (channel, id).
        """
        return self[channel, id].items()

    def getWords(self, channel):
        """Return the sorted list of words tracked in channel."""
        L = list(self._getChannelWords(channel).keys())
        L.sort()
        return L

    def getTotalWordCount(self, channel, word):
        """Return the channel-wide total for word.

        Raises KeyError if the channel or the word is unknown.
        """
        return self.channelWords[channel][word]

    def getNumUsers(self, channel):
        """Return the number of database entries belonging to channel."""
        i = 0
        for ((chan, _), _) in self.iteritems():
            if ircutils.nickEqual(chan, channel):
                i += 1
        return i

    def getTopUsers(self, channel, word, n):
        """Return up to n (id, count) pairs for word in channel, highest
        count first.

        Passing n == 0 returns every user that has an entry for word, since
        the slice L[-0:] is the whole list.
        """
        L = [(id, d[word]) for ((chan, id), d) in self.iteritems()
             if ircutils.nickEqual(channel, chan) and word in d]
        # Sort by count; an index-based key avoids the Python-2-only
        # tuple-parameter lambda.
        utils.sortBy(lambda pair: pair[1], L)
        L = L[-n:]
        L.reverse()
        return L

    def getRankAndNumber(self, channel, id, word):
        """Return (rank, count) of user id for word in channel; rank is
        1-based.

        Raises KeyError if the user has no entry for word.
        """
        ranking = self.getTopUsers(channel, word, 0)
        for (position, (someId, count)) in enumerate(ranking):
            if id == someId:
                return (position + 1, count)
        raise KeyError(id)

    def addWord(self, channel, word):
        """Start tracking word in channel.

        Counts that already exist, channel-wide or per user, are left
        untouched, so re-adding a tracked word does not reset its total.
        """
        totals = self._getChannelWords(channel)
        if word not in totals:
            totals[word] = 0
        for ((chan, _), d) in self.iteritems():
            if ircutils.nickEqual(chan, channel) and word not in d:
                d[word] = 0

    def delWord(self, channel, word):
        """Stop tracking word in channel and drop every user's count for it.

        An unknown channel or word is not an error.
        """
        if channel in self.channelWords:
            totals = self.channelWords[channel]
            if word in totals:
                del totals[word]
        for ((chan, _), d) in self.iteritems():
            if ircutils.nickEqual(chan, channel) and word in d:
                del d[word]

    def addMsg(self, msg):
        """Count the tracked words occurring in a channel PRIVMSG.

        Messages whose sender has no user id (getUserId raises KeyError),
        that are not addressed to a channel, or that are empty are ignored.
        """
        assert msg.command == 'PRIVMSG'
        try:
            id = ircdb.users.getUserId(msg.prefix)
        except KeyError:
            return
        (channel, text) = msg.args
        text = text.strip().lower()
        if not ircutils.isChannel(channel) or not text:
            return
        # Message words are lower-cased and trimmed of surrounding
        # punctuation before being compared.
        msgwords = [s.strip(nonAlphaNumeric) for s in text.split()]
        totals = self._getChannelWords(channel)
        # Iterate over a snapshot because the mapping's values are updated
        # inside the loop.
        for word in list(totals.keys()):
            hits = msgwords.count(word.lower())
            if not hits:
                continue
            # Index with the tracked spelling, not the lower-cased one, so
            # the lookup succeeds whatever mapping type holds the counts.
            totals[word] += hits
            if (channel, id) not in self:
                self[channel, id] = WordDict()
            userWords = self[channel, id]
            if word not in userWords:
                userWords[word] = 0
            userWords[word] += hits
# Path of the word-stats database file inside the configured data directory.
filename = os.path.join(conf.supybot.directories.data(), 'WordStats.db')
# Plugin class: feeds every PRIVMSG it sees to a WordStatsDB and offers the
# `add`, `remove` and `wordstats` commands on top of it.
class WordStats(callbacks.Privmsg):
    # NOTE(review): presumably tells the framework to hand this plugin
    # messages from ignored users as well -- confirm in callbacks.Privmsg.
    noIgnore = True

    def __init__(self):
        callbacks.Privmsg.__init__(self)
        self.db = WordStatsDB(filename)
        # Registered so the database is flushed along with the other
        # flushers in world.flushers.
        world.flushers.append(self.db.flush)

    def die(self):
        # Unregister the flusher, then close the database.
        if self.db.flush in world.flushers:
            world.flushers.remove(self.db.flush)
        self.db.close()
        callbacks.Privmsg.die(self)

    def doPrivmsg(self, irc, msg):
        # Record the message's tracked words.
        self.db.addMsg(msg)

    def _getUserId(self, irc, name):
        # Resolve name to a user id: first as a registered user name, then
        # as a nick whose current hostmask belongs to a registered user.
        # Raises KeyError if both lookups fail.
        try:
            return ircdb.users.getUserId(name)
        except KeyError:
            hostmask = irc.state.nickToHostmask(name)
            return ircdb.users.getUserId(hostmask)

    def _trackedSpelling(self, words, word):
        # Return the entry of words that matches word, preferring an exact
        # match over a case-insensitive one; None if there is no match.
        if word in words:
            return word
        lowered = word.lower()
        for tracked in words:
            if tracked.lower() == lowered:
                return tracked
        return None

    def add(self, irc, msg, args):
        """[<channel>] <word>

        Keeps stats on <word> in <channel>. <channel> is only necessary if the
        message isn't sent in the channel itself.
        """
        channel = privmsgs.getChannel(msg, args)
        word = privmsgs.getArgs(args)
        word = word.strip()
        # Only leading/trailing non-alphanumerics are rejected; that is what
        # addMsg strips from message words before comparing.
        if word.strip(nonAlphaNumeric) != word:
            irc.error('<word> must not contain non-alphanumeric chars.')
            return
        self.db.addWord(channel, word)
        irc.replySuccess()

    def remove(self, irc, msg, args):
        """[<channel>] <word>

        Removes <word> from the list of words being tracked. If <channel> is
        not specified, uses current channel.
        """
        channel = privmsgs.getChannel(msg, args)
        word = privmsgs.getArgs(args).strip()
        words = self.db.getWords(channel)
        if not words:
            irc.error('I am not currently keeping any word stats.')
            return
        # Match the way `wordstats` does (ignoring case) and delete using
        # the spelling the database actually stores.
        tracked = self._trackedSpelling(words, word)
        if tracked is None:
            irc.error('%r doesn\'t look like a word I am keeping stats '
                      'on.' % word)
            return
        self.db.delWord(channel, tracked)
        irc.replySuccess()

    def wordstats(self, irc, msg, args):
        """[<channel>] [<user>] [<word>]

        With no arguments, returns the list of words that are being monitored
        for stats. With <user> alone, returns all the stats for that user.
        With <word> alone, returns the top users for that word. With <user>
        and <word>, returns that user's stat for that word. <channel> is only
        needed if not said in the channel. (Note: if only one of <user> or
        <word> is given, <word> is assumed first and only if no stats are
        available for that word, do we assume it's <user>.)
        """
        channel = privmsgs.getChannel(msg, args)
        (arg1, arg2) = privmsgs.getArgs(args, required=0, optional=2)
        if not arg1 and not arg2:
            # No arguments: list the tracked words.
            words = self.db.getWords(channel)
            if words:
                commaAndify = utils.commaAndify
                s = 'I am currently keeping stats for %s.' % commaAndify(words)
                irc.reply(s)
            else:
                irc.reply('I am not currently keeping any word stats.')
            return
        elif arg1 and arg2:
            # <user> <word>: that user's count for that word.
            user, word = (arg1, arg2)
            try:
                id = self._getUserId(irc, user)
            except KeyError:
                irc.errorNoUser()
                return
            try:
                count = self.db.getWordCount(channel, id, word)
            except KeyError:
                irc.error('I\'m not keeping stats on %r.' % word)
                return
            if count:
                s = '%s has said %r %s.' % \
                    (user, word, utils.nItems('time', count))
                irc.reply(s)
            else:
                irc.error('%s has never said %r.' % (user, word))
        elif arg1 in WordDict.fromkeys(self.db.getWords(channel)):
            # <word> alone: top users for that word, plus the caller's own
            # rank when the caller is not already in the displayed list.
            word = arg1
            total = self.db.getTotalWordCount(channel, word)
            n = self.registryValue('rankingDisplay', channel)
            try:
                id = ircdb.users.getUserId(msg.prefix)
                (rank, number) = self.db.getRankAndNumber(channel, id, word)
            except (KeyError, ValueError):
                id = None
                rank = None
                number = None
            ers = '%rer' % word
            L = []
            for (userid, count) in self.db.getTopUsers(channel, word, n):
                if userid == id:
                    # The caller is shown in the top list; don't repeat
                    # the rank afterwards.
                    rank = None
                try:
                    username = ircdb.users.getUser(userid).name
                    L.append('%s: %s' % (username, count))
                except KeyError:
                    self.log.warning('Odd, I have a user in my WordStats '
                                     'database that doesn\'t exist in my '
                                     'user database: #%s.', userid)
            ret = 'Top %s (out of a total of %s seen):' % \
                  (utils.nItems(ers, len(L)), utils.nItems(repr(word), total))
            users = self.db.getNumUsers(channel)
            if rank is not None:
                s = ' You are ranked %s out of %s with %s.' % \
                    (rank, utils.nItems(ers, users),
                     utils.nItems(repr(word), number))
            else:
                s = ''
            ret = '%s %s.%s' % (ret, utils.commaAndify(L), s)
            irc.reply(ret)
        else:
            # Not a tracked word, so treat the argument as a user.
            user = arg1
            try:
                id = self._getUserId(irc, user)
            except KeyError:
                irc.error('%r doesn\'t look like a word I\'m keeping stats '
                          'on or a user in my database.' % user)
                return
            try:
                counts = self.db.getUserWordCounts(channel, id)
            except KeyError:
                counts = []
            L = ['%r: %s' % (word, count) for (word, count) in counts]
            if L:
                L.sort()
                irc.reply(utils.commaAndify(L))
            else:
                # The user exists; there is simply nothing recorded.
                irc.error('I have no word stats for that person.')
# Module-level alias for the plugin class.  NOTE(review): presumably the
# plugin loader looks this name up -- confirm before renaming or removing.
Class = WordStats
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: