###
# Copyright (c) 2004, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Watches for paste-floods in a channel and takes appropriate measures against
violators.
"""

import supybot

__revision__ = "$Id$"
__author__ = supybot.authors.jemfinch
__contributors__ = {}

import supybot.plugins as plugins

import sys
import glob
import os.path

import reverend.thomas
from cStringIO import StringIO as sio

import supybot.conf as conf
import supybot.utils as utils
from supybot.commands import *
import supybot.ircutils as ircutils
import supybot.registry as registry
import supybot.callbacks as callbacks


def configure(advanced):
    # This will be called by setup.py to configure this module.  Advanced is
    # a bool that specifies whether the user identified himself as an advanced
    # user or not.  You should effect your configuration by manipulating the
    # registry as appropriate.
    from supybot.questions import expect, anything, something, yn
    conf.registerPlugin('Bayes', True)


# NOTE: this module-level name is rebound to the plugin class further down;
# until then it refers to the plugin's registry group.
Bayes = conf.registerPlugin('Bayes')
conf.registerChannelValue(Bayes, 'maximumLines',
    registry.NonNegativeInteger(4, """Determines the maximum allowable number
    of consecutive messages that classify as a paste.  If this value is 0, no
    checking will be done."""))


def tokenize(s):
    """Split s into lowercase, whitespace-delimited tokens."""
    return s.lower().split()


class PickleBayesDB(plugins.DbiChannelDB):
    # Per-channel store holding two classifiers: one keyed by "kind" of text
    # and one keyed by the nick that said it.
    class DB(object):
        def __init__(self, filename):
            """Create both classifiers, loading saved state when present.

            filename is the pickle used for the kind classifier; the nick
            classifier is stored next to it under a derived name.
            """
            self.filename = filename
            # NOTE(review): str.replace rewrites every 'pickle' substring in
            # the path, not only the extension -- confirm no data directory
            # contains that word.
            self.nickFilename = self.filename.replace('pickle', 'nick.pickle')
            self.bayes = self._load(self.filename, 'bayes')
            self.nickBayes = self._load(self.nickFilename, 'nickbayes')

        def _load(self, filename, what):
            """Return a classifier, populated from filename if it exists.

            A missing file gives an empty classifier.  A file that cannot be
            read is logged (what names the classifier in the message) and the
            classifier is returned in whatever state the failed load left it.
            """
            bayes = reverend.thomas.Bayes(tokenize)
            if os.path.exists(filename):
                try:
                    bayes.load(filename)
                except (EOFError, EnvironmentError):
                    # Fetch the exception through sys.exc_info() so this
                    # handler does not depend on version-specific "except"
                    # binding syntax.
                    e = sys.exc_info()[1]
                    log.error('Couldn\'t load %s pickle from %s: %s',
                              what, filename, utils.exnToString(e))
            return bayes

        def close(self):
            """Write both classifiers back to their pickle files."""
            self.bayes.save(self.filename)
            self.nickBayes.save(self.nickFilename)
        flush = close

        def train(self, kind, s):
            """Train the kind classifier that s is an example of kind."""
            self.bayes.train(kind, s)

        def trainNick(self, nick, s):
            """Train the nick classifier that s was said by nick."""
            self.nickBayes.train(nick, s)

        def guess(self, s):
            """Return the best (kind, probability) pair for s, or None.

            A guess is reported only when the top probability exceeds 0.5 and
            leads the runner-up (if any) by at least 0.4.  When the classifier
            offers no candidates at all, s is trained as 'normal' as a side
            effect.
            """
            matches = self.bayes.guess(s)
            if not matches:
                self.bayes.train('normal', s)
                return None
            best = matches[0]
            if best[1] > 0.5:
                if len(matches) > 1 and best[1] - matches[1][1] < 0.4:
                    # Too close to the second candidate to call.
                    return None
                return best
            return None

        def guessNick(self, s):
            """Return candidate (nick, probability) pairs for who said s.

            Candidates at or below 0.01 are dropped.  If the first remaining
            candidate is more than twice as probable as the second, only the
            first is returned.
            """
            L = [t for t in self.nickBayes.guess(s) if t[1] > 0.01]
            if len(L) > 1:
                # Every kept probability is > 0.01, so the divisor is nonzero.
                if L[0][1] / L[1][1] > 2:
                    return [L[0]]
            return L


BayesDB = plugins.DB('Bayes', {'pickle': PickleBayesDB})


# Plugin class.  Command docstrings below are kept verbatim because they read
# as user-facing command syntax/help (presumably consumed by the framework --
# confirm before rewording); explanatory notes therefore live in comments.
class Bayes(callbacks.Privmsg):
    def __init__(self):
        self.__parent = super(Bayes, self)
        self.__parent.__init__()
        # The DB classes log through a module-level "log"; publish the
        # plugin's logger under that name before the database is created.
        global log
        log = self.log
        self.db = BayesDB()

    def die(self):
        self.db.close()

    # Classifies every channel message and trains the nick model with it.
    def doPrivmsg(self, irc, msg):
        (channel, text) = msg.args
        # Skip non-channel messages and messages tagged by the guess/who
        # commands below.
        if not ircutils.isChannel(channel) or msg.guessed:
            return
        kind = self.db.guess(channel, text)
        if kind is not None:
            (kind, prob) = kind
            prob *= 100
            # Shorten only the copy that goes to the log; the nick model
            # below must be trained on the complete message.
            shown = utils.ellipsisify(text, 30)
            self.log.debug('Classified %s as %s.  (%.2f%%)',
                           utils.quoted(shown), kind, prob)
        self.db.trainNick(channel, msg.nick, text)

    def guess(self, irc, msg, args, channel, text):
        """[<channel>] <text>

        Guesses how <text> should be classified according to the Bayesian
        classifier for <channel>.  <channel> is only necessary if the message
        isn't sent in the channel itself, and then only if
        supybot.databases.plugins.channelSpecific is True.
        """
        # Tag the message so doPrivmsg ignores it.
        msg.tag('guessed')
        kind = self.db.guess(channel, text)
        if kind is not None:
            (kind, prob) = kind
            # prob is scaled to a percentage, so the reply carries a literal
            # percent sign.
            prob *= 100
            irc.reply('That seems to me to be %s, '
                      'but I\'m only %.2f%% certain.' % (kind, prob))
        else:
            irc.reply('I don\'t know what the heck that is.')
    guess = wrap(guess, ['channeldb', 'something'])

    def who(self, irc, msg, args, channel, text):
        """[<channel>] <text>

        Guesses who might have said <text>.  <channel> is only necessary if
        the message isn't sent in the channel itself, and then only if
        supybot.databases.plugins.channelSpecific is True.
        """
        # Tag the message so doPrivmsg ignores it.
        msg.tag('guessed')
        kinds = self.db.guessNick(channel, text)
        if kinds:
            if len(kinds) == 1:
                (kind, prob) = kinds.pop()
                irc.reply('It seems to me (with %.2f%% certainty) '
                          'that %s said that.' % (prob*100, kind))
            else:
                kinds = ['%s (%.2f%%)' % (k, prob*100) for (k, prob) in kinds]
                irc.reply('I\'m not quite sure who said that, but it could be '
                          + utils.commaAndify(kinds, And='or'))
        else:
            irc.reply('I have no idea who might\'ve said that.')
    who = wrap(who, ['channeldb', 'something'])

    # NOTE(review): <glob> is expanded on the bot host's filesystem and no
    # capability check is visible in this wrapper -- confirm who may call it.
    def train(self, irc, msg, args, channel, language, pattern):
        """[<channel>] <language> <glob>

        Trains the bot to recognize text similar to that contained in the
        files matching <glob> as text of the language <language>.  <channel>
        is only necessary if the message isn't sent in the channel itself, and
        then only if supybot.databases.plugins.channelSpecific is True.
        """
        filenames = glob.glob(pattern)
        if not filenames:
            irc.errorInvalid('glob', pattern)
            # Stop here regardless of whether errorInvalid raises, so an
            # error is never followed by a success reply.
            return
        for filename in filenames:
            fd = open(filename)
            try:
                for line in fd:
                    self.db.train(channel, language, line)
            finally:
                # Release the handle even if training a line fails.
                fd.close()
        irc.replySuccess()
    train = wrap(train, ['channeldb', 'something', 'something'])


Class = Bayes

# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: