mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-30 06:49:24 +01:00
215 lines
8.0 KiB
Python
215 lines
8.0 KiB
Python
###
# Copyright (c) 2004, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Watches for paste-floods in a channel and takes appropriate measures against
violators.
"""
|
|
|
|
import supybot
|
|
|
|
__revision__ = "$Id$"
|
|
__author__ = supybot.authors.jemfinch
|
|
__contributors__ = {}
|
|
|
|
import supybot.plugins as plugins
|
|
|
|
import glob
|
|
import os.path
|
|
import reverend.thomas
|
|
from cStringIO import StringIO as sio
|
|
|
|
import supybot.conf as conf
|
|
import supybot.utils as utils
|
|
from supybot.commands import *
|
|
import supybot.ircutils as ircutils
|
|
import supybot.registry as registry
|
|
import supybot.callbacks as callbacks
|
|
|
|
|
|
def configure(advanced):
    """Configure this module; called by setup.py.

    advanced -- bool specifying whether the user identified himself as an
                advanced user or not.

    Configuration should be effected by manipulating the registry as
    appropriate.  Here the only step is registering the plugin.
    """
    # The question helpers are imported for use by configuration prompts;
    # none of them is needed by the single registration call below.
    from supybot.questions import expect, anything, something, yn
    conf.registerPlugin('Bayes', True)
|
|
|
|
# Configuration group for this plugin.  Note that the module-level name
# ``Bayes`` is rebound by the plugin class defined later in this file, so the
# group object is only reachable under this name during the registration
# below.
Bayes = conf.registerPlugin('Bayes')
# Per-channel limit on consecutive paste-like messages (default 4; 0 disables
# checking, per the help text).  Nothing in the visible part of this file
# reads the value back.
conf.registerChannelValue(Bayes, 'maximumLines',
    registry.NonNegativeInteger(4, """Determines the maximum allowable number
    of consecutive messages that classify as a paste. If this value is 0, no
    checking will be done."""))
|
|
|
|
def tokenize(s):
    """Return the whitespace-separated words of ``s``, lowercased."""
    lowered = s.lower()
    words = lowered.split()
    return words
|
|
|
|
class PickleBayesDB(plugins.DbiChannelDB):
    """Channel database whose per-channel backend is the ``DB`` class below.

    NOTE(review): how calls are routed to a channel's ``DB`` instance is
    implemented by plugins.DbiChannelDB, which is not visible in this file.
    """
    class DB(object):
        """A pair of reverend Bayes classifiers persisted as pickle files.

        ``bayes`` classifies text into kinds; ``nickBayes`` classifies text
        by the nick it was trained under.
        """
        def __init__(self, filename):
            """Load both classifiers, starting empty where loading fails.

            filename -- path of the pickle for the main classifier.  The
                        nick classifier lives next to it, under the same
                        name with 'pickle' replaced by 'nick.pickle'.
            """
            self.filename = filename
            # Only rewrite the basename: replacing 'pickle' in the whole path
            # would also mangle any directory component containing that word
            # and point the nick database at a different directory.
            (dirname, basename) = os.path.split(filename)
            self.nickFilename = os.path.join(
                dirname, basename.replace('pickle', 'nick.pickle'))
            self.bayes = self._load(self.filename, 'bayes')
            self.nickBayes = self._load(self.nickFilename, 'nickbayes')

        def _load(self, filename, description):
            """Return a classifier loaded from ``filename`` if it exists.

            A missing file yields an empty classifier.  A load failure is
            logged (``description`` names the classifier in the message) and
            the classifier is returned in whatever state the failed load
            left it.
            """
            bayes = reverend.thomas.Bayes(tokenize)
            if os.path.exists(filename):
                try:
                    bayes.load(filename)
                except (EOFError, EnvironmentError) as e:
                    # ``log`` is a module global that the plugin class
                    # assigns in its __init__.
                    log.error('Couldn\'t load %s pickle from %s: %s',
                              description, filename, utils.exnToString(e))
            return bayes

        def close(self):
            """Write both classifiers back to their pickle files."""
            self.bayes.save(self.filename)
            self.nickBayes.save(self.nickFilename)
        # Flushing and closing are the same operation: save both files.
        flush = close

        def train(self, kind, s):
            """Train the main classifier that text ``s`` is of ``kind``."""
            self.bayes.train(kind, s)

        def trainNick(self, nick, s):
            """Train the nick classifier that ``nick`` said ``s``."""
            self.nickBayes.train(nick, s)

        def guess(self, s):
            """Return the best ``(kind, probability)`` match for ``s``.

            Returns None unless the best match scores above 0.5 and leads
            the runner-up (if any) by at least 0.4.  When the best match
            scores 0.5 or less, ``s`` is additionally trained as 'normal'.
            Also returns None when the classifier produces no matches.

            NOTE(review): the indentation of the original was lost; the
            'normal' training branch is assumed to pair with the ``> 0.5``
            test rather than with ``if matches`` -- confirm against the
            upstream file.
            """
            matches = self.bayes.guess(s)
            if not matches:
                return None
            best = matches[0]
            if best[1] > 0.5:
                if len(matches) > 1 and best[1] - matches[1][1] < 0.4:
                    # Too close to the runner-up to call.
                    return None
                return best
            self.bayes.train('normal', s)
            return None

        def guessNick(self, s):
            """Return candidate ``(nick, probability)`` pairs for ``s``.

            Candidates scoring 0.01 or less are dropped.  If the first
            remaining candidate is more than twice as probable as the
            second, only the first is returned.
            """
            L = [t for t in self.nickBayes.guess(s) if t[1] > 0.01]
            # No division by zero: every kept probability exceeds 0.01.
            if len(L) > 1 and L[0][1] / L[1][1] > 2:
                return [L[0]]
            return L
|
|
|
|
# Database factory for the plugin; 'pickle' is the only backend offered here.
BayesDB = plugins.DB('Bayes', {'pickle': PickleBayesDB})
|
|
|
|
class Bayes(callbacks.Privmsg):
    """Plugin that classifies channel messages with Bayesian classifiers.

    Every channel message is run through the channel's classifier and used
    to train the per-nick classifier; the ``guess``, ``who`` and ``train``
    commands expose the classifiers to users.
    """
    def __init__(self):
        self.__parent = super(Bayes, self)
        self.__parent.__init__()
        # PickleBayesDB.DB reports load failures through this module global.
        global log
        log = self.log
        self.db = BayesDB()

    def die(self):
        """Close the database when the plugin is unloaded."""
        self.db.close()

    def doPrivmsg(self, irc, msg):
        """Classify a channel message and train the nick classifier on it.

        Non-channel messages and messages tagged 'guessed' (set by the
        ``guess`` and ``who`` commands below) are ignored.
        """
        (channel, text) = msg.args
        if not ircutils.isChannel(channel) or msg.guessed:
            return
        kind = self.db.guess(channel, text)
        if kind is not None:
            (kind, prob) = kind
            prob *= 100
            # Shorten only the copy used for logging; the nick classifier
            # below must be trained on the complete message.
            excerpt = utils.ellipsisify(text, 30)
            self.log.debug('Classified %s as %s. (%.2f%%)',
                           utils.quoted(excerpt), kind, prob)
        # NOTE(review): assumed to run for every channel message, not only
        # for classified ones (original indentation was lost) -- confirm.
        self.db.trainNick(channel, msg.nick, text)

    def guess(self, irc, msg, args, channel, text):
        """[<channel>] <text>

        Guesses how <text> should be classified according to the Bayesian
        classifier for <channel>.  <channel> is only necessary if the message
        isn't sent in the channel itself, and then only if
        supybot.databases.plugins.channelSpecific is True.
        """
        # Keep doPrivmsg from processing the command message itself.
        msg.tag('guessed')
        kind = self.db.guess(channel, text)
        if kind is not None:
            (kind, prob) = kind
            prob *= 100
            # prob is a percentage at this point, so print the '%' sign.
            irc.reply('That seems to me to be %s, '
                      'but I\'m only %.2f%% certain.' % (kind, prob))
        else:
            irc.reply('I don\'t know what the heck that is.')
    guess = wrap(guess, ['channeldb', 'something'])

    def who(self, irc, msg, args, channel, text):
        """[<channel>] <text>

        Guesses who might have said <text>.  <channel> is only necessary if the
        message isn't sent in the channel itself, and then only if
        supybot.databases.plugins.channelSpecific is True.
        """
        # Keep doPrivmsg from processing the command message itself.
        msg.tag('guessed')
        kinds = self.db.guessNick(channel, text)
        if kinds:
            if len(kinds) == 1:
                (kind, prob) = kinds.pop()
                irc.reply('It seems to me (with %.2f%% certainty) '
                          'that %s said that.' % (prob*100, kind))
            else:
                kinds = ['%s (%.2f%%)' % (k, prob*100) for (k, prob) in kinds]
                irc.reply('I\'m not quite sure who said that, but it could be '
                          + utils.commaAndify(kinds, And='or'))
        else:
            irc.reply('I have no idea who might\'ve said that.')
    who = wrap(who, ['channeldb', 'something'])

    def train(self, irc, msg, args, channel, language, pattern):
        """[<channel>] <language> <glob>

        Trains the bot to recognize text similar to that contained in the files
        matching <glob> as text of the language <language>.  <channel> is only
        necessary if the message isn't sent in the channel itself, and then
        only if supybot.databases.plugins.channelSpecific is True.
        """
        # NOTE(review): <glob> is user-supplied and is expanded against the
        # bot's filesystem; no capability restriction is visible in the
        # wrap() spec below -- confirm this command is suitably restricted.
        filenames = glob.glob(pattern)
        if not filenames:
            # NOTE(review): presumably aborts the command by raising; if it
            # returns instead, the loop below is a no-op and replySuccess()
            # would still be sent -- confirm.
            irc.errorInvalid('glob', pattern)
        for filename in filenames:
            # The context manager closes the file even if training raises.
            with open(filename) as fd:
                for line in fd:
                    self.db.train(channel, language, line)
        irc.replySuccess()
    train = wrap(train, ['channeldb', 'something', 'something'])
|
|
|
|
|
|
# Alias the plugin class as ``Class``; presumably the name the plugin loader
# looks up in this module -- confirm against the loader.
Class = Bayes
|
|
|
|
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|