New Markov implementation. It's still broken, but I gotta run, so I'll fix it later.

This commit is contained in:
Jeremy Fincher 2004-08-01 20:28:08 +00:00
parent f44f5410f3
commit d81ca6fa08

View File

@ -40,82 +40,159 @@ __revision__ = "$Id$"
import supybot.plugins as plugins import supybot.plugins as plugins
import Queue
import anydbm
import random
import os.path import os.path
import threading
import supybot.ircmsgs as ircmsgs import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils import supybot.ircutils as ircutils
import supybot.privmsgs as privmsgs import supybot.privmsgs as privmsgs
import supybot.callbacks as callbacks import supybot.callbacks as callbacks
try: class MarkovDBInterface(object):
import sqlite def close(self):
except ImportError: pass
raise callbacks.Error, 'You need to have PySQLite installed to use this ' \
'plugin. Download it at <http://pysqlite.sf.net/>' def addPair(self, channel, first, second, follower,
isFirst=False, isLast=False):
pass
class Markov(plugins.ChannelDBHandler, callbacks.Privmsg): def getFirstPair(self, channel):
threaded = True pass
def getPair(self, channel, first, second):
# Returns (follower, last) tuple.
pass
class SqliteMarkovDB(object):
    """Placeholder for a SQLite-backed Markov store.

    None of the methods are implemented yet: each one ignores its arguments
    and returns None, so this class cannot serve as a working backend.
    """

    def close(self):
        """Release backend resources (there are none to release yet).

        Present so the stub offers the same close() hook that
        MarkovDBInterface declares and that MarkovWorkQueue.run() calls on
        its database object.
        """

    def addPair(self, channel, first, second, follower,
                isFirst=False, isLast=False):
        """Record `follower` as a word seen after (`first`, `second`).

        Not implemented: nothing is stored and None is returned.
        """

    def getFirstPair(self, channel):
        """Return a first pair for `channel`.

        Not implemented: returns None.
        """

    def getFollower(self, channel, first, second):
        """Return a (follower, last) tuple for the pair (`first`, `second`).

        Not implemented: returns None rather than a tuple.
        """
class DbmMarkovDB(object):
def __init__(self): def __init__(self):
plugins.ChannelDBHandler.__init__(self) self.dbs = ircutils.IrcDict()
callbacks.Privmsg.__init__(self)
def close(self):
    """Close every database handle currently held in self.dbs."""
    handles = self.dbs.values()
    for handle in handles:
        handle.close()
def makeDb(self, filename): def _getDb(self, channel):
if os.path.exists(filename): if channel not in self.dbs:
return sqlite.connect(filename) # Stupid anydbm seems to append .db to the end of this.
db = sqlite.connect(filename) self.dbs[channel] = anydbm.open('%s-DbmMarkovDB' % channel, 'c')
cursor = db.cursor() self.dbs[channel]['lasts'] = ''
cursor.execute("""CREATE TABLE pairs ( self.dbs[channel]['firsts'] = ''
id INTEGER PRIMARY KEY, return self.dbs[channel]
first TEXT,
second TEXT, def _addFirst(self, db, combined):
is_first BOOLEAN, db['firsts'] = db['firsts'] + (combined + '\n')
UNIQUE (first, second) ON CONFLICT IGNORE
)""") def _addLast(self, db, second, follower):
cursor.execute("""CREATE TABLE follows ( combined = self._combine(second, follower)
id INTEGER PRIMARY KEY, db['lasts'] = db['lasts'] + (combined + '\n')
pair_id INTEGER,
word TEXT def addPair(self, channel, first, second, follower,
)""") isFirst=False, isLast=False):
cursor.execute("""CREATE INDEX follows_pair_id ON follows (pair_id)""") db = self._getDb(channel)
db.commit() combined = self._combine(first, second)
return db if isFirst:
self._addFirst(db, combined)
elif isLast:
self._addLast(db, second, follower)
else:
if db.has_key(combined): # EW!
db[combined] = db[combined] + (' ' + follower)
else:
db[combined] = follower
#db.flush()
def getFirstPair(self, channel):
    """Return a randomly chosen first pair for `channel` as a list of words.

    db['firsts'] holds one "first second" entry per line (entries are
    appended with a trailing newline by _addFirst).

    Raises KeyError when no first pair has been recorded for the channel.
    """
    db = self._getDb(channel)
    # str.splitlines() never yields a trailing empty string for text that
    # ends in '\n', so there is no "empty line" to strip.  Popping the last
    # element here used to throw away the most recent real entry, which made
    # a channel with exactly one first pair look empty.
    firsts = db['firsts'].splitlines()
    if not firsts:
        raise KeyError('No firsts for %s.' % channel)
    return random.choice(firsts).split()
def _combine(self, first, second):
    """Build the database key for a word pair: both words, space-separated."""
    pair = (first, second)
    return '%s %s' % pair
def getFollower(self, channel, first, second):
    """Pick a random word recorded after the pair (`first`, `second`).

    Returns a (follower, last) tuple.  `last` is True when the pair
    (`second`, follower) is one of the entries stored in db['lasts'].

    A pair with no stored followers propagates the lookup error raised by
    the database object (KeyError for dict-like stores).
    """
    db = self._getDb(channel)
    followers = db[self._combine(first, second)]
    follower = random.choice(followers.split())
    # db['lasts'] is a newline-separated list of "second follower" entries
    # (see _addLast).  Compare against whole entries: a plain substring test
    # on the joined string would also match inside longer words, e.g.
    # "b c" inside "xb cy".
    last = self._combine(second, follower) in db['lasts'].splitlines()
    return (follower, last)
def MarkovDB(*args, **kwargs):
    """Create the Markov database backend (currently always DbmMarkovDB).

    Positional and keyword arguments are forwarded to the backend
    constructor, matching the way MarkovWorkQueue invokes this factory
    (`MarkovDB(*args, **kwargs)`).  Calling it with no arguments behaves
    exactly as before.
    """
    return DbmMarkovDB(*args, **kwargs)
class MarkovWorkQueue(threading.Thread):
    """Worker thread that runs queued callables against one database object.

    Each callable passed to enqueue() is later invoked on this thread as
    `f(self.db)`, in the order it was queued.
    """

    def __init__(self, *args, **kwargs):
        """Create the database and the job queue, then start the thread.

        All arguments are passed through to MarkovDB().
        """
        threading.Thread.__init__(self)
        self.db = MarkovDB(*args, **kwargs)
        self.q = Queue.Queue()
        self.killed = False
        self.start()

    def die(self):
        """Ask the worker to stop and wake it if it is waiting for work."""
        self.killed = True
        # run() blocks inside q.get(); without a wake-up item it would never
        # re-check `killed`, so the thread would not exit and the database
        # would never be closed.
        self.q.put(None)

    def enqueue(self, f):
        """Queue callable `f`; the worker will call it as f(db)."""
        self.q.put(f)

    def run(self):
        """Process queued jobs until die() is called, then close the db."""
        try:
            while not self.killed:
                f = self.q.get()
                if f is None:
                    # Wake-up sentinel posted by die().
                    break
                f(self.db)
        finally:
            # Close the database even when a job raised and ended the loop.
            self.db.close()
class Markov(callbacks.Privmsg):
def __init__(self):
    """Create the database work queue, then initialise the Privmsg base."""
    # MarkovWorkQueue starts its own thread from its constructor.
    worker = MarkovWorkQueue()
    self.q = worker
    callbacks.Privmsg.__init__(self)
def die(self):
    """Tell the database work queue to stop."""
    worker = self.q
    worker.die()
def tokenize(self, s):
    """Split `s` into a list of words on runs of whitespace."""
    # XXX: Should this be smarter?
    words = s.split()
    return words
def doPrivmsg(self, irc, msg): def doPrivmsg(self, irc, msg):
if not ircutils.isChannel(msg.args[0]):
return
channel = msg.args[0] channel = msg.args[0]
db = self.getDb(channel) if ircutils.isChannel(channel):
cursor = db.cursor() words = self.tokenize(msg.args[1])
if ircmsgs.isAction(msg): if len(words) >= 3:
words = ircmsgs.unAction(msg).split() def doPrivmsg(db):
else: db.addPair(channel, words[0], words[1], words[2],
words = msg.args[1].split() isFirst=True)
isFirst = True db.addPair(channel, words[-3], words[-2], words[-1],
for (first, second, follower) in window(words, 3): isLast=True)
if isFirst: del words[0] # Remove first.
cursor.execute("""INSERT OR REPLACE del words[-1] # Remove last.
INTO pairs VALUES (NULL, %s, %s, 1)""", for (first, second, follower) in window(words, 3):
first, second) db.addPair(channel, first, second, follower)
isFirst = False self.q.enqueue(doPrivmsg)
else:
cursor.execute("INSERT INTO pairs VALUES (NULL, %s, %s, 0)",
first, second)
cursor.execute("""SELECT id FROM pairs
WHERE first=%s AND second=%s""", first, second)
id = int(cursor.fetchone()[0])
cursor.execute("""INSERT INTO follows VALUES (NULL, %s, %s)""",
id, follower)
if not isFirst: # i.e., if the loop iterated at all.
cursor.execute("""INSERT INTO pairs VALUES (NULL, %s, %s, 0)""",
second, follower)
cursor.execute("""SELECT id FROM pairs
WHERE first=%s AND second=%s""", second,follower)
id = int(cursor.fetchone()[0])
cursor.execute("INSERT INTO follows VALUES (NULL, %s, NULL)", id)
db.commit()
_maxMarkovLength = 80
_minMarkovLength = 7
def markov(self, irc, msg, args): def markov(self, irc, msg, args):
"""[<channel>] """[<channel>]
@ -123,99 +200,19 @@ class Markov(plugins.ChannelDBHandler, callbacks.Privmsg):
data kept on <channel> (which is only necessary if not sent in the data kept on <channel> (which is only necessary if not sent in the
channel itself). channel itself).
""" """
argsCopy = args[:]
channel = privmsgs.getChannel(msg, args) channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel) def markov(db):
cursor = db.cursor() try:
words = [] words = list(db.getFirstPair(channel))
cursor.execute("""SELECT id, first, second FROM pairs except KeyError:
WHERE is_first=1 irc.error('I don\'t have any first pairs for %s.' % channel)
ORDER BY random() return
LIMIT 1""") last = False
if cursor.rowcount == 0: while not last:
irc.error('I have no records for that channel.') (follower,last) = db.getFollower(channel, words[-2], words[-1])
return words.append(follower)
(id, first, second) = cursor.fetchone()
id = int(id)
words.append(first)
words.append(second)
sql = """SELECT follows.word FROM pairs, follows
WHERE pairs.first=%s AND
pairs.second=%s AND
pairs.id=follows.pair_id
ORDER BY random()
LIMIT 1"""
while len(words) < self._maxMarkovLength:
cursor.execute(sql, words[-2], words[-1])
results = cursor.fetchone()
if not results:
break
word = results[0]
if word is None:
break
words.append(word)
if len(words) < self._minMarkovLength:
self.markov(irc, msg, argsCopy)
else:
irc.reply(' '.join(words)) irc.reply(' '.join(words))
self.q.enqueue(markov)
def pairs(self, irc, msg, args):
    """[<channel>]
    Returns the number of Markov's chain links in the database for
    <channel>.
    """
    channel = privmsgs.getChannel(msg, args)
    cursor = self.getDb(channel).cursor()
    # COUNT(*) yields a single row whose first column is the row total.
    cursor.execute("""SELECT COUNT(*) FROM pairs""")
    row = cursor.fetchone()
    total = int(row[0])
    irc.reply('There are %s pairs in my Markov database for %s'
              % (total, channel))
def firsts(self, irc, msg, args):
    """[<channel>]
    Returns the number of Markov's first links in the database for
    <channel>.
    """
    channel = privmsgs.getChannel(msg, args)
    cursor = self.getDb(channel).cursor()
    # Only rows flagged is_first=1 are counted.
    cursor.execute("""SELECT COUNT(*) FROM pairs WHERE is_first=1""")
    row = cursor.fetchone()
    total = int(row[0])
    irc.reply('There are %s first pairs in my Markov database for %s'
              % (total, channel))
def follows(self, irc, msg, args):
    """[<channel>]
    Returns the number of Markov's third links in the database for
    <channel>.
    """
    channel = privmsgs.getChannel(msg, args)
    cursor = self.getDb(channel).cursor()
    # COUNT(*) yields a single row whose first column is the row total.
    cursor.execute("""SELECT COUNT(*) FROM follows""")
    row = cursor.fetchone()
    total = int(row[0])
    irc.reply('There are %s follows in my Markov database for %s'
              % (total, channel))
def lasts(self, irc, msg, args):
    """[<channel>]
    Returns the number of Markov's last links in the database for
    <channel>.
    """
    channel = privmsgs.getChannel(msg, args)
    cursor = self.getDb(channel).cursor()
    # Only `follows` rows whose word is NULL are counted.
    cursor.execute("""SELECT COUNT(*) FROM follows WHERE word ISNULL""")
    row = cursor.fetchone()
    total = int(row[0])
    irc.reply('There are %s lasts in my Markov database for %s'
              % (total, channel))
Class = Markov Class = Markov
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: