New Markov implementation. It's still broken, but I gotta run, so I'll fix it later.

This commit is contained in:
Jeremy Fincher 2004-08-01 20:28:08 +00:00
parent f44f5410f3
commit d81ca6fa08

View File

@ -40,82 +40,159 @@ __revision__ = "$Id$"
import supybot.plugins as plugins import supybot.plugins as plugins
import Queue
import anydbm
import random
import os.path import os.path
import threading
import supybot.ircmsgs as ircmsgs import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils import supybot.ircutils as ircutils
import supybot.privmsgs as privmsgs import supybot.privmsgs as privmsgs
import supybot.callbacks as callbacks import supybot.callbacks as callbacks
class MarkovDBInterface(object):
    """Outline of the methods a Markov database backend provides.

    Every method here is a no-op placeholder that returns None; concrete
    backends supply the real behavior.
    """

    def close(self):
        """Release whatever resources the backend holds."""
        pass

    def addPair(self, channel, first, second, follower,
                isFirst=False, isLast=False):
        """Record that <follower> came after the pair (<first>, <second>).

        isFirst and isLast flag a triple that started or ended a message.
        """
        pass

    def getFirstPair(self, channel):
        """Return a pair of words that started a message on <channel>."""
        pass

    def getFollower(self, channel, first, second):
        """Return a (follower, last) tuple for the pair (<first>, <second>).

        This is the name the concrete backends and the plugin actually use.
        """
        pass

    def getPair(self, channel, first, second):
        """Older name for getFollower(); kept so existing callers still work.

        Returns (follower, last) tuple.
        """
        return self.getFollower(channel, first, second)
class SqliteMarkovDB(object):
    """Placeholder for an SQLite-backed Markov database.

    Nothing is implemented yet: every method is a no-op returning None.
    """

    def close(self):
        """Do nothing; present because the work queue closes its database."""
        pass

    def addPair(self, channel, first, second, follower,
                isFirst=False, isLast=False):
        """Not implemented; accepts the arguments and stores nothing."""
        pass

    def getFirstPair(self, channel):
        """Not implemented; returns None."""
        pass

    def getFollower(self, channel, first, second):
        """Not implemented; returns None.

        A real implementation returns a (follower, last) tuple.
        """
        pass
class DbmMarkovDB(object):
    """Markov chain storage kept in one anydbm database per channel.

    Layout of each channel database:
      * 'firsts' -- newline-terminated "first second" pairs that began a
        message.
      * 'lasts'  -- newline-terminated "second follower" pairs that ended a
        message.
      * "first second" -- space-separated words seen after that pair.
    """

    def __init__(self):
        # Open databases, keyed by channel name.
        self.dbs = ircutils.IrcDict()

    def close(self):
        """Close every database opened so far."""
        for db in self.dbs.values():
            db.close()

    def _getDb(self, channel):
        """Return the database for <channel>, opening it on first use."""
        if channel not in self.dbs:
            # Stupid anydbm seems to append .db to the end of this.
            db = anydbm.open('%s-DbmMarkovDB' % channel, 'c')
            # Flag 'c' also opens a database that already exists, so only
            # create the bookkeeping keys when they are missing; resetting
            # them unconditionally would discard previously stored pairs.
            for key in ('firsts', 'lasts'):
                try:
                    db[key]
                except KeyError:
                    db[key] = ''
            self.dbs[channel] = db
        return self.dbs[channel]

    def _combine(self, first, second):
        """Build the "first second" string used as key and list entry."""
        return '%s %s' % (first, second)

    def _addFirst(self, db, combined):
        """Append <combined> to the list of message-opening pairs."""
        db['firsts'] = db['firsts'] + (combined + '\n')

    def _addLast(self, db, second, follower):
        """Append (<second>, <follower>) to the message-closing pairs."""
        combined = self._combine(second, follower)
        db['lasts'] = db['lasts'] + (combined + '\n')

    def addPair(self, channel, first, second, follower,
                isFirst=False, isLast=False):
        """Record that <follower> came after (<first>, <second>).

        With isFirst the pair is also listed as a message opener; with
        isLast (<second>, <follower>) is also listed as a message closer.
        The follower itself is stored in every case, otherwise
        getFollower() could not continue from an opening pair.
        """
        db = self._getDb(channel)
        combined = self._combine(first, second)
        if isFirst:
            self._addFirst(db, combined)
        if isLast:
            self._addLast(db, second, follower)
        try:
            known = db[combined]
        except KeyError:
            db[combined] = follower
        else:
            db[combined] = known + (' ' + follower)

    def getFirstPair(self, channel):
        """Return a random message-opening pair as a [first, second] list.

        Raises KeyError when no opening pair is stored for <channel>.
        """
        db = self._getDb(channel)
        # splitlines() yields no empty trailing element for text ending in
        # a newline, so every element is a real pair and none is dropped.
        firsts = db['firsts'].splitlines()
        if not firsts:
            raise KeyError('No firsts for %s.' % channel)
        return random.choice(firsts).split()

    def getFollower(self, channel, first, second):
        """Pick a random word seen after (<first>, <second>).

        Returns a (follower, last) tuple; last is True when
        (<second>, <follower>) was recorded as the end of a message.  A pair
        with no stored followers makes the database lookup fail (KeyError
        for dict-like databases).
        """
        db = self._getDb(channel)
        followers = db[self._combine(first, second)]
        follower = random.choice(followers.split())
        # Compare against whole entries: a plain substring test on the
        # 'lasts' text would also match the tail of a longer word.
        last = self._combine(second, follower) in db['lasts'].splitlines()
        return (follower, last)
def MarkovDB(*args, **kwargs):
    """Build the Markov database backend currently in use.

    Arguments are handed to the backend constructor unchanged, matching how
    MarkovWorkQueue calls this factory.
    """
    return DbmMarkovDB(*args, **kwargs)
class MarkovWorkQueue(threading.Thread):
    """Thread that runs queued database jobs one at a time.

    Each job is a callable taking the database object as its only
    argument; jobs run on this thread in the order they were enqueued.
    """

    # Marker that die() puts on the queue so a run() blocked in get() wakes.
    _SHUTDOWN = object()

    def __init__(self, *args, **kwargs):
        """Create the database and the job queue, then start the thread.

        All arguments are forwarded to MarkovDB().
        """
        threading.Thread.__init__(self)
        self.db = MarkovDB(*args, **kwargs)
        self.q = Queue.Queue()
        self.killed = False
        self.start()

    def die(self):
        """Ask the worker to stop; returns without waiting for it."""
        self.killed = True
        # Setting the flag alone is not enough: run() may be blocked in
        # q.get() and would never look at it again.
        self.q.put(self._SHUTDOWN)

    def enqueue(self, f):
        """Schedule f(db) to be called on the worker thread."""
        self.q.put(f)

    def run(self):
        """Process jobs until die() is called, then close the database."""
        import traceback
        try:
            while not self.killed:
                f = self.q.get()
                if f is self._SHUTDOWN:
                    break
                try:
                    f(self.db)
                except Exception:
                    # One failing job must not end the worker; report it
                    # and carry on with the next one.
                    traceback.print_exc()
        finally:
            self.db.close()
class Markov(callbacks.Privmsg):
def __init__(self):
self.q = MarkovWorkQueue()
callbacks.Privmsg.__init__(self) callbacks.Privmsg.__init__(self)
def makeDb(self, filename): def die(self):
if os.path.exists(filename): self.q.die()
return sqlite.connect(filename)
db = sqlite.connect(filename) def tokenize(self, s):
cursor = db.cursor() # XXX: Should this be smarter?
cursor.execute("""CREATE TABLE pairs ( return s.split()
id INTEGER PRIMARY KEY,
first TEXT,
second TEXT,
is_first BOOLEAN,
UNIQUE (first, second) ON CONFLICT IGNORE
)""")
cursor.execute("""CREATE TABLE follows (
id INTEGER PRIMARY KEY,
pair_id INTEGER,
word TEXT
)""")
cursor.execute("""CREATE INDEX follows_pair_id ON follows (pair_id)""")
db.commit()
return db
def doPrivmsg(self, irc, msg): def doPrivmsg(self, irc, msg):
if not ircutils.isChannel(msg.args[0]):
return
channel = msg.args[0] channel = msg.args[0]
db = self.getDb(channel) if ircutils.isChannel(channel):
cursor = db.cursor() words = self.tokenize(msg.args[1])
if ircmsgs.isAction(msg): if len(words) >= 3:
words = ircmsgs.unAction(msg).split() def doPrivmsg(db):
else: db.addPair(channel, words[0], words[1], words[2],
words = msg.args[1].split() isFirst=True)
isFirst = True db.addPair(channel, words[-3], words[-2], words[-1],
isLast=True)
del words[0] # Remove first.
del words[-1] # Remove last.
for (first, second, follower) in window(words, 3): for (first, second, follower) in window(words, 3):
if isFirst: db.addPair(channel, first, second, follower)
cursor.execute("""INSERT OR REPLACE self.q.enqueue(doPrivmsg)
INTO pairs VALUES (NULL, %s, %s, 1)""",
first, second)
isFirst = False
else:
cursor.execute("INSERT INTO pairs VALUES (NULL, %s, %s, 0)",
first, second)
cursor.execute("""SELECT id FROM pairs
WHERE first=%s AND second=%s""", first, second)
id = int(cursor.fetchone()[0])
cursor.execute("""INSERT INTO follows VALUES (NULL, %s, %s)""",
id, follower)
if not isFirst: # i.e., if the loop iterated at all.
cursor.execute("""INSERT INTO pairs VALUES (NULL, %s, %s, 0)""",
second, follower)
cursor.execute("""SELECT id FROM pairs
WHERE first=%s AND second=%s""", second,follower)
id = int(cursor.fetchone()[0])
cursor.execute("INSERT INTO follows VALUES (NULL, %s, NULL)", id)
db.commit()
_maxMarkovLength = 80
_minMarkovLength = 7
def markov(self, irc, msg, args): def markov(self, irc, msg, args):
"""[<channel>] """[<channel>]
@ -123,99 +200,19 @@ class Markov(plugins.ChannelDBHandler, callbacks.Privmsg):
data kept on <channel> (which is only necessary if not sent in the data kept on <channel> (which is only necessary if not sent in the
channel itself). channel itself).
""" """
argsCopy = args[:]
channel = privmsgs.getChannel(msg, args) channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel) def markov(db):
cursor = db.cursor() try:
words = [] words = list(db.getFirstPair(channel))
cursor.execute("""SELECT id, first, second FROM pairs except KeyError:
WHERE is_first=1 irc.error('I don\'t have any first pairs for %s.' % channel)
ORDER BY random()
LIMIT 1""")
if cursor.rowcount == 0:
irc.error('I have no records for that channel.')
return return
(id, first, second) = cursor.fetchone() last = False
id = int(id) while not last:
words.append(first) (follower,last) = db.getFollower(channel, words[-2], words[-1])
words.append(second) words.append(follower)
sql = """SELECT follows.word FROM pairs, follows
WHERE pairs.first=%s AND
pairs.second=%s AND
pairs.id=follows.pair_id
ORDER BY random()
LIMIT 1"""
while len(words) < self._maxMarkovLength:
cursor.execute(sql, words[-2], words[-1])
results = cursor.fetchone()
if not results:
break
word = results[0]
if word is None:
break
words.append(word)
if len(words) < self._minMarkovLength:
self.markov(irc, msg, argsCopy)
else:
irc.reply(' '.join(words)) irc.reply(' '.join(words))
self.q.enqueue(markov)
def pairs(self, irc, msg, args):
"""[<channel>]
Returns the number of Markov's chain links in the database for
<channel>.
"""
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
cursor.execute("""SELECT COUNT(*) FROM pairs""")
n = int(cursor.fetchone()[0])
s = 'There are %s pairs in my Markov database for %s' % (n, channel)
irc.reply(s)
def firsts(self, irc, msg, args):
"""[<channel>]
Returns the number of Markov's first links in the database for
<channel>.
"""
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
cursor.execute("""SELECT COUNT(*) FROM pairs WHERE is_first=1""")
n = int(cursor.fetchone()[0])
s = 'There are %s first pairs in my Markov database for %s'%(n,channel)
irc.reply(s)
def follows(self, irc, msg, args):
"""[<channel>]
Returns the number of Markov's third links in the database for
<channel>.
"""
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
cursor.execute("""SELECT COUNT(*) FROM follows""")
n = int(cursor.fetchone()[0])
s = 'There are %s follows in my Markov database for %s' % (n, channel)
irc.reply(s)
def lasts(self, irc, msg, args):
"""[<channel>]
Returns the number of Markov's last links in the database for
<channel>.
"""
channel = privmsgs.getChannel(msg, args)
db = self.getDb(channel)
cursor = db.cursor()
cursor.execute("""SELECT COUNT(*) FROM follows WHERE word ISNULL""")
n = int(cursor.fetchone()[0])
s = 'There are %s lasts in my Markov database for %s' % (n, channel)
irc.reply(s)
Class = Markov Class = Markov
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: