Fix a bug with adding lasts. Add some elucidating (as jemfinch would put it)
documentation.  Modify the behavior of DbmMarkovDB.firsts and .lasts so
they only report unique entries since this is how we used to behave.  We'll
still keep duplicate entries in the db because it affects randomness.
This commit is contained in:
James Vega 2004-08-25 18:54:21 +00:00
parent fbc1b04cae
commit 3635c654ef

View File

@@ -40,6 +40,7 @@ __revision__ = "$Id$"
import supybot.plugins as plugins import supybot.plugins as plugins
import sets
import Queue import Queue
import anydbm import anydbm
import random import random
@@ -92,6 +93,13 @@ class DbmMarkovDB(object):
if channel not in self.dbs: if channel not in self.dbs:
# Stupid anydbm seems to append .db to the end of this. # Stupid anydbm seems to append .db to the end of this.
filename = plugins.makeChannelFilename(channel, 'DbmMarkovDB') filename = plugins.makeChannelFilename(channel, 'DbmMarkovDB')
# To keep the code simpler for addPair, I decided not to make
# self.dbs[channel]['firsts'] and ['lasts']. Instead, we'll pad
# the words list being sent to addPair such that ['\n \n'] will be
# ['firsts'] and ['\n'] will be ['lasts']. This also means isFirst
# and isLast aren't necessary, but they'll be left alone in case
# one of the other Db formats uses them or someone decides that I
# was wrong and changes my code.
self.dbs[channel] = anydbm.open(filename, 'c') self.dbs[channel] = anydbm.open(filename, 'c')
return self.dbs[channel] return self.dbs[channel]
@@ -112,11 +120,11 @@ class DbmMarkovDB(object):
def getFirstPair(self, channel): def getFirstPair(self, channel):
db = self._getDb(channel) db = self._getDb(channel)
firsts = db['\r \r'].split() firsts = db['\n \n'].split()
if firsts: if firsts:
firsts.pop() # Empty line. firsts.pop() # Empty line.
if firsts: if firsts:
return ('\r', random.choice(firsts)) return ('\n', random.choice(firsts))
else: else:
raise KeyError, 'No firsts for %s.' % channel raise KeyError, 'No firsts for %s.' % channel
else: else:
@@ -133,15 +141,15 @@ class DbmMarkovDB(object):
def firsts(self, channel): def firsts(self, channel):
db = self._getDb(channel) db = self._getDb(channel)
if '\r \r' in db: if '\n \n' in db:
return len(db['\r \r'].split()) return len(sets.Set(db['\n \n'].split()))
else: else:
return 0 return 0
def lasts(self, channel): def lasts(self, channel):
db = self._getDb(channel) db = self._getDb(channel)
if '\n' in db: if '\n' in db:
return len(db['\n'].split()) return len(sets.Set(db['\n'].split()))
else: else:
return 0 return 0
@@ -199,9 +207,9 @@ class Markov(callbacks.Privmsg):
channel = msg.args[0] channel = msg.args[0]
if ircutils.isChannel(channel): if ircutils.isChannel(channel):
words = self.tokenize(msg.args[1]) words = self.tokenize(msg.args[1])
words.insert(0, '\r') words.insert(0, '\n')
words.insert(0, '\r') words.insert(0, '\n')
words.insert(-1, '\n') words.append('\n')
def doPrivmsg(db): def doPrivmsg(db):
for (first, second, follower) in window(words, 3): for (first, second, follower) in window(words, 3):
db.addPair(channel, first, second, follower) db.addPair(channel, first, second, follower)