Added soundex to utils (along with a test) and to FunCommands.

This commit is contained in:
Jeremy Fincher 2003-04-05 11:23:28 +00:00
parent 4790d580ef
commit a50643c025
3 changed files with 51 additions and 1 deletions

View File

@ -433,6 +433,20 @@ class FunCommands(callbacks.Privmsg):
(s1, s2) = privmsgs.getArgs(args, needed=2)
irc.reply(msg, str(utils.distance(s1, s2)))
def soundex(self, irc, msg, args):
    """<string> [<length>]

    Returns the Soundex hash to a given length.  The length defaults to
    4, since that's the standard length for a soundex hash.  For unlimited
    length, use 0.
    """
    (s, length) = privmsgs.getArgs(args, optional=1)
    # Start from the standard hash length; only a truthy length argument
    # overrides it (presumably a falsy value means the optional argument
    # was omitted -- confirm against privmsgs.getArgs).
    size = 4
    if length:
        size = int(length)
    irc.reply(msg, utils.soundex(s, size))
# All ASCII letters and digits plus '_' and '.', joined into one string.
# NOTE(review): presumably the characters accepted in the name handed to the
# pydoc command below -- confirm against its body.
modulechars = '%s%s%s' % (string.ascii_letters, string.digits, '_.')
def pydoc(self, irc, msg, args):
"""<python function>

View File

@ -37,6 +37,7 @@ from __future__ import generators
from fix import *
import string
import sgmllib
import htmlentitydefs
@ -173,6 +174,23 @@ def distance(s, t):
cost = 1
d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)
return d[n][m]
# Soundex digit for every uppercase ASCII letter, in alphabetical order.
# '0' marks the letters that are never emitted (the vowels plus H, W and Y)
# but that still separate two otherwise-adjacent identical codes.
_soundexCodes = dict(zip(string.ascii_uppercase,
                         '01230120022455012623010202'))

def soundex(s, length=4):
    """Return the Soundex hash of s.

    The hash is the first ASCII letter of s (uppercased) followed by the
    Soundex digits of the remaining letters.  Characters that are not ASCII
    letters are ignored, runs of letters sharing a code collapse to a single
    digit, and letters coded '0' are dropped.

    length is the exact size of the result: the hash is truncated or padded
    with '0' to fit.  A length of 0 means "unlimited": the full hash is
    returned followed by three '0' characters.

    Raises ValueError if s contains no ASCII letter or if length is negative.
    """
    if length < 0:
        raise ValueError('length must be non-negative, not %r' % (length,))
    # Filter first, so the leading character of the hash is always a letter
    # even when s starts with digits or punctuation.
    letters = [c for c in s.upper() if c in _soundexCodes]
    if not letters:
        raise ValueError('soundex needs at least one ASCII letter: %r' % (s,))
    L = [letters[0]]
    # The first letter is emitted verbatim, but its code still suppresses
    # immediately following letters that share that code.
    previous = _soundexCodes[letters[0]]
    for c in letters[1:]:
        code = _soundexCodes[c]
        if code != previous and code != '0':
            L.append(code)
        # Remember '0' codes too: a vowel between two identical codes makes
        # the second one count again.
        previous = code
    s = ''.join(L)
    if not length:
        return s + '000'
    # A negative repeat count yields '', so long hashes are only truncated.
    return (s + '0' * (length - len(s)))[:length]
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:

View File

@ -77,3 +77,21 @@ class UtilsTest(unittest.TestCase):
self.assertEqual(d['ab'], 'abc')
self.assertEqual(d['fool'], 'fool')
self.assertEqual(d['foo'], 'foo')
def testSoundex(self):
    """Check utils.soundex against pairs of names that share a key."""
    # Each entry is the expected hash and the names that must produce it.
    cases = [('E460', ('Euler', 'Ellery')),
             ('G200', ('Gauss', 'Ghosh')),
             ('H416', ('Hilbert', 'Heilbronn')),
             ('K530', ('Knuth', 'Kant')),
             ('L300', ('Lloyd', 'Ladd')),
             ('L222', ('Lukasiewicz', 'Lissajous'))]
    for (key, names) in cases:
        for name in names:
            result = utils.soundex(name)
            message = '%s was %s, not %s' % (name, result, key)
            self.assertEqual(result, key, message)