diff --git a/plugins/FunCommands.py b/plugins/FunCommands.py
index 975ec04cc..9efd753b7 100644
--- a/plugins/FunCommands.py
+++ b/plugins/FunCommands.py
@@ -433,6 +433,20 @@ class FunCommands(callbacks.Privmsg):
         (s1, s2) = privmsgs.getArgs(args, needed=2)
         irc.reply(msg, str(utils.distance(s1, s2)))
 
+    def soundex(self, irc, msg, args):
+        """<string> [<length>]
+
+        Returns the Soundex hash to a given length. The length defaults to
+        4, since that's the standard length for a soundex hash. For unlimited
+        length, use 0.
+        """
+        (s, length) = privmsgs.getArgs(args, optional=1)
+        if length:
+            length = int(length)
+        else:
+            length = 4
+        irc.reply(msg, utils.soundex(s, length))
+
     modulechars = '%s%s%s' % (string.ascii_letters, string.digits, '_.')
     def pydoc(self, irc, msg, args):
         """
diff --git a/src/utils.py b/src/utils.py
index 8a13ac2f4..937524073 100755
--- a/src/utils.py
+++ b/src/utils.py
@@ -37,6 +37,7 @@ from __future__ import generators
 
 from fix import *
 
+import string
 import sgmllib
 import htmlentitydefs
 
@@ -173,6 +174,37 @@ def distance(s, t):
                 cost = 1
             d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)
     return d[n][m]
-    
+
+_soundexcodes = dict(zip(string.ascii_uppercase,
+                         '01230120022455012623010202'))
+def soundex(s, length=4):
+    """Returns the Soundex hash of s, truncated to length characters.
+
+    Only the letters A-Z (case-insensitively) take part in the hash; every
+    other character is ignored. The result is the first letter followed by
+    the digit codes of the remaining letters, padded with three zeros. A
+    length of 0 returns the whole padded hash. Raises ValueError if s
+    contains no letters.
+    """
+    letters = [c for c in s.upper() if c in _soundexcodes]
+    if not letters:
+        raise ValueError('soundex requires at least one letter.')
+    codes = ''.join([_soundexcodes[c] for c in letters])
+    # Letters at the start that share the first letter's code are absorbed
+    # into the first letter.
+    codes = codes.lstrip(codes[0])
+    L = [letters[0]]
+    last = None
+    for code in codes:
+        # Collapse runs of one code; '0' (vowels, H, W, Y) still separates
+        # runs but is never emitted itself.
+        if code != last:
+            if code != '0':
+                L.append(code)
+            last = code
+    s = ''.join(L) + '000'
+    if length:
+        return s[:length]
+    return s
 
 # vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
diff --git a/test/utils_test.py b/test/utils_test.py
index bffb165ed..8963dc941 100644
--- a/test/utils_test.py
+++ b/test/utils_test.py
@@ -77,3 +77,26 @@ class UtilsTest(unittest.TestCase):
         self.assertEqual(d['ab'], 'abc')
         self.assertEqual(d['fool'], 'fool')
         self.assertEqual(d['foo'], 'foo')
+
+    def testSoundex(self):
+        L = [('Euler', 'E460'),
+             ('Ellery', 'E460'),
+             ('Gauss', 'G200'),
+             ('Ghosh', 'G200'),
+             ('Hilbert', 'H416'),
+             ('Heilbronn', 'H416'),
+             ('Knuth', 'K530'),
+             ('Kant', 'K530'),
+             ('Lloyd', 'L300'),
+             ('Ladd', 'L300'),
+             ('Lukasiewicz', 'L222'),
+             ('Lissajous', 'L222')]
+        for (name, key) in L:
+            soundex = utils.soundex(name)
+            self.assertEqual(soundex, key,
+                             '%s was %s, not %s' % (name, soundex, key))
+        # Non-letters are ignored, even at the start of the string.
+        self.assertEqual(utils.soundex(' 1Euler!'), 'E460')
+        # A length of 0 returns the whole (padded) hash.
+        self.assertEqual(utils.soundex('Euler', 0), 'E46000')
+        self.assertRaises(ValueError, utils.soundex, '123')