mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-12-18 16:12:42 +01:00
Use MultipleReplacer for utils.str.soundex.
This commit is contained in:
parent
9edda206ea
commit
1a64f1052a
@ -92,17 +92,42 @@ def distance(s, t):
|
|||||||
d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)
|
d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)
|
||||||
return d[n][m]
|
return d[n][m]
|
||||||
|
|
||||||
_soundextrans = string.maketrans(string.ascii_uppercase,
|
class MultipleReplacer:
    """Return a callable that replaces all dict keys by the associated
    value. More efficient than multiple .replace().

    dict_ maps each substring to look for (matched literally, not as a
    regular expression) to its replacement string.  Calling the instance
    with a string returns a copy in which every occurrence of a key has
    been replaced by re.sub in one pass, so replacement text is not scanned
    again.  When one key is a prefix of another, the longer key wins.  An
    empty mapping returns the input unchanged."""

    # We use an object instead of a lambda function because it avoids the
    # need for using the staticmethod() on the lambda function if assigning
    # it to a class in Python 3.
    def __init__(self, dict_):
        # Keep a private copy: if the caller later removed a key from its
        # own dict, the compiled pattern would still match it and the
        # lookup in __call__ would raise KeyError.
        self._dict = dict(dict_)
        # Longest keys first.  Regex alternation takes the first branch
        # that matches, so a short key placed before a longer key starting
        # with it would make the longer key unreachable.
        keys = sorted(self._dict, key=len, reverse=True)
        if keys:
            self._matcher = re.compile(
                '|'.join(re.escape(key) for key in keys))
        else:
            # Joining no keys gives the pattern '', which matches the empty
            # string at every position; __call__ would then fail looking
            # up ''.  There is nothing to replace, so skip matching.
            self._matcher = None

    def __call__(self, s):
        if self._matcher is None:
            return s
        return self._matcher.sub(lambda m: self._dict[m.group(0)], s)
|
||||||
|
def multipleReplacer(dict_):
    """Function-style spelling of MultipleReplacer: build and return the
    replacer callable for the mapping dict_."""
    replacer = MultipleReplacer(dict_)
    return replacer
|
||||||
|
|
||||||
|
class MultipleRemover:
    """Return a callable that removes all words in the list. A bit more
    efficient than multipleReplacer.

    list_ is an iterable of substrings, matched literally rather than as
    regular expressions.  Calling the instance with a string returns a copy
    with every occurrence of those substrings deleted.  When one word is a
    prefix of another, the longer word is removed as a whole."""

    # This is an object rather than a lambda so that it can be assigned to
    # a class in Python 3 without wrapping it in staticmethod().
    def __init__(self, list_):
        # Longest words first.  Regex alternation takes the first branch
        # that matches, so with ['a', 'ab'] in that order only the 'a' of
        # 'ab' would be removed and a stray 'b' left behind.
        words = sorted(list_, key=len, reverse=True)
        self._matcher = re.compile('|'.join(re.escape(w) for w in words))

    def __call__(self, s):
        # An empty word list compiles to '', whose empty matches are
        # replaced by '' as well, leaving s unchanged.
        return self._matcher.sub('', s)
|
||||||
|
|
||||||
|
# Callable that rewrites each uppercase ASCII letter to the digit found at
# the same position in the code string (A -> 0, B -> 1, C -> 2, D -> 3, ...).
_soundextrans = MultipleReplacer({
    letter: digit
    for letter, digit in zip(string.ascii_uppercase,
                             '01230120022455012623010202')
})
|
||||||
def soundex(s, length=4):
|
def soundex(s, length=4):
|
||||||
"""Returns the soundex hash of a given string."""
|
"""Returns the soundex hash of a given string."""
|
||||||
s = s.upper() # Make everything uppercase.
|
s = s.upper() # Make everything uppercase.
|
||||||
s = s.translate(chars, _notUpper) # Delete non-letters.
|
s = ''.join([x for x in s if x in string.ascii_uppercase])
|
||||||
if not s:
|
if not s:
|
||||||
raise ValueError, 'Invalid string for soundex: %s'
|
raise ValueError, 'Invalid string for soundex: %s'
|
||||||
firstChar = s[0] # Save the first character.
|
firstChar = s[0] # Save the first character.
|
||||||
s = s.translate(_soundextrans) # Convert to soundex numbers.
|
s = _soundextrans(s) # Convert to soundex numbers.
|
||||||
s = s.lstrip(s[0]) # Remove all repeated first characters.
|
s = s.lstrip(s[0]) # Remove all repeated first characters.
|
||||||
L = [firstChar]
|
L = [firstChar]
|
||||||
for c in s:
|
for c in s:
|
||||||
@ -217,34 +242,6 @@ def perlVariableSubstitute(vars, text):
|
|||||||
return '$' + unbraced
|
return '$' + unbraced
|
||||||
return _perlVarSubstituteRe.sub(replacer, text)
|
return _perlVarSubstituteRe.sub(replacer, text)
|
||||||
|
|
||||||
class MultipleReplacer:
|
|
||||||
"""Return a callable that replaces all dict keys by the associated
|
|
||||||
value. More efficient than multiple .replace()."""
|
|
||||||
|
|
||||||
# We use an object instead of a lambda function because it avoids the
|
|
||||||
# need for using the staticmethod() on the lambda function if assigning
|
|
||||||
# it to a class in Python 3.
|
|
||||||
def __init__(self, dict_):
|
|
||||||
self._dict = dict_
|
|
||||||
dict_ = {re.escape(key): val for key,val in dict_.items()}
|
|
||||||
self._matcher = re.compile('|'.join(dict_.keys()))
|
|
||||||
def __call__(self, s):
|
|
||||||
return self._matcher.sub(lambda m: self._dict[m.group(0)], s)
|
|
||||||
def multipleReplacer(dict_):
|
|
||||||
return MultipleReplacer(dict_)
|
|
||||||
|
|
||||||
class MultipleRemover:
|
|
||||||
"""Return a callable that removes all words in the list. A bit more
|
|
||||||
efficient than multipleReplacer"""
|
|
||||||
# See comment of MultipleReplacer
|
|
||||||
def __init__(self, list_):
|
|
||||||
list_ = [re.escape(x) for x in list_]
|
|
||||||
self._matcher = re.compile('|'.join(list_))
|
|
||||||
def __call__(self, s):
|
|
||||||
return self._matcher.sub(lambda m: '', s)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def commaAndify(seq, comma=',', And='and'):
|
def commaAndify(seq, comma=',', And='and'):
|
||||||
"""Given a a sequence, returns an English clause for that sequence.
|
"""Given a a sequence, returns an English clause for that sequence.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user