###
# Copyright (c) 2002-2005, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Simple utility functions related to strings.
"""

import re
import new
import sys
import string
import textwrap

import supybot.structures as structures

curry = new.instancemethod
# Identity translation table: handy as the first argument of str.translate
# when the only goal is to delete characters.
chars = string.maketrans('', '')


def rsplit(s, sep=None, maxsplit=-1):
    """Equivalent to str.split, except splitting from the right.

    s is the string to split, sep the separator (None means runs of
    whitespace) and maxsplit the maximum number of splits (-1 for no limit).
    Returns the list of pieces in left-to-right order.
    """
    if sys.version_info < (2, 4, 0):
        # str.rsplit does not exist yet: split the reversed string from the
        # left, then undo the reversal on both the list and each piece.
        if sep is not None:
            sep = sep[::-1]
        pieces = s[::-1].split(sep, maxsplit)
        pieces.reverse()
        return [piece[::-1] for piece in pieces]
    else:
        return s.rsplit(sep, maxsplit)


def normalizeWhitespace(s):
    """Normalizes the whitespace in a string.

    Every run of whitespace becomes a single space; leading and trailing
    whitespace is dropped.
    """
    return ' '.join(s.split())


def distance(s, t):
    """Returns the levenshtein edit distance between two strings."""
    n = len(s)
    m = len(t)
    if n == 0:
        return m
    elif m == 0:
        return n
    # d[i][j] holds the edit distance between s[:i] and t[:j].
    d = [[0] * (m + 1) for _ in xrange(n + 1)]
    for i in xrange(n + 1):
        d[i][0] = i
    for j in xrange(m + 1):
        d[0][j] = j
    for i in xrange(1, n + 1):
        cs = s[i - 1]
        for j in xrange(1, m + 1):
            cost = int(cs != t[j - 1])
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # substitution (or match)
    return d[n][m]


# Maps each uppercase ASCII letter to its soundex digit.
_soundextrans = string.maketrans(string.ascii_uppercase,
                                 '01230120022455012623010202')
# Every character except the uppercase ASCII letters.
_notUpper = chars.translate(chars, string.ascii_uppercase)


def soundex(s, length=4):
    """Returns the soundex hash of a given string.

    The hash is the first letter of s followed by digits, padded with '0' and
    cut to length characters.  If length is 0 the whole code is returned
    without padding.

    Raises ValueError if s contains no ASCII letters.
    """
    original = s
    s = s.upper()                      # Make everything uppercase.
    s = s.translate(chars, _notUpper)  # Delete non-letters.
    if not s:
        raise ValueError('Invalid string for soundex: %s' % quoted(original))
    firstChar = s[0]                   # Save the first character.
    s = s.translate(_soundextrans)     # Convert to soundex numbers.
    s = s.lstrip(s[0])                 # Remove all repeated first characters.
    L = [firstChar]
    for c in s:
        # Collapse runs of the same digit into one.
        if c != L[-1]:
            L.append(c)
    L = [c for c in L if c != '0'] + (['0'] * (length - 1))
    s = ''.join(L)
    # and/or rather than a conditional expression: the latter needs 2.5.
    return length and s[:length] or s.rstrip('0')


def dqrepr(s):
    """Returns a repr() of s guaranteed to be in double quotes."""
    # As of Python 2.3 repr() can no longer be coaxed into using double
    # quotes (the old trick was '"' + repr("'\x00" + s)[6:]), so the string
    # is escaped and quoted by hand instead.
    return '"%s"' % s.encode('string_escape').replace('"', '\\"')


def quoted(s):
    """Returns s wrapped in double quotes (nothing inside s is escaped)."""
    return '"%s"' % s


def _getSep(s):
    """Returns the separator character of a regexp-like string.

    The separator is the second character when s starts with 'm' or 's', and
    the first character otherwise.

    Raises ValueError if s is shorter than two characters, or if the
    separator is alphanumeric or one of the characters in "{}[]()<>".
    """
    if len(s) < 2:
        raise ValueError('string given to _getSep is too short: %r' % (s,))
    if s.startswith('m') or s.startswith('s'):
        separator = s[1]
    else:
        separator = s[0]
    if separator.isalnum() or separator in '{}[]()<>':
        raise ValueError(
            'Invalid separator: separator must not be alphanumeric or in '
            '"{}[]()<>"')
    return separator


# NOTE(review): this part of the file was damaged before it reached review.
# Everything from the middle of the regex literal in _getSplitterRe up to the
# doctest examples of nItems is missing.  Names used further down (pluralize,
# commaAndify, decimalSeparator, thousandsSeparator) are not defined anywhere
# in the surviving text and are presumably defined in the missing span.
# Restore the span from version control rather than retyping it.  The
# surviving fragments are kept verbatim below so that nothing more is lost:
#
#   def _getSplitterRe(s):
#       separator = _getSep(s)
#       return re.compile(r'(?                     <-- text is cut off here
#
#       ...                                        <-- missing span
#
#       >> nItems('clock', 1)
#       '1 clock'
#       >>> nItems('clock', 10)
#       '10 clocks'
#       >>> nItems('clock', 10, between='grandfather')
#       '10 grandfather clocks'
#       """
#       if between is None:
#           return '%s %s' % (n, pluralize(item, n))
#       else:
#           return '%s %s %s' % (n, between, pluralize(item, n))


def be(i):
    """Returns the form of the verb 'to be' based on the number i."""
    if i == 1:
        return 'is'
    else:
        return 'are'


def has(i):
    """Returns the form of the verb 'to have' based on the number i."""
    if i == 1:
        return 'has'
    else:
        return 'have'


_formatRe = re.compile('%([isfbhL])')


def format(s, *args, **kwargs):
    """Returns s with each directive replaced by the next positional argument.

    Directives:
      %s, %i, %f -- str() of the argument (i and f get no special formatting
                    yet)
      %b         -- form of the verb 'to be' for the argument (see be)
      %h         -- form of the verb 'to have' for the argument (see has)
      %L         -- the argument passed through commaAndify

    Arguments are consumed left to right; surplus arguments are ignored and
    running out of arguments raises IndexError.  The decimalSeparator and
    thousandsSeparator keyword arguments default to the module-level values
    but are not otherwise used by the code below.
    """
    kwargs.setdefault('decimalSeparator', decimalSeparator)
    kwargs.setdefault('thousandsSeparator', thousandsSeparator)
    args = list(args)
    args.reverse()  # For more efficiency popping.

    def sub(match):
        char = match.group(1)
        if char in ('s', 'i', 'f'):
            # XXX Improve me!  Integers and floats are formatted like strings.
            return str(args.pop())
        elif char == 'b':
            return be(args.pop())
        elif char == 'h':
            return has(args.pop())
        elif char == 'L':
            return commaAndify(args.pop())
        else:
            # Only reachable if _formatRe accepts a character not handled
            # above.  Raised explicitly so the check survives python -O.
            raise AssertionError('Invalid char in sub (in format).')
    return _formatRe.sub(sub, s)


def toBool(s):
    """Returns the boolean that the string s stands for.

    Case and surrounding whitespace are ignored.  'true', 'on', 'enable',
    'enabled' and '1' give True; 'false', 'off', 'disable', 'disabled' and
    '0' give False.

    Raises ValueError for any other string.
    """
    s = s.strip().lower()
    if s in ('true', 'on', 'enable', 'enabled', '1'):
        return True
    elif s in ('false', 'off', 'disable', 'disabled', '0'):
        return False
    else:
        raise ValueError('Invalid string for toBool: %s' % quoted(s))

# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: