### # Copyright (c) 2002-2005, Jeremiah Fincher # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions, and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the author of this software nor the name of # contributors to this software may be used to endorse or promote products # derived from this software without specific prior written consent. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ### """ Simple utility functions related to strings. """ import re import new import sys import string import textwrap import supybot.structures as structures from iter import all, any curry = new.instancemethod chars = string.maketrans('', '') def rsplit(s, sep=None, maxsplit=-1): """Equivalent to str.split, except splitting from the right.""" if sys.version_info < (2, 4, 0): if sep is not None: sep = sep[::-1] L = s[::-1].split(sep, maxsplit) L.reverse() return [s[::-1] for s in L] else: return s.rsplit(sep, maxsplit) def normalizeWhitespace(s): """Normalizes the whitespace in a string; \s+ becomes one space.""" return ' '.join(s.split()) def distance(s, t): """Returns the levenshtein edit distance between two strings.""" n = len(s) m = len(t) if n == 0: return m elif m == 0: return n d = [] for i in xrange(n+1): d.append([]) for j in xrange(m+1): d[i].append(0) d[0][j] = j d[i][0] = i for i in xrange(1, n+1): cs = s[i-1] for j in xrange(1, m+1): ct = t[j-1] cost = int(cs != ct) d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost) return d[n][m] _soundextrans = string.maketrans(string.ascii_uppercase, '01230120022455012623010202') _notUpper = chars.translate(chars, string.ascii_uppercase) def soundex(s, length=4): """Returns the soundex hash of a given string.""" s = s.upper() # Make everything uppercase. s = s.translate(chars, _notUpper) # Delete non-letters. if not s: raise ValueError, 'Invalid string for soundex: %s' firstChar = s[0] # Save the first character. s = s.translate(_soundextrans) # Convert to soundex numbers. s = s.lstrip(s[0]) # Remove all repeated first characters. L = [firstChar] for c in s: if c != L[-1]: L.append(c) L = [c for c in L if c != '0'] + (['0']*(length-1)) s = ''.join(L) return length and s[:length] or s.rstrip('0') def dqrepr(s): """Returns a repr() of s guaranteed to be in double quotes.""" # The wankers-that-be decided not to use double-quotes anymore in 2.3. # return '"' + repr("'\x00" + s)[6:] return '"%s"' % s.encode('string_escape').replace('"', '\\"') def quoted(s): """Returns a quoted s.""" return '"%s"' % s def _getSep(s): if len(s) < 2: raise ValueError, 'string given to _getSep is too short: %r' % s if s.startswith('m') or s.startswith('s'): separator = s[1] else: separator = s[0] if separator.isalnum() or separator in '{}[]()<>': raise ValueError, \ 'Invalid separator: separator must not be alphanumeric or in ' \ '"{}[]()<>"' return separator def _getSplitterRe(s): separator = _getSep(s) return re.compile(r'(?>> nItems(1, 'clock') '1 clock' >>> nItems(10, 'clock') '10 clocks' >>> nItems(10, 'clock', between='grandfather') '10 grandfather clocks' """ assert isinstance(n, int), \ 'The order of the arguments to nItems changed again, sorry.' if between is None: if n != 1: return format('%s %p', n, item) else: return format('%s %s', n, item) else: if n != 1: return format('%s %s %p', n, between, item) else: return format('%s %s %s', n, between, item) def be(i): """Returns the form of the verb 'to be' based on the number i.""" if i == 1: return 'is' else: return 'are' def has(i): """Returns the form of the verb 'to have' based on the number i.""" if i == 1: return 'has' else: return 'have' def toBool(s): s = s.strip().lower() if s in ('true', 'on', 'enable', 'enabled', '1'): return True elif s in ('false', 'off', 'disable', 'disabled', '0'): return False else: raise ValueError, 'Invalid string for toBool: %s' % quoted(s) # Replace me! def timestamp(t): return time.ctime(t) _formatRe = re.compile('%([bfhiLnpqst%])') def format(s, *args, **kwargs): """w00t. %: literal %. i: integer s: string f: float b: form of the verb 'to be' (takes an int) h: form of the verb 'to have' (takes an int) L: commaAndify (takes a list of strings or a tuple of ([strings], and)) p: pluralize (takes a string) q: quoted (takes a string) n: nItems (takes a 2-tuple of (n, item) or a 3-tuple of (n, between, item)) t: time, formatted (takes an int) """ args = list(args) args.reverse() # For more efficient popping. def sub(match): char = match.group(1) if char == 's': return str(args.pop()) elif char == 'i': # XXX Improve me! return str(args.pop()) elif char == 'f': # XXX Improve me! return str(args.pop()) elif char == 'b': return be(args.pop()) elif char == 'h': return has(args.pop()) elif char == 'L': t = args.pop() if isinstance(t, list): return commaAndify(t) elif isinstance(t, tuple) and len(t) == 2: if not isinstance(t[0], list): raise ValueError, 'Invalid list for %%L in format: %s' % t if not isinstance(t[1], basestring): raise ValueError, 'Invalid string for %%L in format: %s' % t return commaAndify(t[0], And=t[1]) else: raise ValueError, 'Invalid value for %%L in format: %s' % t elif char == 'p': return pluralize(args.pop()) elif char == 'q': return quoted(args.pop()) elif char == 'n': t = args.pop() if not isinstance(t, (tuple, list)): raise ValueError, 'Invalid value for %%n in format: %s' % t if len(t) == 2: return nItems(*t) elif len(t) == 3: return nItems(t[0], t[2], between=t[1]) else: raise ValueError, 'Invalid value for %%n in format: %s' % t elif char == 't': return timestamp(args.pop()) elif char == '%': return '%' else: raise ValueError, 'Invalid char in sub (in format).' try: return _formatRe.sub(sub, s) except IndexError: raise ValueError, 'Extra format chars in format spec: %r' % s # vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: