Limnoria/src/utils.py

#!/usr/bin/env python

###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

"""
Simple utility functions.
"""

## from __future__ import generators

from fix import *

import os
import re
import string
import sgmllib
import textwrap
import htmlentitydefs

def normalizeWhitespace(s):
    """Collapses every run of whitespace in s into a single space.

    Leading and trailing whitespace is removed entirely, since splitting on
    whitespace discards the empty pieces at both ends.
    """
    words = s.split()
    return ' '.join(words)

class HtmlToText(sgmllib.SGMLParser):
    """SGML parser that keeps the character data of a document and puts
    tagReplace wherever a tag was.  Taken from some eff-bot code on c.l.p.

    Feed it markup with .feed() and collect the result with .getText().
    """
    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, tagReplace=' '):
        # Pieces of output, in document order; joined by getText().
        self.data = []
        self.tagReplace = tagReplace
        sgmllib.SGMLParser.__init__(self)

    def _replaceTag(self):
        """Records the replacement text for one opening or closing tag."""
        self.data.append(self.tagReplace)

    def unknown_starttag(self, tag, attr):
        self._replaceTag()

    def unknown_endtag(self, tag):
        self._replaceTag()

    def handle_data(self, data):
        self.data.append(data)

    def getText(self):
        """Returns everything collected so far with whitespace normalized."""
        collected = ''.join(self.data)
        trimmed = collected.strip()
        return normalizeWhitespace(trimmed)

def htmlToText(s, tagReplace=' '):
    """Strips the markup from the HTML in s and returns the remaining text.

    Each tag is replaced by the string tagReplace (a single space unless
    told otherwise).
    """
    parser = HtmlToText(tagReplace)
    parser.feed(s)
    text = parser.getText()
    return text

def eachSubstring(s):
    """Yields every prefix of s, shortest first: s[:1], s[:2], ..., s.

    Despite the name only prefixes (substrings anchored at the first index)
    are produced.  Nothing is yielded for an empty s.
    """
    # range() rather than xrange() so this runs on Python 2 and Python 3.
    for end in range(1, len(s) + 1):
        yield s[:end]

def abbrev(strings):
    """Returns a dictionary mapping unambiguous abbreviations to full forms.

    An abbreviation is any non-empty prefix of one of the given strings.  A
    prefix shared by several strings is ambiguous and is left out, unless it
    is itself one of the strings, in which case it maps to itself no matter
    in which order the strings were given.

    strings may be any iterable; it is copied, so generators work too.
    """
    strings = list(strings)
    # Used purely as a set of the full forms, for fast membership tests.
    fullForms = dict.fromkeys(strings)
    d = {}
    for s in strings:
        for end in range(1, len(s) + 1):
            abbreviation = s[:end]
            if abbreviation in fullForms:
                # An exact match always wins over being a prefix of
                # something longer.
                d[abbreviation] = abbreviation
            elif abbreviation in d:
                # Seen before for another string: ambiguous.
                d[abbreviation] = None
            else:
                d[abbreviation] = s
    # Drop the abbreviations that were marked ambiguous.
    for key in [key for key in d if d[key] is None]:
        del d[key]
    return d

def timeElapsed(elapsed, leadingZeroes=False, years=True, weeks=True,
                days=True, hours=True, minutes=True, seconds=True):
    """Given <elapsed> seconds, returns a string with an English description of
    how much time has passed.

    leadingZeroes determines whether zero-valued units (0 days, 0 hours,
    etc.) are printed before the first non-zero unit; once a non-zero unit
    has been printed, every smaller enabled unit down to minutes is printed
    too.  The other flags determine which units may be used: time belonging
    to a disabled unit is carried down to the next smaller enabled unit, and
    anything smaller than the smallest enabled unit is dropped.  When seconds
    is true the seconds figure is always printed.

    A year is taken to be 365 days.  elapsed is truncated to an integer.

    Raises ValueError when nothing can be printed with the enabled units
    (for instance 30 seconds with seconds=False).
    """
    elapsed = int(elapsed)
    assert years or weeks or days or \
           hours or minutes or seconds, 'One flag must be True'

    def describe(count, unit):
        """Formats a count with its unit, pluralizing unless count is 1."""
        if count != 1:
            return '%s %ss' % (count, unit)
        return '1 %s' % unit

    # (enabled, length in seconds, singular name), largest unit first.
    units = [(years, 31536000, 'year'),
             (weeks, 604800, 'week'),
             (days, 86400, 'day'),
             (hours, 3600, 'hour'),
             (minutes, 60, 'minute')]
    ret = []
    for (enabled, length, unit) in units:
        # A disabled unit must leave elapsed untouched so that its share is
        # expressed in the smaller units instead of being printed anyway.
        if not enabled:
            continue
        count, elapsed = elapsed // length, elapsed % length
        if leadingZeroes or count:
            if count:
                # From here on zeroes are significant ("1 hour, 0 minutes").
                leadingZeroes = True
            ret.append(describe(count, unit))
    if seconds:
        ret.append(describe(elapsed, 'second'))
    if len(ret) == 0:
        raise ValueError('Time difference not great enough to be noted.')
    if len(ret) == 1:
        return ret[0]
    else:
        return commaAndify(ret)

def distance(s, t):
    """Computes the Levenshtein edit distance between the sequences s and t:
    the minimum number of single-element insertions, deletions and
    substitutions needed to turn one into the other.
    """
    rows = len(s)
    cols = len(t)
    if not rows:
        return cols
    if not cols:
        return rows
    # table[i][j] holds the distance between s[:i] and t[:j].
    table = [[0] * (cols + 1) for unused in range(rows + 1)]
    for j in range(cols + 1):
        table[0][j] = j
    for i in range(rows + 1):
        table[i][0] = i
    for (i, charS) in enumerate(s):
        for (j, charT) in enumerate(t):
            deletion = table[i][j + 1] + 1
            insertion = table[i + 1][j] + 1
            substitution = table[i][j]
            if charS != charT:
                substitution += 1
            table[i + 1][j + 1] = min(deletion, insertion, substitution)
    return table[rows][cols]

# Soundex digit for every uppercase ASCII letter; '0' marks the letters
# (vowels, H, W, Y) that are dropped from the final code.
_soundexCodes = dict(zip(string.ascii_uppercase,
                         '01230120022455012623010202'))
def soundex(s, length=4):
    """Returns the soundex hash of a given string.

    Only ASCII letters take part; everything else (digits, punctuation,
    whitespace) is ignored, including at the start of s.  The result is the
    first letter followed by digits, padded with '0' and cut to <length>
    characters.  A length of 0 (or less) returns the complete, unpadded code.

    Raises ValueError if s contains no ASCII letter at all.
    """
    assert s
    letters = [c for c in s.upper() if c in _soundexCodes]
    if not letters:
        raise ValueError('soundex needs at least one ASCII letter.')
    # Taken after discarding non-letters so the code always starts with one.
    firstChar = letters[0]
    digits = ''.join([_soundexCodes[c] for c in letters])
    # Letters that repeat the first letter's own digit add nothing.
    digits = digits.lstrip(digits[0])
    L = [firstChar]
    for c in digits:
        # Collapse runs of the same digit into one.
        if c != L[-1]:
            L.append(c)
    # Zeroes only served as separators above; drop them, then pad.
    L = [c for c in L if c != '0'] + (['0']*(length-1))
    s = ''.join(L)
    if length > 0:
        return s[:length]
    return s.rstrip('0')

def dqrepr(s):
    """Returns a repr() of s guaranteed to be in double quotes.

    The result is s with its special characters backslash-escaped, wrapped
    in a pair of double quotes.
    """
    # Per the original author's note, repr() stopped preferring double
    # quotes in Python 2.3, so the earlier repr()-based version below had to
    # be replaced by explicit escaping:
    # return '"' + repr("'\x00" + s)[6:]
    # The codec does the escaping; double quotes are escaped separately
    # afterwards so that none of them can end the surrounding quotes early.
    # NOTE(review): 'string_escape' looks like a Python 2-only codec --
    # confirm before running this on Python 3.
    return '"%s"' % s.encode('string_escape').replace('"', '\\"')

# Matches the slashes that delimit a Perl-style regexp, i.e. those not
# preceded by a backslash.
nonEscapedSlashes = re.compile(r'(?<!\\)/')
def perlReToPythonRe(s):
    """Converts a string representation of a Perl regular expression (i.e.,
    m/^foo$/i or /foo|bar/) to a compiled Python regular expression.

    Slashes inside the expression must be written as \\/ and are unescaped
    before compiling.  Each flag letter is looked up, uppercased, as an
    attribute of the re module (so i -> re.I, s -> re.S, and so on).

    Raises ValueError if s does not have exactly the form [m]/regexp/flags
    or names an unknown flag.  Errors in the regexp itself are raised by
    re.compile as usual.
    """
    try:
        (kind, regexp, flags) = nonEscapedSlashes.split(s)
    except ValueError:
        # Wrong number of unescaped slashes; say so instead of letting the
        # tuple-unpacking error message through.
        raise ValueError('Must be of the form m/regexp/flags or '
                         '/regexp/flags')
    regexp = regexp.replace('\\/', '/')
    if kind not in ('', 'm'):
        raise ValueError('Invalid kind: must be in ("", "m")')
    flag = 0
    for c in flags.upper():
        try:
            flag |= getattr(re, c)
        except AttributeError:
            raise ValueError('Invalid flag: %s' % c)
    return re.compile(regexp, flag)

def perlReToReplacer(s):
    """Converts a string representation of a Perl regular expression (i.e.,
    s/foo/bar/g or s/foo/bar/i) to a Python function doing the equivalent
    replacement.

    The returned function takes a string and returns it with the first
    match replaced, or with every match replaced if the g flag was given.
    Slashes in either half must be written as \\/.  All flags other than g
    are handed to perlReToPythonRe.

    Raises ValueError if s is not of the form s/regexp/replacement/flags or
    uses an unknown flag.
    """
    try:
        (kind, regexp, replace, flags) = nonEscapedSlashes.split(s)
    except ValueError:
        # Wrong number of unescaped slashes.
        raise ValueError('Must be of the form s/regexp/replacement/flags')
    if kind != 's':
        raise ValueError('Invalid kind: must be "s"')
    # The regexp half is unescaped by perlReToPythonRe; the replacement has
    # to be unescaped here or the backslash would end up in the output.
    replace = replace.replace('\\/', '/')
    # g is not a regexp flag: it selects how many matches get replaced.
    g = 'g' in flags
    flags = flags.replace('g', '')
    r = perlReToPythonRe('/'.join(('', regexp, flags)))
    if g:
        return lambda s: r.sub(replace, s)
    else:
        return lambda s: r.sub(replace, s, count=1)

def findBinaryInPath(s):
    """Return full path of a binary if it's in PATH, otherwise return None.

    The directories listed in the PATH environment variable are tried in
    order and the first one containing an entry named s wins.  Only
    existence is checked, not whether the entry is executable.  None is
    also returned when PATH is not set at all.
    """
    path = os.getenv('PATH')
    if path is None:
        return None
    # os.pathsep instead of a literal ':' so that Windows-style PATH values
    # (';' separated, with drive letters containing ':') are split properly.
    for directory in path.split(os.pathsep):
        filename = os.path.join(directory, s)
        if os.path.exists(filename):
            return filename
    return None

def commaAndify(seq):
    """Given a sequence, returns an English clause for that sequence.

    I.e., given [1, 2, 3], returns '1, 2, and 3'; two items are joined by a
    plain 'and', a single item is returned on its own, and an empty
    sequence gives ''.  Items that aren't strings are formatted with %s.
    """
    # Format every item up front: join() only accepts strings, and the
    # one-tuple keeps an item that is itself a tuple from being unpacked.
    L = ['%s' % (elt,) for elt in seq]
    if len(L) == 0:
        return ''
    elif len(L) == 1:
        return L[0]
    elif len(L) == 2:
        return '%s and %s' % (L[0], L[1])
    else:
        L[-1] = 'and %s' % L[-1]
        return ', '.join(L)

# Captures everything before a trailing ", the" (any case) and the article.
_unCommaTheRe = re.compile(r'(.*),\s*(the)$', re.I)
def unCommaThe(s):
    """Moves a trailing article to the front: 'foo, the' becomes 'the foo'.

    The article keeps the case it was written in.  Strings not ending in
    ', the' are returned unchanged.
    """
    found = _unCommaTheRe.match(s)
    if found is None:
        return s
    (rest, article) = found.groups()
    return '%s %s' % (article, rest)

def wrapLines(s):
    """Word wraps each line of s separately, treating every line as one
    paragraph, and returns the wrapped paragraphs joined by newlines.
    """
    wrapped = [textwrap.fill(paragraph) for paragraph in s.splitlines()]
    return '\n'.join(wrapped)

# Irregular plurals, keyed by singular form; empty unless someone fills it.
plurals = {}
def pluralize(i, s):
    """Returns the form of the noun s that goes with the number i.

    s itself is returned when i is 1.  Otherwise the plural is looked up in
    the plurals dictionary, falling back on the general English rule of
    appending 's'.
    """
    if i == 1:
        return s
    try:
        return plurals[s]
    except KeyError:
        return s + 's'

def nItems(n, item, between=None):
    """Returns the number n followed by item, pluralized to agree with n.

    If between is given it is placed between the number and the noun.
    """
    noun = pluralize(n, item)
    if between is not None:
        return '%s %s %s' % (n, between, noun)
    return '%s %s' % (n, noun)

def be(i):
    """Returns 'is' when the number i is 1 and 'are' for anything else."""
    verb = 'are'
    if i == 1:
        verb = 'is'
    return verb

def sortBy(f, L, cmp=None):
    """Uses the decorate-sort-undecorate pattern to sort L by function f.

    L is sorted in place and nothing is returned.  The items compared are
    (f(elt), elt) pairs, so elements with equal keys are ordered by the
    elements themselves.  cmp, if given, is an old-style comparison
    function that receives two such pairs; by default the pairs are
    compared normally.

    L is only modified once sorting has succeeded; if f or the comparison
    raises, L is left exactly as it was.
    """
    # Decorate into a separate list so a failure can't leave L half-done.
    decorated = [(f(elt), elt) for elt in L]
    if cmp is None:
        decorated.sort()
    else:
        try:
            from functools import cmp_to_key
        except ImportError:
            # Old Pythons: sort() takes the comparison function directly.
            decorated.sort(cmp)
        else:
            decorated.sort(key=cmp_to_key(cmp))
    L[:] = [pair[1] for pair in decorated]

def mktemp(suffix=''):
    """Gives a decent random string, suitable for a filename.

    The result is 40 lowercase hexadecimal digits followed by suffix.
    """
    import os
    import binascii
    # 20 bytes from the operating system's random source give the same 40
    # hex digits a SHA-1 digest would, without depending on the clock for
    # entropy or on the sha/md5 modules.
    token = binascii.hexlify(os.urandom(20))
    if not isinstance(token, str):
        # hexlify returns bytes on Python 3.
        token = token.decode('ascii')
    return token + suffix

def itersplit(isSeparator, iterable, maxsplit=-1, yieldEmpty=False):
    """Splits an iterable into lists, breaking wherever the predicate
    isSeparator is true of an element.  Separators are not included.

    At most maxsplit splits are made (a negative value means no limit);
    after that separators are kept as ordinary elements.  Empty lists are
    only yielded if yieldEmpty is true.
    """
    chunk = []
    for item in iterable:
        if maxsplit != 0 and isSeparator(item):
            maxsplit -= 1
            if yieldEmpty or chunk:
                yield chunk
            # A new list each time: the one just yielded belongs to the
            # caller now.
            chunk = []
        else:
            chunk.append(item)
    if yieldEmpty or chunk:
        yield chunk

try:
    # Python 2: covers both str and unicode.
    _flattenStringTypes = (basestring,)
except NameError:
    _flattenStringTypes = (str, bytes)

def flatten(seq, strings=False):
    """Flattens arbitrarily nested iterables into a single stream of items.

    Yields the non-iterable items found in seq, depth first, in order; for
    example [1, [2, [3, 4]], 5] gives 1, 2, 3, 4, 5.  Strings are treated
    as single items unless strings is true, in which case they are broken
    up into their elements as well, at any nesting depth.

    Raises TypeError if seq itself is not iterable.
    """
    for elt in seq:
        if isinstance(elt, _flattenStringTypes):
            if strings:
                # One level only: the elements of a string are themselves
                # strings, so recursing here would never end.
                for char in elt:
                    yield char
            else:
                yield elt
            continue
        try:
            iter(elt)
        except TypeError:
            # Not iterable, so it is a leaf.
            yield elt
        else:
            for x in flatten(elt, strings):
                yield x

class IterableMap(object):
    """Mixin supplying the usual mapping accessors on top of iteritems().

    A subclass only has to define .iteritems(), yielding (key, value)
    pairs; the remaining iterators, the list-returning methods, len() and
    truth testing are all derived from it here.
    """
    def iteritems(self):
        raise NotImplementedError

    def iterkeys(self):
        for (key, unused) in self.iteritems():
            yield key

    def itervalues(self):
        for (unused, value) in self.iteritems():
            yield value

    def items(self):
        return list(self.iteritems())

    def keys(self):
        return list(self.iterkeys())

    def values(self):
        return list(self.itervalues())

    def __len__(self):
        # Counts by walking the whole iterator.
        total = 0
        for (index, unused) in enumerate(self.iteritems()):
            total = index + 1
        return total

    def __nonzero__(self):
        # True as soon as a first item shows up; no need to see the rest.
        for unused in self.iteritems():
            break
        else:
            return False
        return True


# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`#!/usr/bin/env python`

			`###`
			`# Copyright (c) 2002, Jeremiah Fincher`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions, and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions, and the following disclaimer in the`
			`# documentation and/or other materials provided with the distribution.`
			`# * Neither the name of the author of this software nor the name of`
			`# contributors to this software may be used to endorse or promote products`
			`# derived from this software without specific prior written consent.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`###`

			`"""`
			`Simple utility functions.`
			`"""`

Updated stuff for 2.3 to break 2.2 compatibility :) 2003-08-07 04:48:44 +02:00			`## from __future__ import generators`
Added __future__ import 2003-03-31 08:55:34 +02:00
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`from fix import *`

* added new utility, findBinaryInPath which searches the PATH for a specific binary * removed getSpellBinary, and replaced with findBinaryInPath calls 2003-08-17 22:09:09 +02:00			`import os`
Added perlReToPythonRe and perlReToReplacer and associated tests. 2003-04-16 07:26:24 +02:00			`import re`
Added soundex to utils (along with a test) and to FunCommands. 2003-04-05 13:23:28 +02:00			`import string`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`import sgmllib`
Added utils.wrapLines and wrapped the lines of each plugin's example. 2003-08-27 18:25:43 +02:00			`import textwrap`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`import htmlentitydefs`

Added normalizeWhitespace. 2003-09-01 20:39:27 +02:00			`def normalizeWhitespace(s):`
			`"""Normalizes the whitespace in a string; \s+ becomes one space."""`
			`return ' '.join(s.split())`

Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`class HtmlToText(sgmllib.SGMLParser):`
			`"""Taken from some eff-bot code on c.l.p."""`
			`entitydefs = htmlentitydefs.entitydefs`
Added keyword arg to htmlToText to determine how to replace HTML tags. 2003-04-02 11:20:49 +02:00			`def __init__(self, tagReplace=' '):`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`self.data = []`
Added keyword arg to htmlToText to determine how to replace HTML tags. 2003-04-02 11:20:49 +02:00			`self.tagReplace = tagReplace`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`sgmllib.SGMLParser.__init__(self)`

Made some changes to satisfy PyChecker. 2003-08-19 12:38:45 +02:00			`def unknown_starttag(self, tag, attr):`
Added keyword arg to htmlToText to determine how to replace HTML tags. 2003-04-02 11:20:49 +02:00			`self.data.append(self.tagReplace)`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00
			`def unknown_endtag(self, tag):`
Added keyword arg to htmlToText to determine how to replace HTML tags. 2003-04-02 11:20:49 +02:00			`self.data.append(self.tagReplace)`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00
			`def handle_data(self, data):`
			`self.data.append(data)`

			`def getText(self):`
			`text = ''.join(self.data).strip()`
Added normalizeWhitespace. 2003-09-01 20:39:27 +02:00			`return normalizeWhitespace(text)`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00
Added keyword arg to htmlToText to determine how to replace HTML tags. 2003-04-02 11:20:49 +02:00			`def htmlToText(s, tagReplace=' '):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Turns HTML into text. tagReplace is a string to replace HTML tags with.`
			`"""`
Added keyword arg to htmlToText to determine how to replace HTML tags. 2003-04-02 11:20:49 +02:00			`x = HtmlToText(tagReplace)`
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`x.feed(s)`
			`return x.getText()`

Added abbrev 2003-03-31 07:14:21 +02:00			`def eachSubstring(s):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Returns every substring starting at the first index until the last."""`
			`for i in xrange(1, len(s)+1):`
Added abbrev 2003-03-31 07:14:21 +02:00			`yield s[:i]`

			`def abbrev(strings):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Returns a dictionary mapping unambiguous abbreviations to full forms."""`
Added abbrev 2003-03-31 07:14:21 +02:00			`d = {}`
			`for s in strings:`
			`for abbreviation in eachSubstring(s):`
			`if abbreviation not in d:`
			`d[abbreviation] = s`
			`else:`
Added test for abbrev and fixed a bug it found. 2003-04-04 18:14:58 +02:00			`if abbreviation not in strings:`
			`d[abbreviation] = None`
Added abbrev 2003-03-31 07:14:21 +02:00			`removals = []`
			`for key in d:`
			`if d[key] is None:`
			`removals.append(key)`
			`for key in removals:`
			`del d[key]`
			`return d`

Changed utils.timeElapsed to use just a seconds instead of a now/then argument. 2003-05-20 17:37:25 +02:00			`def timeElapsed(elapsed, leadingZeroes=False, years=True, weeks=True,`
Added timeElapsed and converted FunCommands to use it. 2003-04-03 11:11:57 +02:00			`days=True, hours=True, minutes=True, seconds=True):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Given <elapsed> seconds, returns a string with an English description of`
			`how much time as passed. leadingZeroes determines whether 0 days, 0 hours,`
			`etc. will be printed; the others determine what larger time periods should`
			`be used.`
			`"""`
Made sure utils.timeElapsed doesn't go all decimal on me. 2003-06-03 05:18:14 +02:00			`elapsed = int(elapsed)`
Changed utils.timeElapsed to use just a seconds instead of a now/then argument. 2003-05-20 17:37:25 +02:00			`assert years or weeks or days or \`
			`hours or minutes or seconds, 'One flag must be True'`
Added timeElapsed and converted FunCommands to use it. 2003-04-03 11:11:57 +02:00			`ret = []`
			`if years:`
			`yrs, elapsed = elapsed // 31536000, elapsed % 31536000`
			`if leadingZeroes or yrs:`
			`if yrs:`
			`leadingZeroes = True`
			`if yrs != 1:`
			`yrs = '%s years' % yrs`
			`else:`
			`yrs = '1 year'`
			`ret.append(yrs)`
			`if weeks:`
			`wks, elapsed = elapsed // 604800, elapsed % 604800`
			`if leadingZeroes or wks:`
			`if wks:`
			`leadingZeroes = True`
			`if wks != 1:`
			`wks = '%s weeks' % wks`
			`else:`
			`wks = '1 week'`
			`ret.append(wks)`
			`if days:`
			`ds, elapsed = elapsed // 86400, elapsed % 86400`
			`if leadingZeroes or ds:`
			`if ds:`
			`leadingZeroes = True`
			`if ds != 1:`
			`ds = '%s days' % ds`
			`else:`
			`ds = '1 day'`
			`ret.append(ds)`
			`if hours:`
			`hrs, elapsed = elapsed // 3600, elapsed % 3600`
			`if leadingZeroes or hrs:`
			`if hrs:`
			`leadingZeroes = True`
			`if hrs != 1:`
			`hrs = '%s hours' % hrs`
			`else:`
			`hrs = '1 hour'`
			`ret.append(hrs)`
			`if minutes or seconds:`
			`mins, secs = elapsed // 60, elapsed % 60`
			`if leadingZeroes or mins:`
			`if mins != 1:`
			`mins = '%s minutes' % mins`
			`else:`
			`mins = '1 minute'`
			`ret.append(mins)`
			`if seconds:`
			`if secs != 1:`
			`secs = '%s seconds' % secs`
			`else:`
			`secs = '1 second'`
			`ret.append(secs)`
			`if len(ret) == 0:`
			`raise ValueError, 'Time difference not great enough to be noted.'`
			`if len(ret) == 1:`
			`return ret[0]`
			`else:`
commaAndified timeElapsed. 2003-08-21 18:31:11 +02:00			`return commaAndify(ret)`
Added function commaAndify to turn a list of strings into a proper English foo, bar, and baz string. 2003-08-20 11:24:57 +02:00
Added levenshtein distance 2003-04-04 17:49:24 +02:00			`def distance(s, t):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Returns the levenshtein edit distance between two strings."""`
Added levenshtein distance 2003-04-04 17:49:24 +02:00			`n = len(s)`
			`m = len(t)`
			`if n == 0:`
			`return m`
			`elif m == 0:`
			`return n`
Fixed bug (and added test) in distance. 2003-04-06 17:10:14 +02:00			`d = []`
			`for i in range(n+1):`
			`d.append([])`
			`for j in range(m+1):`
			`d[i].append(0)`
			`d[0][j] = j`
			`d[i][0] = i`
Added levenshtein distance 2003-04-04 17:49:24 +02:00			`for i in range(1, n+1):`
			`cs = s[i-1]`
			`for j in range(1, m+1):`
			`ct = t[j-1]`
Fixed bug (and added test) in distance. 2003-04-06 17:10:14 +02:00			`cost = int(cs != ct)`
Added levenshtein distance 2003-04-04 17:49:24 +02:00			`d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)`
			`return d[n][m]`
Added soundex to utils (along with a test) and to FunCommands. 2003-04-05 13:23:28 +02:00
			`_soundextrans = string.maketrans(string.ascii_uppercase,`
			`'01230120022455012623010202')`
			`_notUpper = string.ascii.translate(string.ascii, string.ascii_uppercase)`
			`def soundex(s, length=4):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Returns the soundex hash of a given string."""`
Added soundex to utils (along with a test) and to FunCommands. 2003-04-05 13:23:28 +02:00			`assert s`
			`s = s.upper() # Make everything uppercase.`
			`firstChar = s[0] # Save the first character.`
			`s = s.translate(string.ascii, _notUpper) # Delete non-letters.`
			`s = s.translate(_soundextrans) # Convert to soundex numbers.`
			`s = s.lstrip(s[0]) # Remove all repeated first characters.`
			`L = [firstChar]`
			`for c in s:`
			`if c != L[-1]:`
			`L.append(c)`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`L = [c for c in L if c != '0'] + (['0']*(length-1))`
Added soundex to utils (along with a test) and to FunCommands. 2003-04-05 13:23:28 +02:00			`s = ''.join(L)`
Added length=0 handling to soundex to return the everything. 2003-04-05 13:29:29 +02:00			`return length and s[:length] or s.rstrip('0')`
Added timeElapsed and converted FunCommands to use it. 2003-04-03 11:11:57 +02:00
Added dqrepr. 2003-04-12 14:50:20 +02:00			`def dqrepr(s):`
			`"""Returns a repr() of s guaranteed to be in double quotes."""`
Upgraded to 2.3. 2003-07-31 08:20:58 +02:00			`# The wankers-that-be decided not to use double-quotes anymore in 2.3.`
			`# return '"' + repr("'\x00" + s)[6:]`
string_escape is better than unicode_escape. 2003-08-17 08:28:05 +02:00			`return '"%s"' % s.encode('string_escape').replace('"', '\\"')`
Added dqrepr. 2003-04-12 14:50:20 +02:00
Added perlReToPythonRe and perlReToReplacer and associated tests. 2003-04-16 07:26:24 +02:00			`nonEscapedSlashes = re.compile(r'(?<!\\)/')`
			`def perlReToPythonRe(s):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Converts a string representation of a Perl regular expression (i.e.,`
			`m/^foo$/i or /foo\|bar/) to a Python regular expression.`
			`"""`
Added perlReToPythonRe and perlReToReplacer and associated tests. 2003-04-16 07:26:24 +02:00			`(kind, regexp, flags) = nonEscapedSlashes.split(s)`
			`regexp = regexp.replace('\\/', '/')`
			`if kind not in ('', 'm'):`
			`raise ValueError, 'Invalid kind: must be in ("", "m")'`
			`flag = 0`
			`try:`
			`for c in flags.upper():`
Found bug in perlReToPythonRe and fixed it. 2003-08-30 20:39:19 +02:00			`flag \|= getattr(re, c)`
Added perlReToPythonRe and perlReToReplacer and associated tests. 2003-04-16 07:26:24 +02:00			`except AttributeError:`
			`raise ValueError, 'Invalid flag: %s' % c`
			`return re.compile(regexp, flag)`
Added function commaAndify to turn a list of strings into a proper English foo, bar, and baz string. 2003-08-20 11:24:57 +02:00
Added perlReToPythonRe and perlReToReplacer and associated tests. 2003-04-16 07:26:24 +02:00			`def perlReToReplacer(s):`
Added more docstrings. 2003-08-10 12:45:44 +02:00			`"""Converts a string representation of a Perl regular expression (i.e.,`
			`s/foo/bar/g or s/foo/bar/i) to a Python function doing the equivalent`
			`replacement.`
			`"""`
Added perlReToPythonRe and perlReToReplacer and associated tests. 2003-04-16 07:26:24 +02:00			`(kind, regexp, replace, flags) = nonEscapedSlashes.split(s)`
			`if kind != 's':`
			`raise ValueError, 'Invalid kind: must be "s"'`
			`g = False`
			`if 'g' in flags:`
			`g = True`
			`flags = filter('g'.__ne__, flags)`
			`r = perlReToPythonRe('/'.join(('', regexp, flags)))`
			`if g:`
			`return lambda s: r.sub(replace, s)`
			`else:`
			`return lambda s: r.sub(replace, s, 1)`

* added new utility, findBinaryInPath which searches the PATH for a specific binary * removed getSpellBinary, and replaced with findBinaryInPath calls 2003-08-17 22:09:09 +02:00			`def findBinaryInPath(s):`
Line shortened. 2003-08-19 21:02:59 +02:00			`"""Return full path of a binary if it's in PATH, otherwise return None."""`
* added new utility, findBinaryInPath which searches the PATH for a specific binary * removed getSpellBinary, and replaced with findBinaryInPath calls 2003-08-17 22:09:09 +02:00			`cmdLine = None`
			`for dir in os.getenv('PATH').split(':'):`
			`filename = os.path.join(dir, s)`
			`if os.path.exists(filename):`
			`cmdLine = filename`
			`break`
			`return cmdLine`

Committing jemfinch's commaAndify fixes 2003-08-22 23:31:17 +02:00			`def commaAndify(seq):`
Added sortBy, which uses the decorate/sort/undecorate pattern to sort by a certain criteria. 2003-09-03 10:51:45 +02:00			`"""Given a a sequence, returns an english clause for that sequence.`

			`I.e., given [1, 2, 3], returns '1, 2, and 3'`
			`"""`
Committing jemfinch's commaAndify fixes 2003-08-22 23:31:17 +02:00			`L = list(seq)`
Added function commaAndify to turn a list of strings into a proper English foo, bar, and baz string. 2003-08-20 11:24:57 +02:00			`if len(L) == 0:`
			`return ''`
			`elif len(L) == 1:`
			`return L[0]`
			`elif len(L) == 2:`
			`return '%s and %s' % (L[0], L[1])`
			`else:`
			`L[-1] = 'and %s' % L[-1]`
Committing jemfinch's commaAndify fixes 2003-08-22 23:31:17 +02:00			`return ', '.join(L)`
Added function commaAndify to turn a list of strings into a proper English foo, bar, and baz string. 2003-08-20 11:24:57 +02:00
Added unCommaThe function. 2003-08-23 09:57:04 +02:00			`_unCommaTheRe = re.compile(r'(.),\s(the)$', re.I)`
			`def unCommaThe(s):`
Added sortBy, which uses the decorate/sort/undecorate pattern to sort by a certain criteria. 2003-09-03 10:51:45 +02:00			`"""Takes a string of the form 'foo, the' and turns it into 'the foo'."""`
Added unCommaThe function. 2003-08-23 09:57:04 +02:00			`m = _unCommaTheRe.match(s)`
			`if m is not None:`
			`return '%s %s' % (m.group(2), m.group(1))`
			`else:`
			`return s`

Added utils.wrapLines and wrapped the lines of each plugin's example. 2003-08-27 18:25:43 +02:00			`def wrapLines(s):`
Added sortBy, which uses the decorate/sort/undecorate pattern to sort by a certain criteria. 2003-09-03 10:51:45 +02:00			`"""Word wraps several paragraphs in a string s."""`
Added utils.wrapLines and wrapped the lines of each plugin's example. 2003-08-27 18:25:43 +02:00			`L = []`
			`for line in s.splitlines():`
			`L.append(textwrap.fill(line))`
			`return '\n'.join(L)`

Added utils.pluralize. 2003-09-01 07:42:35 +02:00			`plurals = {}`
			`def pluralize(i, s):`
Added sortBy, which uses the decorate/sort/undecorate pattern to sort by a certain criteria. 2003-09-03 10:51:45 +02:00			`"""Returns the plural of s based on its number i. Put any exceptions to`
			`the general English rule of appending 's' in the plurals dictionary.`
			`"""`
Added utils.pluralize. 2003-09-01 07:42:35 +02:00			`if i == 1:`
			`return s`
			`else:`
			`if s in plurals:`
			`return plurals[s]`
			`else:`
			`return s + 's'`

Added nItems. 2003-09-03 11:40:26 +02:00			`def nItems(n, item, between=None):`
			`if between is None:`
			`return '%s %s' % (n, pluralize(n, item))`
			`else:`
			`return '%s %s %s' % (n, between, pluralize(n, item))`

Added utils.pluralize. 2003-09-01 07:42:35 +02:00			`def be(i):`
Added sortBy, which uses the decorate/sort/undecorate pattern to sort by a certain criteria. 2003-09-03 10:51:45 +02:00			`"""Returns the form of the verb 'to be' based on the number i."""`
Added utils.pluralize. 2003-09-01 07:42:35 +02:00			`if i == 1:`
			`return 'is'`
			`else:`
			`return 'are'`

Added sortBy, which uses the decorate/sort/undecorate pattern to sort by a certain criteria. 2003-09-03 10:51:45 +02:00			`def sortBy(f, L, cmp=cmp):`
			`"""Uses the decorate-sort-undecorate pattern to sort L by function f."""`
			`for (i, elt) in enumerate(L):`
			`L[i] = (f(elt), elt)`
			`L.sort(cmp)`
			`for (i, elt) in enumerate(L):`
			`L[i] = L[i][1]`
Added normalizeWhitespace. 2003-09-01 20:39:27 +02:00
Moved several things from fix.py to utils.py. 2003-09-04 22:42:37 +02:00			`def mktemp(suffix=''):`
			`"""Gives a decent random string, suitable for a filename."""`
			`import sha`
			`import md5`
			`import time`
			`import random`
			`r = random.Random()`
			`m = md5.md5(suffix)`
			`r.seed(time.time())`
			`s = str(r.getstate())`
			`for x in xrange(0, random.randrange(400), random.randrange(1, 5)):`
			`m.update(str(x))`
			`m.update(s)`
			`m.update(str(time.time()))`
			`s = m.hexdigest()`
			`return sha.sha(s + str(time.time())).hexdigest() + suffix`

			`def itersplit(isSeparator, iterable, maxsplit=-1, yieldEmpty=False):`
			`"""Splits an iterator based on a predicate isSeparator."""`
			`acc = []`
			`for element in iterable:`
			`if maxsplit == 0 or not isSeparator(element):`
			`acc.append(element)`
			`else:`
			`maxsplit -= 1`
			`if acc or yieldEmpty:`
			`yield acc`
			`acc = []`
			`if acc or yieldEmpty:`
			`yield acc`

			`def flatten(seq, strings=False):`
			`"""Flattens a list of lists into a single list. See the test for examples.`
			`"""`
			`for elt in seq:`
			`if not strings and type(elt) == str or type(elt) == unicode:`
			`yield elt`
			`else:`
			`try:`
			`for x in flatten(elt):`
			`yield x`
			`except TypeError:`
			`yield elt`

			`class IterableMap(object):`
			`"""Define .iteritems() in a class and subclass this to get the other iters.`
			`"""`
			`def iteritems(self):`
			`raise NotImplementedError`

			`def iterkeys(self):`
			`for (key, _) in self.iteritems():`
			`yield key`

			`def itervalues(self):`
			`for (_, value) in self.iteritems():`
			`yield value`

			`def items(self):`
			`return list(self.iteritems())`

			`def keys(self):`
			`return list(self.iterkeys())`

			`def values(self):`
			`return list(self.itervalues())`

			`def __len__(self):`
			`ret = 0`
			`for _ in self.iteritems():`
			`ret += 1`
			`return ret`

			`def __nonzero__(self):`
			`for _ in self.iteritems():`
			`return True`
			`return False`


Added utils.py, with a quality HTML stripper; removed stripHtml from other source files 2003-03-27 21:10:10 +01:00			`# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:`