Limnoria/sandbox/coverage.py

#!/usr/bin/python
#
#             Perforce Defect Tracking Integration Project
#              <http://www.ravenbrook.com/project/p4dti/>
#
#                   COVERAGE.PY -- COVERAGE TESTING
#
#             Gareth Rees, Ravenbrook Limited, 2001-12-04
#
#
# 1. INTRODUCTION
#
# This module provides coverage testing for Python code.
#
# The intended readership is all Python developers.
#
# This document is not confidential.
#
# See [GDR 2001-12-04a] for the command-line interface, programmatic
# interface and limitations.  See [GDR 2001-12-04b] for requirements and
# design.

"""Usage:

coverage.py -x MODULE.py [ARG1 ARG2 ...]
    Execute module, passing the given command-line arguments, collecting
    coverage data.

coverage.py -e
    Erase collected coverage data.

coverage.py -r [-m] FILE1 FILE2 ...
    Report on the statement coverage for the given files.  With the -m
    option, show line numbers of the statements that weren't executed.

coverage.py -a [-d dir] FILE1 FILE2 ...
    Make annotated copies of the given files, marking statements that
    are executed with > and statements that are missed with !.  With
    the -d option, make the copies in that directory.  Without the -d
    option, make each copy in the same directory as the original.

Coverage data is saved in the file .coverage by default.  Set the
COVERAGE_FILE environment variable to save it somewhere else."""

import os
import re
import string
import sys
import types


# 2. IMPLEMENTATION
#
# This uses the "singleton" pattern.
#
# The word "morf" means a module object (from which the source file can
# be deduced by suitable manipulation of the __file__ attribute) or a
# filename.
#
# When we generate a coverage report we have to canonicalize every
# filename in the coverage dictionary just in case it refers to the
# module we are reporting on.  It seems a shame to throw away this
# information so the data in the coverage dictionary is transferred to
# the 'cexecuted' dictionary under the canonical filenames.
#
# The coverage dictionary is called "c" and the trace function "t".  The
# reason for these short names is that Python looks up variables by name
# at runtime and so execution time depends on the length of variables!
# In the bottleneck of this application it's appropriate to abbreviate
# names to increase speed.

# A dictionary with an entry for (Python source file name, line number
# in that file) if that line has been executed.  Every value is 1.  The
# trace function t adds the entries; canonicalize_filenames() moves them
# into the singleton's 'cexecuted' dictionary and rebinds c to a fresh
# empty dictionary.
c = {}

# t(f, x, y).  This function is passed to sys.settrace as a trace
# function.  See [van Rossum 2001-07-20b, 9.2] for an explanation of
# sys.settrace and the arguments and return value of the trace function.
# See [van Rossum 2001-07-20a, 3.2] for a description of frame and code
# objects.

def t(f, x, y):
    """Record the current line of frame f in the global dictionary c.

    The key is the pair (f.f_code.co_filename, f.f_lineno) and the
    value is always 1.  The arguments x and y are not used.  The
    function returns itself.
    """
    c[(f.f_code.co_filename, f.f_lineno)] = 1
    return t

# Placeholder for the singleton.  coverage.__init__ refuses to run while
# this name holds a true value, so it must exist (and be false) before
# the first instance is created.
the_coverage = None

class coverage:
    # Exception used for coverage-specific failures.  It is a plain
    # string, raised in this class as "raise self.error, message".
    error = "coverage error"

    # Name of the cache file (unless environment variable is set).
    cache_default = ".coverage"

    # Environment variable naming the cache file.
    cache_env = "COVERAGE_FILE"

    # A map from canonical Python source file name to a dictionary in
    # which there's an entry for each line number that has been
    # executed.
    cexecuted = {}

    # Cache of results of calling the analyze_morf() method, so that you
    # can specify both -r and -a without doing double work.
    analysis_cache = {}

    # Cache of results of calling the canonical_filename() method, to
    # avoid duplicating work.  Keyed by the filename as originally
    # given.
    canonical_filename_cache = {}

    def __init__(self):
        """Create the singleton and load any saved coverage data.

        Raises self.error if the module-level name the_coverage already
        holds a true value, i.e. a coverage object already exists.
        """
        if the_coverage:
            raise self.error, "Only one coverage object allowed."
        # The environment variable, when set, overrides the default
        # cache file name.
        cache_file = os.environ.get(self.cache_env, self.cache_default)
        self.cache = cache_file
        self.analysis_cache = {}
        self.restore()

    def help(self, error=None):
        if error:
            print error
            print
        print __doc__
        sys.exit(1)

    def command_line(self):
        import getopt
        settings = {}
        optmap = {
            '-a': 'annotate',
            '-d:': 'directory=',
            '-e': 'erase',
            '-h': 'help',
            '-i': 'ignore-errors',
            '-m': 'show-missing',
            '-r': 'report',
            '-x': 'execute',
            }
        short_opts = string.join(map(lambda o: o[1:], optmap.keys()), '')
        long_opts = optmap.values()
        options, args = getopt.getopt(sys.argv[1:], short_opts,
                                      long_opts)
        for o, a in options:
            if optmap.has_key(o):
                settings[optmap[o]] = 1
            elif optmap.has_key(o + ':'):
                settings[optmap[o + ':']] = a
            elif o[2:] in long_opts:
                settings[o[2:]] = 1
            elif o[2:] + '=' in long_opts:
                settings[o[2:]] = a
            else:
                self.help("Unknown option: '%s'." % o)
        if settings.get('help'):
            self.help()
        for i in ['erase', 'execute']:
            for j in ['annotate', 'report']:
                if settings.get(i) and settings.get(j):
                    self.help("You can't specify the '%s' and '%s' "
                              "options at the same time." % (i, j))
        args_needed = (settings.get('execute')
                       or settings.get('annotate')
                       or settings.get('report'))
        action = settings.get('erase') or args_needed
        if not action:
            self.help("You must specify at least one of -e, -x, -r, "
                      "or -a.")
        if not args_needed and args:
            self.help("Unexpected arguments %s." % args)
        if settings.get('erase'):
            self.erase()
        if settings.get('execute'):
            if not args:
                self.help("Nothing to do.")
            sys.argv = args
            self.start()
            import __main__
            sys.path[0] = os.path.dirname(sys.argv[0])
            execfile(sys.argv[0], __main__.__dict__)
        if not args:
            args = self.cexecuted.keys()
        ignore_errors = settings.get('ignore-errors')
        show_missing = settings.get('show-missing')
        directory = settings.get('directory=')
        if settings.get('report'):
            self.report(args, show_missing, ignore_errors)
        if settings.get('annotate'):
            self.annotate(args, directory, ignore_errors)

    def start(self):
        """Start recording by installing the module-level trace
        function t with sys.settrace."""
        sys.settrace(t)

    def stop(self):
        """Stop recording by clearing the trace function set with
        sys.settrace."""
        sys.settrace(None)

    def erase(self):
        global c
        c = {}
        self.analysis_cache = {}
        self.cexecuted = {}
        if os.path.exists(self.cache):
            os.remove(self.cache)

    # save().  Save coverage data to the coverage cache.

    def save(self):
        self.canonicalize_filenames()
        cache = open(self.cache, 'wb')
        import marshal
        marshal.dump(self.cexecuted, cache)
        cache.close()

    # restore().  Restore coverage data from the coverage cache (if it
    # exists).

    def restore(self):
        global c
        c = {}
        self.cexecuted = {}
        if not os.path.exists(self.cache):
            return
        try:
            cache = open(self.cache, 'rb')
            import marshal
            cexecuted = marshal.load(cache)
            cache.close()
            if isinstance(cexecuted, types.DictType):
                self.cexecuted = cexecuted
        except:
            pass

    # canonical_filename(filename).  Return a canonical filename for the
    # file (that is, an absolute path with no redundant components and
    # normalized case).  See [GDR 2001-12-04b, 3.3].

    def canonical_filename(self, filename):
        if not self.canonical_filename_cache.has_key(filename):
            f = filename
            if os.path.isabs(f) and not os.path.exists(f):
                f = os.path.basename(f)
            if not os.path.isabs(f):
                for path in [os.curdir] + sys.path:
                    g = os.path.join(path, f)
                    if os.path.exists(g):
                        f = g
                        break
            cf = os.path.normcase(os.path.abspath(f))
            self.canonical_filename_cache[filename] = cf
        return self.canonical_filename_cache[filename]

    # canonicalize_filenames().  Copy results from the global dictionary
    # "c" to "cexecuted", canonicalizing filenames on the way.  Clear
    # "c" afterwards.

    def canonicalize_filenames(self):
        """Move the raw trace data from c into self.cexecuted.

        Each (filename, line number) key of the global dictionary c is
        recorded under the canonical form of its filename.  Afterwards
        c is rebound to an empty dictionary.
        """
        global c
        merged = self.cexecuted
        for key in c.keys():
            source_name, line = key
            canonical = self.canonical_filename(source_name)
            try:
                lines_seen = merged[canonical]
            except KeyError:
                lines_seen = merged[canonical] = {}
            lines_seen[line] = 1
        c = {}

    # morf_filename(morf).  Return the filename for a module or file.

    def morf_filename(self, morf):
        """Return the canonical filename for a module object or a
        filename.

        Raises self.error for a module that has no __file__ attribute.
        """
        path = morf
        if isinstance(morf, types.ModuleType):
            if not hasattr(morf, '__file__'):
                raise self.error, "Module has no __file__ attribute."
            path = morf.__file__
        return self.canonical_filename(path)

    # analyze_morf(morf).  Analyze the module or filename passed as
    # the argument.  If the source code can't be found, raise an error.
    # Otherwise, return a pair of (1) the canonical filename of the
    # source code for the module, and (2) a list of lines of statements
    # in the source code.

    def analyze_morf(self, morf):
        """Find the statement lines of a module object or filename.

        Returns a pair: the canonical filename of the source code, and
        a sorted list of the line numbers on which statements start.
        The pair is remembered in self.analysis_cache under morf.

        Raises self.error if morf names compiled code ('.pyc' or
        '.pyo') whose source file is missing, or a file that is neither
        compiled code nor '.py' source.
        """
        if self.analysis_cache.has_key(morf):
            return self.analysis_cache[morf]
        filename = self.morf_filename(morf)
        ext = os.path.splitext(filename)[1]
        if ext in ('.pyc', '.pyo'):
            # Dropping the last character of either extension gives the
            # name of the '.py' source file.
            source_name = filename[0:-1]
            if not os.path.exists(source_name):
                raise self.error, ("No source for compiled code '%s'."
                                   % filename)
            filename = source_name
        elif ext != '.py':
            raise self.error, "File '%s' not Python source." % filename
        import parser
        source = open(filename, 'r')
        try:
            text = source.read()
        finally:
            # Close the file before parsing, so that a parse error
            # cannot leak the handle.
            source.close()
        # NOTE(review): a newline is appended because the parser is
        # believed to reject source whose last line is unterminated on
        # some Python versions -- confirm.  It adds no statements and
        # moves no line numbers.
        tree = parser.suite(text + '\n').totuple(1)
        statements = {}
        self.find_statements(tree, statements)
        lines = statements.keys()
        lines.sort()
        result = filename, lines
        self.analysis_cache[morf] = result
        return result

    # find_statements(tree, dict).  Find each statement in the parse
    # tree and record the line on which the statement starts in the
    # dictionary (by assigning it to 1).
    #
    # It works by walking the whole tree depth-first.  Every time it
    # comes across a statement (symbol.stmt -- this includes compound
    # statements like 'if' and 'while') it calls find_statement, which
    # descends the tree below the statement to find the first terminal
    # token in that statement and record the lines on which that token
    # was found.
    #
    # This algorithm may find some lines several times (because of the
    # grammar production statement -> compound statement -> statement),
    # but that doesn't matter because we record lines as the keys of the
    # dictionary.
    #
    # See also [GDR 2001-12-04b, 3.2].

    def find_statements(self, tree, dict):
        import symbol, token
        if token.ISNONTERMINAL(tree[0]):
            for t in tree[1:]:
                self.find_statements(t, dict)
            if tree[0] == symbol.stmt:
                self.find_statement(tree[1], dict)
        elif (tree[0] == token.NAME
              and tree[1] in ['elif', 'except', 'finally']):
            dict[tree[2]] = 1

    def find_statement(self, tree, dict):
        import token
        while token.ISNONTERMINAL(tree[0]):
            tree = tree[1]
        dict[tree[2]] = 1

    # format_lines(statements, lines).  Format a list of line numbers
    # for printing by coalescing groups of lines as long as the lines
    # represent consecutive statements.  This will coalesce even if
    # there are gaps between statements, so if statements =
    # [1,2,3,4,5,10,11,12,13,14] and lines = [1,2,5,10,11,13,14] then
    # format_lines will return "1-2, 5-11, 13-14".

    def format_lines(self, statements, lines):
        pairs = []
        i = 0
        j = 0
        start = None
        pairs = []
        while i < len(statements) and j < len(lines):
            if statements[i] == lines[j]:
                if start == None:
                    start = lines[j]
                end = lines[j]
                j = j + 1
            elif start:
                pairs.append((start, end))
                start = None
            i = i + 1
        if start:
            pairs.append((start, end))
        def stringify(pair):
            start, end = pair
            if start == end:
                return "%d" % start
            else:
                return "%d-%d" % (start, end)
        import string
        return string.join(map(stringify, pairs), ", ")

    def analysis(self, morf):
        filename, statements = self.analyze_morf(morf)
        self.canonicalize_filenames()
        if not self.cexecuted.has_key(filename):
            self.cexecuted[filename] = {}
        missing = []
        for line in statements:
            if not self.cexecuted[filename].has_key(line):
                missing.append(line)
        return (filename, statements, missing,
                self.format_lines(statements, missing))

    def morf_name(self, morf):
        if isinstance(morf, types.ModuleType):
            return morf.__name__
        else:
            return os.path.splitext(os.path.basename(morf))[0]

    def report(self, morfs, show_missing=1, ignore_errors=0):
        if not isinstance(morfs, types.ListType):
            morfs = [morfs]
        max_name = max([5,] + map(len, map(self.morf_name, morfs)))
        fmt_name = "%%- %ds  " % max_name
        fmt_err = fmt_name + "%s: %s"
        header = fmt_name % "Name" + " Stmts   Exec  Cover"
        fmt_coverage = fmt_name + "% 6d % 6d % 5d%%"
        if show_missing:
            header = header + "   Missing"
            fmt_coverage = fmt_coverage + "   %s"
        print header
        print "-" * len(header)
        total_statements = 0
        total_executed = 0
        for morf in morfs:
            name = self.morf_name(morf)
            try:
                _, statements, missing, readable  = self.analysis(morf)
                n = len(statements)
                m = n - len(missing)
                if n > 0:
                    pc = 100.0 * m / n
                else:
                    pc = 100.0
                args = (name, n, m, pc)
                if show_missing:
                    args = args + (readable,)
                print fmt_coverage % args
                total_statements = total_statements + n
                total_executed = total_executed + m
            except KeyboardInterrupt:
                raise
            except:
                if not ignore_errors:
                    type, msg = sys.exc_info()[0:2]
                    print fmt_err % (name, type, msg)
        if len(morfs) > 1:
            print "-" * len(header)
            if total_statements > 0:
                pc = 100.0 * total_executed / total_statements
            else:
                pc = 100.0
            args = ("TOTAL", total_statements, total_executed, pc)
            if show_missing:
                args = args + ("",)
            print fmt_coverage % args

    # annotate(morfs, directory, ignore_errors).

    # Matches a line that is empty, all whitespace, or holds nothing
    # but a comment.
    blank_re = re.compile("\\s*(#|$)")
    # Matches a line that holds only "else:", optionally followed by a
    # comment.
    else_re = re.compile("\\s*else\\s*:\\s*(#|$)")

    def annotate(self, morfs, directory=None, ignore_errors=0):
        for morf in morfs:
            try:
                filename, statements, missing, _ = self.analysis(morf)
                source = open(filename, 'r')
                if directory:
                    dest_file = os.path.join(directory,
                                             os.path.basename(filename)
                                             + ',cover')
                else:
                    dest_file = filename + ',cover'
                dest = open(dest_file, 'w')
                lineno = 0
                i = 0
                j = 0
                covered = 1
                while 1:
                    line = source.readline()
                    if line == '':
                        break
                    lineno = lineno + 1
                    while i < len(statements) and statements[i] < lineno:
                        i = i + 1
                    while j < len(missing) and missing[j] < lineno:
                        j = j + 1
                    if i < len(statements) and statements[i] == lineno:
                        covered = j >= len(missing) or missing[j] > lineno
                    if self.blank_re.match(line):
                        dest.write('  ')
                    elif self.else_re.match(line):
                        # Special logic for lines containing only
                        # 'else:'.  See [GDR 2001-12-04b, 3.2].
                        if i >= len(statements) and j >= len(missing):
                            dest.write('! ')
                        elif i >= len(statements) or j >= len(missing):
                            dest.write('> ')
                        elif statements[i] == missing[j]:
                            dest.write('! ')
                        else:
                            dest.write('> ')
                    elif covered:
                        dest.write('> ')
                    else:
                        dest.write('! ')
                    dest.write(line)
                source.close()
                dest.close()
            except KeyboardInterrupt:
                raise
            except:
                if not ignore_errors:
                    raise


# Singleton object.  Creating it also loads any previously saved
# coverage data, because coverage.__init__ calls restore().
the_coverage = coverage()

# Module functions call methods in the singleton object.
def start(*args, **kw):
    """Call the_coverage.start with the given arguments."""
    return apply(the_coverage.start, args, kw)

def stop(*args, **kw):
    """Call the_coverage.stop with the given arguments."""
    return apply(the_coverage.stop, args, kw)

def erase(*args, **kw):
    """Call the_coverage.erase with the given arguments."""
    return apply(the_coverage.erase, args, kw)

def analysis(*args, **kw):
    """Call the_coverage.analysis with the given arguments."""
    return apply(the_coverage.analysis, args, kw)

def report(*args, **kw):
    """Call the_coverage.report with the given arguments.

    Keyword arguments are passed on as well, so that for example
    report(morfs, show_missing=0) works.
    """
    return apply(the_coverage.report, args, kw)

# Save coverage data when Python exits.  (The atexit module wasn't
# introduced until Python 2.0, so use sys.exitfunc when it's not
# available.)
try:
    import atexit
except ImportError:
    # No atexit module: fall back to the interpreter's single exit
    # hook.
    sys.exitfunc = the_coverage.save
else:
    atexit.register(the_coverage.save)

# Command-line interface.  Runs only when this file is executed as a
# script; importing the module just creates the singleton.
if __name__ == '__main__':
    the_coverage.command_line()


# A. REFERENCES
#
# [GDR 2001-12-04a] "Statement coverage for Python"; Gareth Rees;
# Ravenbrook Limited; 2001-12-04;
# <http://www.garethrees.org/2001/12/04/python-coverage/>.
#
# [GDR 2001-12-04b] "Statement coverage for Python: design and
# analysis"; Gareth Rees; Ravenbrook Limited; 2001-12-04;
# <http://www.garethrees.org/2001/12/04/python-coverage/design.html>.
#
# [van Rossum 2001-07-20a] "Python Reference Manual (release 2.1.1)";
# Guido van Rossum; 2001-07-20;
# <http://www.python.org/doc/2.1.1/ref/ref.html>.
#
# [van Rossum 2001-07-20b] "Python Library Reference"; Guido van Rossum;
# 2001-07-20; <http://www.python.org/doc/2.1.1/lib/lib.html>.
#
#
# B. DOCUMENT HISTORY
#
# 2001-12-04 GDR Created.
#
# 2001-12-06 GDR Added command-line interface and source code
# annotation.
#
# 2001-12-09 GDR Moved design and interface to separate documents.
#
# 2001-12-10 GDR Open cache file as binary on Windows.  Allow
# simultaneous -e and -x, or -a and -r.
#
# 2001-12-12 GDR Added command-line help.  Cache analysis so that it
# only needs to be done once when you specify -a and -r.
#
# 2001-12-13 GDR Improved speed while recording.  Portable between
# Python 1.5.2 and 2.1.1.
#
# 2002-01-03 GDR Module-level functions work correctly.
#
# 2002-01-07 GDR Update sys.path when running a file with the -x option,
# so that it matches the value the program would get if it were run on
# its own.
#
#
# C. COPYRIGHT AND LICENCE
#
# Copyright 2001 Gareth Rees.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the
#    distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
#
#
# $Id$