From ae76d59cbccffe6d6cea6efc85ed8570bacd7504 Mon Sep 17 00:00:00 2001 From: Jeremy Fincher Date: Fri, 1 Oct 2004 11:03:58 +0000 Subject: [PATCH] Initial checkin. --- tools/coverage.py | 596 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 596 insertions(+) create mode 100644 tools/coverage.py diff --git a/tools/coverage.py b/tools/coverage.py new file mode 100644 index 000000000..557dd04e3 --- /dev/null +++ b/tools/coverage.py @@ -0,0 +1,596 @@ +#!/usr/bin/python +# +# Perforce Defect Tracking Integration Project +# +# +# COVERAGE.PY -- COVERAGE TESTING +# +# Gareth Rees, Ravenbrook Limited, 2001-12-04 +# +# +# 1. INTRODUCTION +# +# This module provides coverage testing for Python code. +# +# The intended readership is all Python developers. +# +# This document is not confidential. +# +# See [GDR 2001-12-04a] for the command-line interface, programmatic +# interface and limitations. See [GDR 2001-12-04b] for requirements and +# design. + +"""Usage: + +coverage.py -x MODULE.py [ARG1 ARG2 ...] + Execute module, passing the given command-line arguments, collecting + coverage data. + +coverage.py -e + Erase collected coverage data. + +coverage.py -r [-m] FILE1 FILE2 ... + Report on the statement coverage for the given files. With the -m + option, show line numbers of the statements that weren't executed. + +coverage.py -a [-d dir] FILE1 FILE2 ... + Make annotated copies of the given files, marking statements that + are executed with > and statements that are missed with !. With + the -d option, make the copies in that directory. Without the -d + option, make each copy in the same directory as the original. + +Coverage data is saved in the file .coverage by default. Set the +COVERAGE_FILE environment variable to save it somewhere else.""" + +import os +import re +import string +import sys +import types + + +# 2. IMPLEMENTATION +# +# This uses the "singleton" pattern. +# +# The word "morf" means a module object (from which the source file can +# be deduced by suitable manipulation of the __file__ attribute) or a +# filename. +# +# When we generate a coverage report we have to canonicalize every +# filename in the coverage dictionary just in case it refers to the +# module we are reporting on. It seems a shame to throw away this +# information so the data in the coverage dictionary is transferred to +# the 'cexecuted' dictionary under the canonical filenames. +# +# The coverage dictionary is called "c" and the trace function "t". The +# reason for these short names is that Python looks up variables by name +# at runtime and so execution time depends on the length of variables! +# In the bottleneck of this application it's appropriate to abbreviate +# names to increase speed. + +# A dictionary with an entry for (Python source file name, line number +# in that file) if that line has been executed. +c = {} + +# t(f, x, y). This method is passed to sys.settrace as a trace +# function. See [van Rossum 2001-07-20b, 9.2] for an explanation of +# sys.settrace and the arguments and return value of the trace function. +# See [van Rossum 2001-07-20a, 3.2] for a description of frame and code +# objects. + +def t(f, x, y): + c[(f.f_code.co_filename, f.f_lineno)] = 1 + return t + +the_coverage = None + +class coverage: + error = "coverage error" + + # Name of the cache file (unless environment variable is set). + cache_default = ".coverage" + + # Environment variable naming the cache file. + cache_env = "COVERAGE_FILE" + + # A map from canonical Python source file name to a dictionary in + # which there's an entry for each line number that has been + # executed. + cexecuted = {} + + # Cache of results of calling the analysis() method, so that you can + # specify both -r and -a without doing double work. + analysis_cache = {} + + # Cache of results of calling the canonical_filename() method, to + # avoid duplicating work. + canonical_filename_cache = {} + + def __init__(self): + global the_coverage + if the_coverage: + raise self.error, "Only one coverage object allowed." + self.cache = os.environ.get(self.cache_env, self.cache_default) + self.restore() + self.analysis_cache = {} + + def help(self, error=None): + if error: + print error + print + print __doc__ + sys.exit(1) + + def command_line(self): + import getopt + settings = {} + optmap = { + '-a': 'annotate', + '-d:': 'directory=', + '-e': 'erase', + '-h': 'help', + '-i': 'ignore-errors', + '-m': 'show-missing', + '-r': 'report', + '-x': 'execute', + } + short_opts = string.join(map(lambda o: o[1:], optmap.keys()), '') + long_opts = optmap.values() + options, args = getopt.getopt(sys.argv[1:], short_opts, + long_opts) + for o, a in options: + if optmap.has_key(o): + settings[optmap[o]] = 1 + elif optmap.has_key(o + ':'): + settings[optmap[o + ':']] = a + elif o[2:] in long_opts: + settings[o[2:]] = 1 + elif o[2:] + '=' in long_opts: + settings[o[2:]] = a + else: + self.help("Unknown option: '%s'." % o) + if settings.get('help'): + self.help() + for i in ['erase', 'execute']: + for j in ['annotate', 'report']: + if settings.get(i) and settings.get(j): + self.help("You can't specify the '%s' and '%s' " + "options at the same time." % (i, j)) + args_needed = (settings.get('execute') + or settings.get('annotate') + or settings.get('report')) + action = settings.get('erase') or args_needed + if not action: + self.help("You must specify at least one of -e, -x, -r, " + "or -a.") + if not args_needed and args: + self.help("Unexpected arguments %s." % args) + if settings.get('erase'): + self.erase() + if settings.get('execute'): + if not args: + self.help("Nothing to do.") + sys.argv = args + self.start() + import __main__ + sys.path[0] = os.path.dirname(sys.argv[0]) + execfile(sys.argv[0], __main__.__dict__) + if not args: + args = self.cexecuted.keys() + ignore_errors = settings.get('ignore-errors') + show_missing = settings.get('show-missing') + directory = settings.get('directory=') + if settings.get('report'): + self.report(args, show_missing, ignore_errors) + if settings.get('annotate'): + self.annotate(args, directory, ignore_errors) + + def start(self): + sys.settrace(t) + + def stop(self): + sys.settrace(None) + + def erase(self): + global c + c = {} + self.analysis_cache = {} + self.cexecuted = {} + if os.path.exists(self.cache): + os.remove(self.cache) + + # save(). Save coverage data to the coverage cache. + + def save(self): + self.canonicalize_filenames() + cache = open(self.cache, 'wb') + import marshal + marshal.dump(self.cexecuted, cache) + cache.close() + + # restore(). Restore coverage data from the coverage cache (if it + # exists). + + def restore(self): + global c + c = {} + self.cexecuted = {} + if not os.path.exists(self.cache): + return + try: + cache = open(self.cache, 'rb') + import marshal + cexecuted = marshal.load(cache) + cache.close() + if isinstance(cexecuted, types.DictType): + self.cexecuted = cexecuted + except: + pass + + # canonical_filename(filename). Return a canonical filename for the + # file (that is, an absolute path with no redundant components and + # normalized case). See [GDR 2001-12-04b, 3.3]. + + def canonical_filename(self, filename): + if not self.canonical_filename_cache.has_key(filename): + f = filename + if os.path.isabs(f) and not os.path.exists(f): + f = os.path.basename(f) + if not os.path.isabs(f): + for path in [os.curdir] + sys.path: + g = os.path.join(path, f) + if os.path.exists(g): + f = g + break + cf = os.path.normcase(os.path.abspath(f)) + self.canonical_filename_cache[filename] = cf + return self.canonical_filename_cache[filename] + + # canonicalize_filenames(). Copy results from "executed" to + # "cexecuted", canonicalizing filenames on the way. Clear the + # "executed" map. + + def canonicalize_filenames(self): + global c + for filename, lineno in c.keys(): + f = self.canonical_filename(filename) + if not self.cexecuted.has_key(f): + self.cexecuted[f] = {} + self.cexecuted[f][lineno] = 1 + c = {} + + # morf_filename(morf). Return the filename for a module or file. + + def morf_filename(self, morf): + if isinstance(morf, types.ModuleType): + if not hasattr(morf, '__file__'): + raise self.error, "Module has no __file__ attribute." + file = morf.__file__ + else: + file = morf + return self.canonical_filename(file) + + # analyze_morf(morf). Analyze the module or filename passed as + # the argument. If the source code can't be found, raise an error. + # Otherwise, return a pair of (1) the canonical filename of the + # source code for the module, and (2) a list of lines of statements + # in the source code. + + def analyze_morf(self, morf): + if self.analysis_cache.has_key(morf): + return self.analysis_cache[morf] + filename = self.morf_filename(morf) + ext = os.path.splitext(filename)[1] + if ext == '.pyc': + if not os.path.exists(filename[0:-1]): + raise self.error, ("No source for compiled code '%s'." + % filename) + filename = filename[0:-1] + elif ext != '.py': + raise self.error, "File '%s' not Python source." % filename + source = open(filename, 'r') + import parser + tree = parser.suite(source.read()).totuple(1) + source.close() + statements = {} + self.find_statements(tree, statements) + lines = statements.keys() + lines.sort() + result = filename, lines + self.analysis_cache[morf] = result + return result + + # find_statements(tree, dict). Find each statement in the parse + # tree and record the line on which the statement starts in the + # dictionary (by assigning it to 1). + # + # It works by walking the whole tree depth-first. Every time it + # comes across a statement (symbol.stmt -- this includes compound + # statements like 'if' and 'while') it calls find_statement, which + # descends the tree below the statement to find the first terminal + # token in that statement and record the lines on which that token + # was found. + # + # This algorithm may find some lines several times (because of the + # grammar production statement -> compound statement -> statement), + # but that doesn't matter because we record lines as the keys of the + # dictionary. + # + # See also [GDR 2001-12-04b, 3.2]. + + def find_statements(self, tree, dict): + import symbol, token + if token.ISNONTERMINAL(tree[0]): + for t in tree[1:]: + self.find_statements(t, dict) + if tree[0] == symbol.stmt: + self.find_statement(tree[1], dict) + elif (tree[0] == token.NAME + and tree[1] in ['elif', 'except', 'finally']): + dict[tree[2]] = 1 + + def find_statement(self, tree, dict): + import token + while token.ISNONTERMINAL(tree[0]): + tree = tree[1] + dict[tree[2]] = 1 + + # format_lines(statements, lines). Format a list of line numbers + # for printing by coalescing groups of lines as long as the lines + # represent consecutive statements. This will coalesce even if + # there are gaps between statements, so if statements = + # [1,2,3,4,5,10,11,12,13,14] and lines = [1,2,5,10,11,13,14] then + # format_lines will return "1-2, 5-11, 13-14". + + def format_lines(self, statements, lines): + pairs = [] + i = 0 + j = 0 + start = None + pairs = [] + while i < len(statements) and j < len(lines): + if statements[i] == lines[j]: + if start == None: + start = lines[j] + end = lines[j] + j = j + 1 + elif start: + pairs.append((start, end)) + start = None + i = i + 1 + if start: + pairs.append((start, end)) + def stringify(pair): + start, end = pair + if start == end: + return "%d" % start + else: + return "%d-%d" % (start, end) + import string + return string.join(map(stringify, pairs), ", ") + + def analysis(self, morf): + filename, statements = self.analyze_morf(morf) + self.canonicalize_filenames() + if not self.cexecuted.has_key(filename): + self.cexecuted[filename] = {} + missing = [] + for line in statements: + if not self.cexecuted[filename].has_key(line): + missing.append(line) + return (filename, statements, missing, + self.format_lines(statements, missing)) + + def morf_name(self, morf): + if isinstance(morf, types.ModuleType): + return morf.__name__ + else: + return os.path.splitext(os.path.basename(morf))[0] + + def report(self, morfs, show_missing=1, ignore_errors=0): + if not isinstance(morfs, types.ListType): + morfs = [morfs] + max_name = max([5,] + map(len, map(self.morf_name, morfs))) + fmt_name = "%%- %ds " % max_name + fmt_err = fmt_name + "%s: %s" + header = fmt_name % "Name" + " Stmts Exec Cover" + fmt_coverage = fmt_name + "% 6d % 6d % 5d%%" + if show_missing: + header = header + " Missing" + fmt_coverage = fmt_coverage + " %s" + print header + print "-" * len(header) + total_statements = 0 + total_executed = 0 + for morf in morfs: + name = self.morf_name(morf) + try: + _, statements, missing, readable = self.analysis(morf) + n = len(statements) + m = n - len(missing) + if n > 0: + pc = 100.0 * m / n + else: + pc = 100.0 + args = (name, n, m, pc) + if show_missing: + args = args + (readable,) + print fmt_coverage % args + total_statements = total_statements + n + total_executed = total_executed + m + except KeyboardInterrupt: + raise + except: + if not ignore_errors: + type, msg = sys.exc_info()[0:2] + print fmt_err % (name, type, msg) + if len(morfs) > 1: + print "-" * len(header) + if total_statements > 0: + pc = 100.0 * total_executed / total_statements + else: + pc = 100.0 + args = ("TOTAL", total_statements, total_executed, pc) + if show_missing: + args = args + ("",) + print fmt_coverage % args + + # annotate(morfs, ignore_errors). + + blank_re = re.compile("\\s*(#|$)") + else_re = re.compile("\\s*else\\s*:\\s*(#|$)") + + def annotate(self, morfs, directory=None, ignore_errors=0): + for morf in morfs: + try: + filename, statements, missing, _ = self.analysis(morf) + source = open(filename, 'r') + if directory: + dest_file = os.path.join(directory, + os.path.basename(filename) + + ',cover') + else: + dest_file = filename + ',cover' + dest = open(dest_file, 'w') + lineno = 0 + i = 0 + j = 0 + covered = 1 + while 1: + line = source.readline() + if line == '': + break + lineno = lineno + 1 + while i < len(statements) and statements[i] < lineno: + i = i + 1 + while j < len(missing) and missing[j] < lineno: + j = j + 1 + if i < len(statements) and statements[i] == lineno: + covered = j >= len(missing) or missing[j] > lineno + if self.blank_re.match(line): + dest.write(' ') + elif self.else_re.match(line): + # Special logic for lines containing only + # 'else:'. See [GDR 2001-12-04b, 3.2]. + if i >= len(statements) and j >= len(missing): + dest.write('! ') + elif i >= len(statements) or j >= len(missing): + dest.write('> ') + elif statements[i] == missing[j]: + dest.write('! ') + else: + dest.write('> ') + elif covered: + dest.write('> ') + else: + dest.write('! ') + dest.write(line) + source.close() + dest.close() + except KeyboardInterrupt: + raise + except: + if not ignore_errors: + raise + + +# Singleton object. +the_coverage = coverage() + +# Module functions call methods in the singleton object. +def start(*args): return apply(the_coverage.start, args) +def stop(*args): return apply(the_coverage.stop, args) +def erase(*args): return apply(the_coverage.erase, args) +def analysis(*args): return apply(the_coverage.analysis, args) +def report(*args): return apply(the_coverage.report, args) + +# Save coverage data when Python exits. (The atexit module wasn't +# introduced until Python 2.0, so use sys.exitfunc when it's not +# available.) +try: + import atexit + atexit.register(the_coverage.save) +except ImportError: + sys.exitfunc = the_coverage.save + +# Command-line interface. +if __name__ == '__main__': + the_coverage.command_line() + + +# A. REFERENCES +# +# [GDR 2001-12-04a] "Statement coverage for Python"; Gareth Rees; +# Ravenbrook Limited; 2001-12-04; +# . +# +# [GDR 2001-12-04b] "Statement coverage for Python: design and +# analysis"; Gareth Rees; Ravenbrook Limited; 2001-12-04; +# . +# +# [van Rossum 2001-07-20a] "Python Reference Manual (releae 2.1.1)"; +# Guide van Rossum; 2001-07-20; +# . +# +# [van Rossum 2001-07-20b] "Python Library Reference"; Guido van Rossum; +# 2001-07-20; . +# +# +# B. DOCUMENT HISTORY +# +# 2001-12-04 GDR Created. +# +# 2001-12-06 GDR Added command-line interface and source code +# annotation. +# +# 2001-12-09 GDR Moved design and interface to separate documents. +# +# 2001-12-10 GDR Open cache file as binary on Windows. Allow +# simultaneous -e and -x, or -a and -r. +# +# 2001-12-12 GDR Added command-line help. Cache analysis so that it +# only needs to be done once when you specify -a and -r. +# +# 2001-12-13 GDR Improved speed while recording. Portable between +# Python 1.5.2 and 2.1.1. +# +# 2002-01-03 GDR Module-level functions work correctly. +# +# 2002-01-07 GDR Update sys.path when running a file with the -x option, +# so that it matches the value the program would get if it were run on +# its own. +# +# +# C. COPYRIGHT AND LICENCE +# +# Copyright 2001 Gareth Rees. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the +# distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +# DAMAGE. +# +# +# +# $Id$