Tools/scripts/trace.py

   1 #!/usr/bin/env python
   2
   3 # Copyright 2000, Mojam Media, Inc., all rights reserved.
   4 # Author: Skip Montanaro
   5 #
   6 # Copyright 1999, Bioreason, Inc., all rights reserved.
   7 # Author: Andrew Dalke
   8 #
   9 # Copyright 1995-1997, Automatrix, Inc., all rights reserved.
  10 # Author: Skip Montanaro
  11 #
  12 # Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved.
  13 #
  14 #
  15 # Permission to use, copy, modify, and distribute this Python software and
  16 # its associated documentation for any purpose without fee is hereby
  17 # granted, provided that the above copyright notice appears in all copies,
  18 # and that both that copyright notice and this permission notice appear in
  19 # supporting documentation, and that the name of neither Automatrix,
  20 # Bioreason or Mojam Media be used in advertising or publicity pertaining to
  21 # distribution of the software without specific, written prior permission.
  22 #
  23 #
  24 # Summary of recent changes:
  25 #   Support for files with the same basename (submodules in packages)
  26 #   Expanded the idea of how to ignore files or modules
  27 #   Split tracing and counting into different classes
  28 #   Extracted count information and reporting from the count class
  29 #   Added some ability to detect which missing lines could be executed
  30 #   Added pseudo-pragma to prohibit complaining about unexecuted lines
  31 #   Rewrote the main program
  32
  33 # Summary of older changes:
  34 #   Added run-time display of statements being executed
  35 #   Incorporated portability and performance fixes from Greg Stein
  36 #   Incorporated main program from Michael Scharf
  37
  38 """
  39 program/module to trace Python program or function execution
  40
  41 Sample use, command line:
  42   trace.py -c -f counts --ignore-dir '$prefix' spam.py eggs
  43   trace.py -t --ignore-dir '$prefix' spam.py eggs
  44
  45 Sample use, programmatically (still more complicated than it should be)
  46    # create an Ignore option, telling it what you want to ignore
  47    ignore = trace.Ignore(dirs = [sys.prefix, sys.exec_prefix])
  48    # create a Coverage object, telling it what to ignore
  49    coverage = trace.Coverage(ignore)
  50    # run the new command using the given trace
  51    trace.run(coverage.trace, 'main()')
  52
  53    # make a report, telling it where you want output
  54    t = trace.create_results_log(coverage.results(),
  55                                 '/usr/local/Automatrix/concerts/coverage',
  56                                 show_missing = 1)
  57
  58    The Trace class can be instantiated instead of the Coverage class if
  59    runtime display of executable lines is desired instead of statement
  60    coverage measurement.
  61 """
  62
  63 import sys, os, string, marshal, tempfile, copy, operator
  64
  65 def usage(outfile):
  66     outfile.write("""Usage: %s [OPTIONS] <file> [ARGS]
  67
  68 Execution:
  69       --help           Display this help then exit.
  70       --version        Output version information then exit.
  71    -t,--trace          Print the line to be executed to sys.stdout.
  72    -c,--count          Count the number of times a line is executed.
  73                          Results are written in the results file, if given.
  74    -r,--report         Generate a report from a results file; do not
  75                          execute any code.
  76         (One of `-t', `-c' or `-r' must be specified)
  77
  78 I/O:
  79    -f,--file=          File name for accumulating results over several runs.
  80                          (No file name means do not archive results)
  81    -d,--logdir=        Directory to use when writing annotated log files.
  82                          Log files are the module __name__ with `.` replaced
  83                          by os.sep and with '.pyl' added.
  84    -m,--missing        Annotate all executable lines which were not executed
  85                          with a '>>>>>> '.
  86    -R,--no-report      Do not generate the annotated reports.  Useful if
  87                          you want to accumulate several over tests.
  88
  89 Selection:                 Do not trace or log lines from ...
  90   --ignore-module=[string]   modules with the given __name__, and submodules
  91                               of that module
  92   --ignore-dir=[string]      files in the stated directory (multiple
  93                               directories can be joined by os.pathsep)
  94
  95   The selection options can be listed multiple times to ignore different
  96 modules.
  97 """ % sys.argv[0])
  98
  99
 100 class Ignore:
 101     def __init__(self, modules = None, dirs = None):
 102         self._mods = modules or []
 103         self._dirs = dirs or []
 104
 105         self._ignore = { '<string>': 1 }
 106
 107
 108     def names(self, filename, modulename):
 109         if self._ignore.has_key(modulename):
 110             return self._ignore[modulename]
 111
 112         # haven't seen this one before, so see if the module name is
 113         # on the ignore list.  Need to take some care since ignoring
 114         # "cmp" musn't mean ignoring "cmpcache" but ignoring
 115         # "Spam" must also mean ignoring "Spam.Eggs".
 116         for mod in self._mods:
 117             if mod == modulename:  # Identical names, so ignore
 118                 self._ignore[modulename] = 1
 119                 return 1
 120             # check if the module is a proper submodule of something on
 121             # the ignore list
 122             n = len(mod)
 123             # (will not overflow since if the first n characters are the
 124             # same and the name has not already occured, then the size
 125             # of "name" is greater than that of "mod")
 126             if mod == modulename[:n] and modulename[n] == '.':
 127                 self._ignore[modulename] = 1
 128                 return 1
 129
 130         # Now check that __file__ isn't in one of the directories
 131         if filename is None:
 132             # must be a built-in, so we must ignore
 133             self._ignore[modulename] = 1
 134             return 1
 135
 136         # Ignore a file when it contains one of the ignorable paths
 137         for d in self._dirs:
 138             # The '+ os.sep' is to ensure that d is a parent directory,
 139             # as compared to cases like:
 140             #  d = "/usr/local"
 141             #  filename = "/usr/local.py"
 142             # or
 143             #  d = "/usr/local.py"
 144             #  filename = "/usr/local.py"
 145             if string.find(filename, d + os.sep) == 0:
 146                 self._ignore[modulename] = 1
 147                 return 1
 148
 149         # Tried the different ways, so we don't ignore this module
 150         self._ignore[modulename] = 0
 151         return 0
 152
 153
 154 def run(trace, cmd):
 155     import __main__
 156     dict = __main__.__dict__
 157     sys.settrace(trace)
 158     try:
 159         exec cmd in dict, dict
 160     finally:
 161         sys.settrace(None)
 162
 163 def runctx(trace, cmd, globals=None, locals=None):
 164     if globals is None: globals = {}
 165     if locals is None: locals = {}
 166     sys.settrace(trace)
 167     try:
 168         exec cmd in dict, dict
 169     finally:
 170         sys.settrace(None)
 171
 172 def runfunc(trace, func, *args, **kw):
 173     result = None
 174     sys.settrace(trace)
 175     try:
 176         result = apply(func, args, kw)
 177     finally:
 178         sys.settrace(None)
 179     return result
 180
 181
 182 class CoverageResults:
 183     def __init__(self, counts = {}, modules = {}):
 184         self.counts = counts.copy()    # map (filename, lineno) to count
 185         self.modules = modules.copy()  # map filenames to modules
 186
 187     def update(self, other):
 188         """Merge in the data from another CoverageResults"""
 189         counts = self.counts
 190         other_counts = other.counts
 191         modules = self.modules
 192         other_modules = other.modules
 193
 194         for key in other_counts.keys():
 195             counts[key] = counts.get(key, 0) + other_counts[key]
 196
 197         for key in other_modules.keys():
 198             if modules.has_key(key):
 199                 # make sure they point to the same file
 200                 assert modules[key] == other_modules[key], \
 201                        "Strange! filename %s has two different module names" % \
 202                        (key, modules[key], other_module[key])
 203             else:
 204                 modules[key] = other_modules[key]
 205
 206 # Given a code string, return the SET_LINENO information
 207 def _find_LINENO_from_string(co_code):
 208     """return all of the SET_LINENO information from a code string"""
 209     import dis
 210     linenos = {}
 211
 212     # This code was filched from the `dis' module then modified
 213     n = len(co_code)
 214     i = 0
 215     prev_op = None
 216     prev_lineno = 0
 217     while i < n:
 218         c = co_code[i]
 219         op = ord(c)
 220         if op == dis.SET_LINENO:
 221             if prev_op == op:
 222                 # two SET_LINENO in a row, so the previous didn't
 223                 # indicate anything.  This occurs with triple
 224                 # quoted strings (?).  Remove the old one.
 225                 del linenos[prev_lineno]
 226             prev_lineno = ord(co_code[i+1]) + ord(co_code[i+2])*256
 227             linenos[prev_lineno] = 1
 228         if op >= dis.HAVE_ARGUMENT:
 229             i = i + 3
 230         else:
 231             i = i + 1
 232         prev_op = op
 233     return linenos
 234
 235 def _find_LINENO(code):
 236     """return all of the SET_LINENO information from a code object"""
 237     import types
 238
 239     # get all of the lineno information from the code of this scope level
 240     linenos = _find_LINENO_from_string(code.co_code)
 241
 242     # and check the constants for references to other code objects
 243     for c in code.co_consts:
 244         if type(c) == types.CodeType:
 245             # find another code object, so recurse into it
 246             linenos.update(_find_LINENO(c))
 247     return linenos
 248
 249 def find_executable_linenos(filename):
 250     """return a dict of the line numbers from executable statements in a file
 251
 252     Works by finding all of the code-like objects in the module then searching
 253     the byte code for 'SET_LINENO' terms (so this won't work one -O files).
 254
 255     """
 256     import parser
 257
 258     prog = open(filename).read()
 259     ast = parser.suite(prog)
 260     code = parser.compileast(ast, filename)
 261
 262     # The only way I know to find line numbers is to look for the
 263     # SET_LINENO instructions.  Isn't there some way to get it from
 264     # the AST?
 265
 266     return _find_LINENO(code)
 267
 268 ### XXX because os.path.commonprefix seems broken by my way of thinking...
 269 def commonprefix(dirs):
 270     "Given a list of pathnames, returns the longest common leading component"
 271     if not dirs: return ''
 272     n = copy.copy(dirs)
 273     for i in range(len(n)):
 274         n[i] = n[i].split(os.sep)
 275     prefix = n[0]
 276     for item in n:
 277         for i in range(len(prefix)):
 278             if prefix[:i+1] <> item[:i+1]:
 279                 prefix = prefix[:i]
 280                 if i == 0: return ''
 281                 break
 282     return os.sep.join(prefix)
 283
 284 def create_results_log(results, dirname = ".", show_missing = 1,
 285                        save_counts = 0):
 286     import re
 287     # turn the counts data ("(filename, lineno) = count") into something
 288     # accessible on a per-file basis
 289     per_file = {}
 290     for filename, lineno in results.counts.keys():
 291         lines_hit = per_file[filename] = per_file.get(filename, {})
 292         lines_hit[lineno] = results.counts[(filename, lineno)]
 293
 294     # try and merge existing counts and modules file from dirname
 295     try:
 296         counts = marshal.load(open(os.path.join(dirname, "counts")))
 297         modules = marshal.load(open(os.path.join(dirname, "modules")))
 298         results.update(results.__class__(counts, modules))
 299     except IOError:
 300         pass
 301
 302     # there are many places where this is insufficient, like a blank
 303     # line embedded in a multiline string.
 304     blank = re.compile(r'^\s*(#.*)?$')
 305
 306     # generate file paths for the coverage files we are going to write...
 307     fnlist = []
 308     tfdir = tempfile.gettempdir()
 309     for key in per_file.keys():
 310         filename = key
 311
 312         # skip some "files" we don't care about...
 313         if filename == "<string>":
 314             continue
 315         # are these caused by code compiled using exec or something?
 316         if filename.startswith(tfdir):
 317             continue
 318
 319         # XXX this is almost certainly not portable!!!
 320         fndir = os.path.dirname(filename)
 321         if filename[:1] == os.sep:
 322             coverpath = os.path.join(dirname, "."+fndir)
 323         else:
 324             coverpath = os.path.join(dirname, fndir)
 325
 326         if filename.endswith(".pyc") or filename.endswith(".pyo"):
 327             filename = filename[:-1]
 328
 329         # Get the original lines from the .py file
 330         try:
 331             lines = open(filename, 'r').readlines()
 332         except IOError, err:
 333             sys.stderr.write(
 334                 "%s: Could not open %s for reading because: %s - skipping\n" % \
 335                 ("trace", `filename`, err.strerror))
 336             continue
 337
 338         modulename = os.path.split(results.modules[key])[1]
 339
 340         # build list file name by appending a ".cover" to the module name
 341         # and sticking it into the specified directory
 342         listfilename = os.path.join(coverpath, modulename + ".cover")
 343         #sys.stderr.write("modulename: %(modulename)s\n"
 344         #                 "filename: %(filename)s\n"
 345         #                 "coverpath: %(coverpath)s\n"
 346         #                 "listfilename: %(listfilename)s\n"
 347         #                 "dirname: %(dirname)s\n"
 348         #                 % locals())
 349         try:
 350             outfile = open(listfilename, 'w')
 351         except IOError, err:
 352             sys.stderr.write(
 353                 '%s: Could not open %s for writing because: %s - skipping\n' %
 354                 ("trace", `listfilename`, err.strerror))
 355             continue
 356
 357         # If desired, get a list of the line numbers which represent
 358         # executable content (returned as a dict for better lookup speed)
 359         if show_missing:
 360             executable_linenos = find_executable_linenos(filename)
 361         else:
 362             executable_linenos = {}
 363
 364         lines_hit = per_file[key]
 365         for i in range(len(lines)):
 366             line = lines[i]
 367
 368             # do the blank/comment match to try to mark more lines
 369             # (help the reader find stuff that hasn't been covered)
 370             if lines_hit.has_key(i+1):
 371                 # count precedes the lines that we captured
 372                 outfile.write('%5d: ' % lines_hit[i+1])
 373             elif blank.match(line):
 374                 # blank lines and comments are preceded by dots
 375                 outfile.write('    . ')
 376             else:
 377                 # lines preceded by no marks weren't hit
 378                 # Highlight them if so indicated, unless the line contains
 379                 # '#pragma: NO COVER' (it is possible to embed this into
 380                 # the text as a non-comment; no easy fix)
 381                 if executable_linenos.has_key(i+1) and \
 382                    string.find(lines[i],
 383                                string.join(['#pragma', 'NO COVER'])) == -1:
 384                     outfile.write('>>>>>> ')
 385                 else:
 386                     outfile.write(' '*7)
 387             outfile.write(string.expandtabs(lines[i], 8))
 388
 389         outfile.close()
 390
 391         if save_counts:
 392             # try and store counts and module info into dirname
 393             try:
 394                 marshal.dump(results.counts,
 395                              open(os.path.join(dirname, "counts"), "w"))
 396                 marshal.dump(results.modules,
 397                              open(os.path.join(dirname, "modules"), "w"))
 398             except IOError, err:
 399                 sys.stderr.write("cannot save counts/modules files because %s" %
 400                                  err.strerror)
 401
 402 # There is a lot of code shared between these two classes even though
 403 # it is straightforward to make a super class to share code.  However,
 404 # for performance reasons (remember, this is called at every step) I
 405 # wanted to keep everything to a single function call.  Also, by
 406 # staying within a single scope, I don't have to temporarily nullify
 407 # sys.settrace, which would slow things down even more.
 408
 409 class Coverage:
 410     def __init__(self, ignore = Ignore()):
 411         self.ignore = ignore
 412         self.ignore_names = ignore._ignore # access ignore's cache (speed hack)
 413
 414         self.counts = {}   # keys are (filename, linenumber)
 415         self.modules = {}  # maps filename -> module name
 416
 417     def trace(self, frame, why, arg):
 418         if why == 'line':
 419             # something is fishy about getting the file name
 420             filename = frame.f_globals.get("__file__", None)
 421             if filename is None:
 422                 filename = frame.f_code.co_filename
 423             modulename = frame.f_globals["__name__"]
 424
 425             # We do this next block to keep from having to make methods
 426             # calls, which also requires resetting the trace
 427             ignore_it = self.ignore_names.get(modulename, -1)
 428             if ignore_it == -1:  # unknown filename
 429                 sys.settrace(None)
 430                 ignore_it = self.ignore.names(filename, modulename)
 431                 sys.settrace(self.trace)
 432
 433                 # record the module name for every file
 434                 self.modules[filename] = modulename
 435
 436             if not ignore_it:
 437                 lineno = frame.f_lineno
 438
 439                 # record the file name and line number of every trace
 440                 key = (filename, lineno)
 441                 self.counts[key] = self.counts.get(key, 0) + 1
 442
 443         return self.trace
 444
 445     def results(self):
 446         return CoverageResults(self.counts, self.modules)
 447
 448 class Trace:
 449     def __init__(self, ignore = Ignore()):
 450         self.ignore = ignore
 451         self.ignore_names = ignore._ignore # access ignore's cache (speed hack)
 452
 453         self.files = {'<string>': None}  # stores lines from the .py file, or None
 454
 455     def trace(self, frame, why, arg):
 456         if why == 'line':
 457             filename = frame.f_code.co_filename
 458             modulename = frame.f_globals["__name__"]
 459
 460             # We do this next block to keep from having to make methods
 461             # calls, which also requires resetting the trace
 462             ignore_it = self.ignore_names.get(modulename, -1)
 463             if ignore_it == -1:  # unknown filename
 464                 sys.settrace(None)
 465                 ignore_it = self.ignore.names(filename, modulename)
 466                 sys.settrace(self.trace)
 467
 468             if not ignore_it:
 469                 lineno = frame.f_lineno
 470                 files = self.files
 471
 472                 if filename != '<string>' and not files.has_key(filename):
 473                     files[filename] = map(string.rstrip,
 474                                           open(filename).readlines())
 475
 476                 # If you want to see filenames (the original behaviour), try:
 477                 #   modulename = filename
 478                 # or, prettier but confusing when several files have the same name
 479                 #   modulename = os.path.basename(filename)
 480
 481                 if files[filename] != None:
 482                     print '%s(%d): %s' % (os.path.basename(filename), lineno,
 483                                           files[filename][lineno-1])
 484                 else:
 485                     print '%s(%d): ??' % (modulename, lineno)
 486
 487         return self.trace
 488
 489
 490 def _err_exit(msg):
 491         sys.stderr.write("%s: %s\n" % (sys.argv[0], msg))
 492         sys.exit(1)
 493
 494 def main(argv = None):
 495     import getopt
 496
 497     if argv is None:
 498         argv = sys.argv
 499     try:
 500         opts, prog_argv = getopt.getopt(argv[1:], "tcrRf:d:m",
 501                                         ["help", "version", "trace", "count",
 502                                          "report", "no-report",
 503                                          "file=", "logdir=", "missing",
 504                                          "ignore-module=", "ignore-dir="])
 505
 506     except getopt.error, msg:
 507         sys.stderr.write("%s: %s\n" % (sys.argv[0], msg))
 508         sys.stderr.write("Try `%s --help' for more information\n" % sys.argv[0])
 509         sys.exit(1)
 510
 511     trace = 0
 512     count = 0
 513     report = 0
 514     no_report = 0
 515     counts_file = None
 516     logdir = "."
 517     missing = 0
 518     ignore_modules = []
 519     ignore_dirs = []
 520
 521     for opt, val in opts:
 522         if opt == "--help":
 523             usage(sys.stdout)
 524             sys.exit(0)
 525
 526         if opt == "--version":
 527             sys.stdout.write("trace 2.0\n")
 528             sys.exit(0)
 529
 530         if opt == "-t" or opt == "--trace":
 531             trace = 1
 532             continue
 533
 534         if opt == "-c" or opt == "--count":
 535             count = 1
 536             continue
 537
 538         if opt == "-r" or opt == "--report":
 539             report = 1
 540             continue
 541
 542         if opt == "-R" or opt == "--no-report":
 543             no_report = 1
 544             continue
 545
 546         if opt == "-f" or opt == "--file":
 547             counts_file = val
 548             continue
 549
 550         if opt == "-d" or opt == "--logdir":
 551             logdir = val
 552             continue
 553
 554         if opt == "-m" or opt == "--missing":
 555             missing = 1
 556             continue
 557
 558         if opt == "--ignore-module":
 559             ignore_modules.append(val)
 560             continue
 561
 562         if opt == "--ignore-dir":
 563             for s in string.split(val, os.pathsep):
 564                 s = os.path.expandvars(s)
 565                 # should I also call expanduser? (after all, could use $HOME)
 566
 567                 s = string.replace(s, "$prefix",
 568                                    os.path.join(sys.prefix, "lib",
 569                                                 "python" + sys.version[:3]))
 570                 s = string.replace(s, "$exec_prefix",
 571                                    os.path.join(sys.exec_prefix, "lib",
 572                                                 "python" + sys.version[:3]))
 573                 s = os.path.normpath(s)
 574                 ignore_dirs.append(s)
 575             continue
 576
 577         assert 0, "Should never get here"
 578
 579     if len(prog_argv) == 0:
 580         _err_exit("missing name of file to run")
 581
 582     if count + trace + report > 1:
 583         _err_exit("can only specify one of --trace, --count or --report")
 584
 585     if count + trace + report == 0:
 586         _err_exit("must specify one of --trace, --count or --report")
 587
 588     if report and counts_file is None:
 589         _err_exit("--report requires a --file")
 590
 591     if report and no_report:
 592         _err_exit("cannot specify both --report and --no-report")
 593
 594     if logdir is not None:
 595         # warn if the directory doesn't exist, but keep on going
 596         # (is this the correct behaviour?)
 597         if not os.path.isdir(logdir):
 598             sys.stderr.write(
 599                 "trace: WARNING, --logdir directory %s is not available\n" %
 600                        `logdir`)
 601
 602     sys.argv = prog_argv
 603     progname = prog_argv[0]
 604     if eval(sys.version[:3])>1.3:
 605         sys.path[0] = os.path.split(progname)[0] # ???
 606
 607     # everything is ready
 608     ignore = Ignore(ignore_modules, ignore_dirs)
 609     if trace:
 610         t = Trace(ignore)
 611         try:
 612             run(t.trace, 'execfile(' + `progname` + ')')
 613         except IOError, err:
 614             _err_exit("Cannot run file %s because: %s" % \
 615                       (`sys.argv[0]`, err.strerror))
 616
 617     elif count:
 618         t = Coverage(ignore)
 619         try:
 620             run(t.trace, 'execfile(' + `progname` + ')')
 621         except IOError, err:
 622             _err_exit("Cannot run file %s because: %s" % \
 623                       (`sys.argv[0]`, err.strerror))
 624         except SystemExit:
 625             pass
 626
 627         results = t.results()
 628         # Add another lookup from the program's file name to its import name
 629         # This give the right results, but I'm not sure why ...
 630         results.modules[progname] = os.path.splitext(progname)[0]
 631
 632         if counts_file:
 633             # add in archived data, if available
 634             try:
 635                 old_counts, old_modules = marshal.load(open(counts_file, 'rb'))
 636             except IOError:
 637                 pass
 638             else:
 639                 results.update(CoverageResults(old_counts, old_modules))
 640
 641         if not no_report:
 642             create_results_log(results, logdir, missing)
 643
 644         if counts_file:
 645             try:
 646                 marshal.dump( (results.counts, results.modules),
 647                               open(counts_file, 'wb'))
 648             except IOError, err:
 649                 _err_exit("Cannot save counts file %s because: %s" % \
 650                           (`counts_file`, err.strerror))
 651
 652     elif report:
 653         old_counts, old_modules = marshal.load(open(counts_file, 'rb'))
 654         results = CoverageResults(old_counts, old_modules)
 655         create_results_log(results, logdir, missing)
 656
 657     else:
 658         assert 0, "Should never get here"
 659
 660 if __name__=='__main__':
 661     main()