Lib/profile.py

   1 #! /usr/bin/env python
   2 #
   3 # Class for profiling python code. rev 1.0  6/2/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7
   8 """Class for profiling Python code."""
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35
  36 import sys
  37 import os
  38 import time
  39 import marshal
  40 from optparse import OptionParser
  41
  42 __all__ = ["run", "runctx", "help", "Profile"]
  43
  44 # Sample timer for use with
  45 #i_count = 0
  46 #def integer_timer():
  47 #       global i_count
  48 #       i_count = i_count + 1
  49 #       return i_count
  50 #itimes = integer_timer # replace with C coded timer returning integers
  51
  52 #**************************************************************************
  53 # The following are the static member functions for the profiler class
  54 # Note that an instance of Profile() is *not* needed to call them.
  55 #**************************************************************************
  56
  57 def run(statement, filename=None, sort=-1):
  58     """Run statement under profiler optionally saving results in filename
  59
  60     This function takes a single argument that can be passed to the
  61     "exec" statement, and an optional file name.  In all cases this
  62     routine attempts to "exec" its first argument and gather profiling
  63     statistics from the execution. If no file name is present, then this
  64     function automatically prints a simple profiling report, sorted by the
  65     standard name string (file/line/function-name) that is presented in
  66     each line.
  67     """
  68     prof = Profile()
  69     try:
  70         prof = prof.run(statement)
  71     except SystemExit:
  72         pass
  73     if filename is not None:
  74         prof.dump_stats(filename)
  75     else:
  76         return prof.print_stats(sort)
  77
  78 def runctx(statement, globals, locals, filename=None):
  79     """Run statement under profiler, supplying your own globals and locals,
  80     optionally saving results in filename.
  81
  82     statement and filename have the same semantics as profile.run
  83     """
  84     prof = Profile()
  85     try:
  86         prof = prof.runctx(statement, globals, locals)
  87     except SystemExit:
  88         pass
  89
  90     if filename is not None:
  91         prof.dump_stats(filename)
  92     else:
  93         return prof.print_stats()
  94
  95 # Backwards compatibility.
  96 def help():
  97     print "Documentation for the profile module can be found "
  98     print "in the Python Library Reference, section 'The Python Profiler'."
  99
 100 if hasattr(os, "times"):
 101     def _get_time_times(timer=os.times):
 102         t = timer()
 103         return t[0] + t[1]
 104
 105 # Using getrusage(3) is better than clock(3) if available:
 106 # on some systems (e.g. FreeBSD), getrusage has a higher resolution
 107 # Furthermore, on a POSIX system, returns microseconds, which
 108 # wrap around after 36min.
 109 _has_res = 0
 110 try:
 111     import resource
 112     resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
 113     def _get_time_resource(timer=resgetrusage):
 114         t = timer()
 115         return t[0] + t[1]
 116     _has_res = 1
 117 except ImportError:
 118     pass
 119
 120 class Profile:
 121     """Profiler class.
 122
 123     self.cur is always a tuple.  Each such tuple corresponds to a stack
 124     frame that is currently active (self.cur[-2]).  The following are the
 125     definitions of its members.  We use this external "parallel stack" to
 126     avoid contaminating the program that we are profiling. (old profiler
 127     used to write into the frames local dictionary!!) Derived classes
 128     can change the definition of some entries, as long as they leave
 129     [-2:] intact (frame and previous tuple).  In case an internal error is
 130     detected, the -3 element is used as the function name.
 131
 132     [ 0] = Time that needs to be charged to the parent frame's function.
 133            It is used so that a function call will not have to access the
 134            timing data for the parent frame.
 135     [ 1] = Total time spent in this frame's function, excluding time in
 136            subfunctions (this latter is tallied in cur[2]).
 137     [ 2] = Total time spent in subfunctions, excluding time executing the
 138            frame's function (this latter is tallied in cur[1]).
 139     [-3] = Name of the function that corresponds to this frame.
 140     [-2] = Actual frame that we correspond to (used to sync exception handling).
 141     [-1] = Our parent 6-tuple (corresponds to frame.f_back).
 142
 143     Timing data for each function is stored as a 5-tuple in the dictionary
 144     self.timings[].  The index is always the name stored in self.cur[-3].
 145     The following are the definitions of the members:
 146
 147     [0] = The number of times this function was called, not counting direct
 148           or indirect recursion,
 149     [1] = Number of times this function appears on the stack, minus one
 150     [2] = Total time spent internal to this function
 151     [3] = Cumulative time that this function was present on the stack.  In
 152           non-recursive functions, this is the total execution time from start
 153           to finish of each invocation of a function, including time spent in
 154           all subfunctions.
 155     [4] = A dictionary indicating for each function name, the number of times
 156           it was called by us.
 157     """
 158
 159     bias = 0  # calibration constant
 160
 161     def __init__(self, timer=None, bias=None):
 162         self.timings = {}
 163         self.cur = None
 164         self.cmd = ""
 165         self.c_func_name = ""
 166
 167         if bias is None:
 168             bias = self.bias
 169         self.bias = bias     # Materialize in local dict for lookup speed.
 170
 171         if not timer:
 172             if _has_res:
 173                 self.timer = resgetrusage
 174                 self.dispatcher = self.trace_dispatch
 175                 self.get_time = _get_time_resource
 176             elif hasattr(time, 'clock'):
 177                 self.timer = self.get_time = time.clock
 178                 self.dispatcher = self.trace_dispatch_i
 179             elif hasattr(os, 'times'):
 180                 self.timer = os.times
 181                 self.dispatcher = self.trace_dispatch
 182                 self.get_time = _get_time_times
 183             else:
 184                 self.timer = self.get_time = time.time
 185                 self.dispatcher = self.trace_dispatch_i
 186         else:
 187             self.timer = timer
 188             t = self.timer() # test out timer function
 189             try:
 190                 length = len(t)
 191             except TypeError:
 192                 self.get_time = timer
 193                 self.dispatcher = self.trace_dispatch_i
 194             else:
 195                 if length == 2:
 196                     self.dispatcher = self.trace_dispatch
 197                 else:
 198                     self.dispatcher = self.trace_dispatch_l
 199                 # This get_time() implementation needs to be defined
 200                 # here to capture the passed-in timer in the parameter
 201                 # list (for performance).  Note that we can't assume
 202                 # the timer() result contains two values in all
 203                 # cases.
 204                 def get_time_timer(timer=timer, sum=sum):
 205                     return sum(timer())
 206                 self.get_time = get_time_timer
 207         self.t = self.get_time()
 208         self.simulate_call('profiler')
 209
 210     # Heavily optimized dispatch routine for os.times() timer
 211
 212     def trace_dispatch(self, frame, event, arg):
 213         timer = self.timer
 214         t = timer()
 215         t = t[0] + t[1] - self.t - self.bias
 216
 217         if event == "c_call":
 218             self.c_func_name = arg.__name__
 219
 220         if self.dispatch[event](self, frame,t):
 221             t = timer()
 222             self.t = t[0] + t[1]
 223         else:
 224             r = timer()
 225             self.t = r[0] + r[1] - t # put back unrecorded delta
 226
 227     # Dispatch routine for best timer program (return = scalar, fastest if
 228     # an integer but float works too -- and time.clock() relies on that).
 229
 230     def trace_dispatch_i(self, frame, event, arg):
 231         timer = self.timer
 232         t = timer() - self.t - self.bias
 233
 234         if event == "c_call":
 235             self.c_func_name = arg.__name__
 236
 237         if self.dispatch[event](self, frame, t):
 238             self.t = timer()
 239         else:
 240             self.t = timer() - t  # put back unrecorded delta
 241
 242     # Dispatch routine for macintosh (timer returns time in ticks of
 243     # 1/60th second)
 244
 245     def trace_dispatch_mac(self, frame, event, arg):
 246         timer = self.timer
 247         t = timer()/60.0 - self.t - self.bias
 248
 249         if event == "c_call":
 250             self.c_func_name = arg.__name__
 251
 252         if self.dispatch[event](self, frame, t):
 253             self.t = timer()/60.0
 254         else:
 255             self.t = timer()/60.0 - t  # put back unrecorded delta
 256
 257     # SLOW generic dispatch routine for timer returning lists of numbers
 258
 259     def trace_dispatch_l(self, frame, event, arg):
 260         get_time = self.get_time
 261         t = get_time() - self.t - self.bias
 262
 263         if event == "c_call":
 264             self.c_func_name = arg.__name__
 265
 266         if self.dispatch[event](self, frame, t):
 267             self.t = get_time()
 268         else:
 269             self.t = get_time() - t # put back unrecorded delta
 270
 271     # In the event handlers, the first 3 elements of self.cur are unpacked
 272     # into vrbls w/ 3-letter names.  The last two characters are meant to be
 273     # mnemonic:
 274     #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
 275     #     _it  self.cur[1] "internal time" time spent directly in the function
 276     #     _et  self.cur[2] "external time" time spent in subfunctions
 277
 278     def trace_dispatch_exception(self, frame, t):
 279         rpt, rit, ret, rfn, rframe, rcur = self.cur
 280         if (rframe is not frame) and rcur:
 281             return self.trace_dispatch_return(rframe, t)
 282         self.cur = rpt, rit+t, ret, rfn, rframe, rcur
 283         return 1
 284
 285
 286     def trace_dispatch_call(self, frame, t):
 287         if self.cur and frame.f_back is not self.cur[-2]:
 288             rpt, rit, ret, rfn, rframe, rcur = self.cur
 289             if not isinstance(rframe, Profile.fake_frame):
 290                 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
 291                                                        rframe, rframe.f_back,
 292                                                        frame, frame.f_back)
 293                 self.trace_dispatch_return(rframe, 0)
 294                 assert (self.cur is None or \
 295                         frame.f_back is self.cur[-2]), ("Bad call",
 296                                                         self.cur[-3])
 297         fcode = frame.f_code
 298         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
 299         self.cur = (t, 0, 0, fn, frame, self.cur)
 300         timings = self.timings
 301         if fn in timings:
 302             cc, ns, tt, ct, callers = timings[fn]
 303             timings[fn] = cc, ns + 1, tt, ct, callers
 304         else:
 305             timings[fn] = 0, 0, 0, 0, {}
 306         return 1
 307
 308     def trace_dispatch_c_call (self, frame, t):
 309         fn = ("", 0, self.c_func_name)
 310         self.cur = (t, 0, 0, fn, frame, self.cur)
 311         timings = self.timings
 312         if fn in timings:
 313             cc, ns, tt, ct, callers = timings[fn]
 314             timings[fn] = cc, ns+1, tt, ct, callers
 315         else:
 316             timings[fn] = 0, 0, 0, 0, {}
 317         return 1
 318
 319     def trace_dispatch_return(self, frame, t):
 320         if frame is not self.cur[-2]:
 321             assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
 322             self.trace_dispatch_return(self.cur[-2], 0)
 323
 324         # Prefix "r" means part of the Returning or exiting frame.
 325         # Prefix "p" means part of the Previous or Parent or older frame.
 326
 327         rpt, rit, ret, rfn, frame, rcur = self.cur
 328         rit = rit + t
 329         frame_total = rit + ret
 330
 331         ppt, pit, pet, pfn, pframe, pcur = rcur
 332         self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
 333
 334         timings = self.timings
 335         cc, ns, tt, ct, callers = timings[rfn]
 336         if not ns:
 337             # This is the only occurrence of the function on the stack.
 338             # Else this is a (directly or indirectly) recursive call, and
 339             # its cumulative time will get updated when the topmost call to
 340             # it returns.
 341             ct = ct + frame_total
 342             cc = cc + 1
 343
 344         if pfn in callers:
 345             callers[pfn] = callers[pfn] + 1  # hack: gather more
 346             # stats such as the amount of time added to ct courtesy
 347             # of this specific call, and the contribution to cc
 348             # courtesy of this call.
 349         else:
 350             callers[pfn] = 1
 351
 352         timings[rfn] = cc, ns - 1, tt + rit, ct, callers
 353
 354         return 1
 355
 356
 357     dispatch = {
 358         "call": trace_dispatch_call,
 359         "exception": trace_dispatch_exception,
 360         "return": trace_dispatch_return,
 361         "c_call": trace_dispatch_c_call,
 362         "c_exception": trace_dispatch_return,  # the C function returned
 363         "c_return": trace_dispatch_return,
 364         }
 365
 366
 367     # The next few functions play with self.cmd. By carefully preloading
 368     # our parallel stack, we can force the profiled result to include
 369     # an arbitrary string as the name of the calling function.
 370     # We use self.cmd as that string, and the resulting stats look
 371     # very nice :-).
 372
 373     def set_cmd(self, cmd):
 374         if self.cur[-1]: return   # already set
 375         self.cmd = cmd
 376         self.simulate_call(cmd)
 377
 378     class fake_code:
 379         def __init__(self, filename, line, name):
 380             self.co_filename = filename
 381             self.co_line = line
 382             self.co_name = name
 383             self.co_firstlineno = 0
 384
 385         def __repr__(self):
 386             return repr((self.co_filename, self.co_line, self.co_name))
 387
 388     class fake_frame:
 389         def __init__(self, code, prior):
 390             self.f_code = code
 391             self.f_back = prior
 392
 393     def simulate_call(self, name):
 394         code = self.fake_code('profile', 0, name)
 395         if self.cur:
 396             pframe = self.cur[-2]
 397         else:
 398             pframe = None
 399         frame = self.fake_frame(code, pframe)
 400         self.dispatch['call'](self, frame, 0)
 401
 402     # collect stats from pending stack, including getting final
 403     # timings for self.cmd frame.
 404
 405     def simulate_cmd_complete(self):
 406         get_time = self.get_time
 407         t = get_time() - self.t
 408         while self.cur[-1]:
 409             # We *can* cause assertion errors here if
 410             # dispatch_trace_return checks for a frame match!
 411             self.dispatch['return'](self, self.cur[-2], t)
 412             t = 0
 413         self.t = get_time() - t
 414
 415
 416     def print_stats(self, sort=-1):
 417         import pstats
 418         pstats.Stats(self).strip_dirs().sort_stats(sort). \
 419                   print_stats()
 420
 421     def dump_stats(self, file):
 422         f = open(file, 'wb')
 423         self.create_stats()
 424         marshal.dump(self.stats, f)
 425         f.close()
 426
 427     def create_stats(self):
 428         self.simulate_cmd_complete()
 429         self.snapshot_stats()
 430
 431     def snapshot_stats(self):
 432         self.stats = {}
 433         for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
 434             callers = callers.copy()
 435             nc = 0
 436             for callcnt in callers.itervalues():
 437                 nc += callcnt
 438             self.stats[func] = cc, nc, tt, ct, callers
 439
 440
 441     # The following two methods can be called by clients to use
 442     # a profiler to profile a statement, given as a string.
 443
 444     def run(self, cmd):
 445         import __main__
 446         dict = __main__.__dict__
 447         return self.runctx(cmd, dict, dict)
 448
 449     def runctx(self, cmd, globals, locals):
 450         self.set_cmd(cmd)
 451         sys.setprofile(self.dispatcher)
 452         try:
 453             exec cmd in globals, locals
 454         finally:
 455             sys.setprofile(None)
 456         return self
 457
 458     # This method is more useful to profile a single function call.
 459     def runcall(self, func, *args, **kw):
 460         self.set_cmd(repr(func))
 461         sys.setprofile(self.dispatcher)
 462         try:
 463             return func(*args, **kw)
 464         finally:
 465             sys.setprofile(None)
 466
 467
 468     #******************************************************************
 469     # The following calculates the overhead for using a profiler.  The
 470     # problem is that it takes a fair amount of time for the profiler
 471     # to stop the stopwatch (from the time it receives an event).
 472     # Similarly, there is a delay from the time that the profiler
 473     # re-starts the stopwatch before the user's code really gets to
 474     # continue.  The following code tries to measure the difference on
 475     # a per-event basis.
 476     #
 477     # Note that this difference is only significant if there are a lot of
 478     # events, and relatively little user code per event.  For example,
 479     # code with small functions will typically benefit from having the
 480     # profiler calibrated for the current platform.  This *could* be
 481     # done on the fly during init() time, but it is not worth the
 482     # effort.  Also note that if too large a value specified, then
 483     # execution time on some functions will actually appear as a
 484     # negative number.  It is *normal* for some functions (with very
 485     # low call counts) to have such negative stats, even if the
 486     # calibration figure is "correct."
 487     #
 488     # One alternative to profile-time calibration adjustments (i.e.,
 489     # adding in the magic little delta during each event) is to track
 490     # more carefully the number of events (and cumulatively, the number
 491     # of events during sub functions) that are seen.  If this were
 492     # done, then the arithmetic could be done after the fact (i.e., at
 493     # display time).  Currently, we track only call/return events.
 494     # These values can be deduced by examining the callees and callers
 495     # vectors for each functions.  Hence we *can* almost correct the
 496     # internal time figure at print time (note that we currently don't
 497     # track exception event processing counts).  Unfortunately, there
 498     # is currently no similar information for cumulative sub-function
 499     # time.  It would not be hard to "get all this info" at profiler
 500     # time.  Specifically, we would have to extend the tuples to keep
 501     # counts of this in each frame, and then extend the defs of timing
 502     # tuples to include the significant two figures. I'm a bit fearful
 503     # that this additional feature will slow the heavily optimized
 504     # event/time ratio (i.e., the profiler would run slower, fur a very
 505     # low "value added" feature.)
 506     #**************************************************************
 507
 508     def calibrate(self, m, verbose=0):
 509         if self.__class__ is not Profile:
 510             raise TypeError("Subclasses must override .calibrate().")
 511
 512         saved_bias = self.bias
 513         self.bias = 0
 514         try:
 515             return self._calibrate_inner(m, verbose)
 516         finally:
 517             self.bias = saved_bias
 518
 519     def _calibrate_inner(self, m, verbose):
 520         get_time = self.get_time
 521
 522         # Set up a test case to be run with and without profiling.  Include
 523         # lots of calls, because we're trying to quantify stopwatch overhead.
 524         # Do not raise any exceptions, though, because we want to know
 525         # exactly how many profile events are generated (one call event, +
 526         # one return event, per Python-level call).
 527
 528         def f1(n):
 529             for i in range(n):
 530                 x = 1
 531
 532         def f(m, f1=f1):
 533             for i in range(m):
 534                 f1(100)
 535
 536         f(m)    # warm up the cache
 537
 538         # elapsed_noprofile <- time f(m) takes without profiling.
 539         t0 = get_time()
 540         f(m)
 541         t1 = get_time()
 542         elapsed_noprofile = t1 - t0
 543         if verbose:
 544             print "elapsed time without profiling =", elapsed_noprofile
 545
 546         # elapsed_profile <- time f(m) takes with profiling.  The difference
 547         # is profiling overhead, only some of which the profiler subtracts
 548         # out on its own.
 549         p = Profile()
 550         t0 = get_time()
 551         p.runctx('f(m)', globals(), locals())
 552         t1 = get_time()
 553         elapsed_profile = t1 - t0
 554         if verbose:
 555             print "elapsed time with profiling =", elapsed_profile
 556
 557         # reported_time <- "CPU seconds" the profiler charged to f and f1.
 558         total_calls = 0.0
 559         reported_time = 0.0
 560         for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
 561                 p.timings.items():
 562             if funcname in ("f", "f1"):
 563                 total_calls += cc
 564                 reported_time += tt
 565
 566         if verbose:
 567             print "'CPU seconds' profiler reported =", reported_time
 568             print "total # calls =", total_calls
 569         if total_calls != m + 1:
 570             raise ValueError("internal error: total calls = %d" % total_calls)
 571
 572         # reported_time - elapsed_noprofile = overhead the profiler wasn't
 573         # able to measure.  Divide by twice the number of calls (since there
 574         # are two profiler events per call in this test) to get the hidden
 575         # overhead per event.
 576         mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
 577         if verbose:
 578             print "mean stopwatch overhead per profile event =", mean
 579         return mean
 580
 581 #****************************************************************************
 582 def Stats(*args):
 583     print 'Report generating functions are in the "pstats" module\a'
 584
 585 def main():
 586     usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
 587     parser = OptionParser(usage=usage)
 588     parser.allow_interspersed_args = False
 589     parser.add_option('-o', '--outfile', dest="outfile",
 590         help="Save stats to <outfile>", default=None)
 591     parser.add_option('-s', '--sort', dest="sort",
 592         help="Sort order when printing to stdout, based on pstats.Stats class", default=-1)
 593
 594     if not sys.argv[1:]:
 595         parser.print_usage()
 596         sys.exit(2)
 597
 598     (options, args) = parser.parse_args()
 599
 600     if (len(args) > 0):
 601         sys.argv[:] = args
 602         sys.path.insert(0, os.path.dirname(sys.argv[0]))
 603         run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort)
 604     else:
 605         parser.print_usage()
 606     return parser
 607
 608 # When invoked as main program, invoke the profiler on a script
 609 if __name__ == '__main__':
 610     main()