Lib/profile.py

   1 #! /usr/bin/env python
   2 #
   3 # Class for profiling python code. rev 1.0  6/2/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # See profile.doc for more information
   9
  10 """Class for profiling Python code."""
  11
  12 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  13 # Written by James Roskind
  14 #
  15 # Permission to use, copy, modify, and distribute this Python software
  16 # and its associated documentation for any purpose (subject to the
  17 # restriction in the following sentence) without fee is hereby granted,
  18 # provided that the above copyright notice appears in all copies, and
  19 # that both that copyright notice and this permission notice appear in
  20 # supporting documentation, and that the name of InfoSeek not be used in
  21 # advertising or publicity pertaining to distribution of the software
  22 # without specific, written prior permission.  This permission is
  23 # explicitly restricted to the copying and modification of the software
  24 # to remain in Python, compiled Python, or other languages (such as C)
  25 # wherein the modified or derived code is exclusively imported into a
  26 # Python module.
  27 #
  28 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  29 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  30 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  31 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  32 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  33 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  34 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  35
  36
  37
  38 import sys
  39 import os
  40 import time
  41 import marshal
  42
  43 __all__ = ["run","help","Profile"]
  44
  45 # Sample timer for use with
  46 #i_count = 0
  47 #def integer_timer():
  48 #       global i_count
  49 #       i_count = i_count + 1
  50 #       return i_count
  51 #itimes = integer_timer # replace with C coded timer returning integers
  52
  53 #**************************************************************************
  54 # The following are the static member functions for the profiler class
  55 # Note that an instance of Profile() is *not* needed to call them.
  56 #**************************************************************************
  57
  58 def run(statement, filename=None):
  59     """Run statement under profiler optionally saving results in filename
  60
  61     This function takes a single argument that can be passed to the
  62     "exec" statement, and an optional file name.  In all cases this
  63     routine attempts to "exec" its first argument and gather profiling
  64     statistics from the execution. If no file name is present, then this
  65     function automatically prints a simple profiling report, sorted by the
  66     standard name string (file/line/function-name) that is presented in
  67     each line.
  68     """
  69     prof = Profile()
  70     try:
  71         prof = prof.run(statement)
  72     except SystemExit:
  73         pass
  74     if filename is not None:
  75         prof.dump_stats(filename)
  76     else:
  77         return prof.print_stats()
  78
  79 # print help
  80 def help():
  81     for dirname in sys.path:
  82         fullname = os.path.join(dirname, 'profile.doc')
  83         if os.path.exists(fullname):
  84             sts = os.system('${PAGER-more} '+fullname)
  85             if sts: print '*** Pager exit status:', sts
  86             break
  87     else:
  88         print 'Sorry, can\'t find the help file "profile.doc"',
  89         print 'along the Python search path'
  90
  91
  92 class Profile:
  93     """Profiler class.
  94
  95     self.cur is always a tuple.  Each such tuple corresponds to a stack
  96     frame that is currently active (self.cur[-2]).  The following are the
  97     definitions of its members.  We use this external "parallel stack" to
  98     avoid contaminating the program that we are profiling. (old profiler
  99     used to write into the frames local dictionary!!) Derived classes
 100     can change the definition of some entries, as long as they leave
 101     [-2:] intact.
 102
 103     [ 0] = Time that needs to be charged to the parent frame's function.
 104            It is used so that a function call will not have to access the
 105            timing data for the parent frame.
 106     [ 1] = Total time spent in this frame's function, excluding time in
 107            subfunctions (this latter is tallied in cur[2]).
 108     [ 2] = Total time spent in subfunctions, excluding time executing the
 109            frame's function (this latter is tallied in cur[1]).
 110     [-3] = Name of the function that corresponds to this frame.
 111     [-2] = Actual frame that we correspond to (used to sync exception handling)
 112     [-1] = Our parent 6-tuple (corresponds to frame.f_back)
 113
 114     Timing data for each function is stored as a 5-tuple in the dictionary
 115     self.timings[].  The index is always the name stored in self.cur[4].
 116     The following are the definitions of the members:
 117
 118     [0] = The number of times this function was called, not counting direct
 119           or indirect recursion,
 120     [1] = Number of times this function appears on the stack, minus one
 121     [2] = Total time spent internal to this function
 122     [3] = Cumulative time that this function was present on the stack.  In
 123           non-recursive functions, this is the total execution time from start
 124           to finish of each invocation of a function, including time spent in
 125           all subfunctions.
 126     [4] = A dictionary indicating for each function name, the number of times
 127           it was called by us.
 128     """
 129
 130     def __init__(self, timer=None):
 131         self.timings = {}
 132         self.cur = None
 133         self.cmd = ""
 134
 135         self.dispatch = {  \
 136                   'call'     : self.trace_dispatch_call, \
 137                   'return'   : self.trace_dispatch_return, \
 138                   'exception': self.trace_dispatch_exception, \
 139                   }
 140
 141         if not timer:
 142             if os.name == 'mac':
 143                 import MacOS
 144                 self.timer = MacOS.GetTicks
 145                 self.dispatcher = self.trace_dispatch_mac
 146                 self.get_time = self.get_time_mac
 147             elif hasattr(time, 'clock'):
 148                 self.timer = time.clock
 149                 self.dispatcher = self.trace_dispatch_i
 150             elif hasattr(os, 'times'):
 151                 self.timer = os.times
 152                 self.dispatcher = self.trace_dispatch
 153             else:
 154                 self.timer = time.time
 155                 self.dispatcher = self.trace_dispatch_i
 156         else:
 157             self.timer = timer
 158             t = self.timer() # test out timer function
 159             try:
 160                 if len(t) == 2:
 161                     self.dispatcher = self.trace_dispatch
 162                 else:
 163                     self.dispatcher = self.trace_dispatch_l
 164             except TypeError:
 165                 self.dispatcher = self.trace_dispatch_i
 166         self.t = self.get_time()
 167         self.simulate_call('profiler')
 168
 169
 170     def get_time(self): # slow simulation of method to acquire time
 171         t = self.timer()
 172         if type(t) == type(()) or type(t) == type([]):
 173             t = reduce(lambda x,y: x+y, t, 0)
 174         return t
 175
 176     def get_time_mac(self):
 177         return self.timer()/60.0
 178
 179     # Heavily optimized dispatch routine for os.times() timer
 180
 181     def trace_dispatch(self, frame, event, arg):
 182         t = self.timer()
 183         t = t[0] + t[1] - self.t        # No Calibration constant
 184         # t = t[0] + t[1] - self.t - .00053 # Calibration constant
 185
 186         if self.dispatch[event](frame,t):
 187             t = self.timer()
 188             self.t = t[0] + t[1]
 189         else:
 190             r = self.timer()
 191             self.t = r[0] + r[1] - t # put back unrecorded delta
 192         return
 193
 194
 195
 196     # Dispatch routine for best timer program (return = scalar integer)
 197
 198     def trace_dispatch_i(self, frame, event, arg):
 199         t = self.timer() - self.t # - 1 # Integer calibration constant
 200         if self.dispatch[event](frame,t):
 201             self.t = self.timer()
 202         else:
 203             self.t = self.timer() - t  # put back unrecorded delta
 204         return
 205
 206     # Dispatch routine for macintosh (timer returns time in ticks of 1/60th second)
 207
 208     def trace_dispatch_mac(self, frame, event, arg):
 209         t = self.timer()/60.0 - self.t # - 1 # Integer calibration constant
 210         if self.dispatch[event](frame,t):
 211             self.t = self.timer()/60.0
 212         else:
 213             self.t = self.timer()/60.0 - t  # put back unrecorded delta
 214         return
 215
 216
 217     # SLOW generic dispatch routine for timer returning lists of numbers
 218
 219     def trace_dispatch_l(self, frame, event, arg):
 220         t = self.get_time() - self.t
 221
 222         if self.dispatch[event](frame,t):
 223             self.t = self.get_time()
 224         else:
 225             self.t = self.get_time()-t # put back unrecorded delta
 226         return
 227
 228
 229     def trace_dispatch_exception(self, frame, t):
 230         rt, rtt, rct, rfn, rframe, rcur = self.cur
 231         if (rframe is not frame) and rcur:
 232             return self.trace_dispatch_return(rframe, t)
 233         self.cur = rt, rtt+t, rct, rfn, rframe, rcur
 234         return 1
 235
 236
 237     def trace_dispatch_call(self, frame, t):
 238         if self.cur and frame.f_back is not self.cur[-2]:
 239             rt, rtt, rct, rfn, rframe, rcur = self.cur
 240             if not isinstance(rframe, Profile.fake_frame):
 241                 if rframe.f_back is not frame.f_back:
 242                     print rframe, rframe.f_back
 243                     print frame, frame.f_back
 244                     raise "Bad call", self.cur[-3]
 245                 self.trace_dispatch_return(rframe, 0)
 246                 if self.cur and frame.f_back is not self.cur[-2]:
 247                     raise "Bad call[2]", self.cur[-3]
 248         fcode = frame.f_code
 249         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
 250         self.cur = (t, 0, 0, fn, frame, self.cur)
 251         timings = self.timings
 252         if timings.has_key(fn):
 253             cc, ns, tt, ct, callers = timings[fn]
 254             timings[fn] = cc, ns + 1, tt, ct, callers
 255         else:
 256             timings[fn] = 0, 0, 0, 0, {}
 257         return 1
 258
 259     def trace_dispatch_return(self, frame, t):
 260         if frame is not self.cur[-2]:
 261             if frame is self.cur[-2].f_back:
 262                 self.trace_dispatch_return(self.cur[-2], 0)
 263             else:
 264                 raise "Bad return", self.cur[-3]
 265
 266         # Prefix "r" means part of the Returning or exiting frame
 267         # Prefix "p" means part of the Previous or older frame
 268
 269         rt, rtt, rct, rfn, frame, rcur = self.cur
 270         rtt = rtt + t
 271         sft = rtt + rct
 272
 273         pt, ptt, pct, pfn, pframe, pcur = rcur
 274         self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 275
 276         timings = self.timings
 277         cc, ns, tt, ct, callers = timings[rfn]
 278         if not ns:
 279             ct = ct + sft
 280             cc = cc + 1
 281         if callers.has_key(pfn):
 282             callers[pfn] = callers[pfn] + 1  # hack: gather more
 283             # stats such as the amount of time added to ct courtesy
 284             # of this specific call, and the contribution to cc
 285             # courtesy of this call.
 286         else:
 287             callers[pfn] = 1
 288         timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
 289
 290         return 1
 291
 292     # The next few function play with self.cmd. By carefully preloading
 293     # our parallel stack, we can force the profiled result to include
 294     # an arbitrary string as the name of the calling function.
 295     # We use self.cmd as that string, and the resulting stats look
 296     # very nice :-).
 297
 298     def set_cmd(self, cmd):
 299         if self.cur[-1]: return   # already set
 300         self.cmd = cmd
 301         self.simulate_call(cmd)
 302
 303     class fake_code:
 304         def __init__(self, filename, line, name):
 305             self.co_filename = filename
 306             self.co_line = line
 307             self.co_name = name
 308             self.co_firstlineno = 0
 309
 310         def __repr__(self):
 311             return repr((self.co_filename, self.co_line, self.co_name))
 312
 313     class fake_frame:
 314         def __init__(self, code, prior):
 315             self.f_code = code
 316             self.f_back = prior
 317
 318     def simulate_call(self, name):
 319         code = self.fake_code('profile', 0, name)
 320         if self.cur:
 321             pframe = self.cur[-2]
 322         else:
 323             pframe = None
 324         frame = self.fake_frame(code, pframe)
 325         a = self.dispatch['call'](frame, 0)
 326         return
 327
 328     # collect stats from pending stack, including getting final
 329     # timings for self.cmd frame.
 330
 331     def simulate_cmd_complete(self):
 332         t = self.get_time() - self.t
 333         while self.cur[-1]:
 334             # We *can* cause assertion errors here if
 335             # dispatch_trace_return checks for a frame match!
 336             a = self.dispatch['return'](self.cur[-2], t)
 337             t = 0
 338         self.t = self.get_time() - t
 339
 340
 341     def print_stats(self):
 342         import pstats
 343         pstats.Stats(self).strip_dirs().sort_stats(-1). \
 344                   print_stats()
 345
 346     def dump_stats(self, file):
 347         f = open(file, 'wb')
 348         self.create_stats()
 349         marshal.dump(self.stats, f)
 350         f.close()
 351
 352     def create_stats(self):
 353         self.simulate_cmd_complete()
 354         self.snapshot_stats()
 355
 356     def snapshot_stats(self):
 357         self.stats = {}
 358         for func in self.timings.keys():
 359             cc, ns, tt, ct, callers = self.timings[func]
 360             callers = callers.copy()
 361             nc = 0
 362             for func_caller in callers.keys():
 363                 nc = nc + callers[func_caller]
 364             self.stats[func] = cc, nc, tt, ct, callers
 365
 366
 367     # The following two methods can be called by clients to use
 368     # a profiler to profile a statement, given as a string.
 369
 370     def run(self, cmd):
 371         import __main__
 372         dict = __main__.__dict__
 373         return self.runctx(cmd, dict, dict)
 374
 375     def runctx(self, cmd, globals, locals):
 376         self.set_cmd(cmd)
 377         sys.setprofile(self.dispatcher)
 378         try:
 379             exec cmd in globals, locals
 380         finally:
 381             sys.setprofile(None)
 382         return self
 383
 384     # This method is more useful to profile a single function call.
 385     def runcall(self, func, *args):
 386         self.set_cmd(`func`)
 387         sys.setprofile(self.dispatcher)
 388         try:
 389             return apply(func, args)
 390         finally:
 391             sys.setprofile(None)
 392
 393
 394     #******************************************************************
 395     # The following calculates the overhead for using a profiler.  The
 396     # problem is that it takes a fair amount of time for the profiler
 397     # to stop the stopwatch (from the time it receives an event).
 398     # Similarly, there is a delay from the time that the profiler
 399     # re-starts the stopwatch before the user's code really gets to
 400     # continue.  The following code tries to measure the difference on
 401     # a per-event basis. The result can the be placed in the
 402     # Profile.dispatch_event() routine for the given platform.  Note
 403     # that this difference is only significant if there are a lot of
 404     # events, and relatively little user code per event.  For example,
 405     # code with small functions will typically benefit from having the
 406     # profiler calibrated for the current platform.  This *could* be
 407     # done on the fly during init() time, but it is not worth the
 408     # effort.  Also note that if too large a value specified, then
 409     # execution time on some functions will actually appear as a
 410     # negative number.  It is *normal* for some functions (with very
 411     # low call counts) to have such negative stats, even if the
 412     # calibration figure is "correct."
 413     #
 414     # One alternative to profile-time calibration adjustments (i.e.,
 415     # adding in the magic little delta during each event) is to track
 416     # more carefully the number of events (and cumulatively, the number
 417     # of events during sub functions) that are seen.  If this were
 418     # done, then the arithmetic could be done after the fact (i.e., at
 419     # display time).  Currently, we track only call/return events.
 420     # These values can be deduced by examining the callees and callers
 421     # vectors for each functions.  Hence we *can* almost correct the
 422     # internal time figure at print time (note that we currently don't
 423     # track exception event processing counts).  Unfortunately, there
 424     # is currently no similar information for cumulative sub-function
 425     # time.  It would not be hard to "get all this info" at profiler
 426     # time.  Specifically, we would have to extend the tuples to keep
 427     # counts of this in each frame, and then extend the defs of timing
 428     # tuples to include the significant two figures. I'm a bit fearful
 429     # that this additional feature will slow the heavily optimized
 430     # event/time ratio (i.e., the profiler would run slower, fur a very
 431     # low "value added" feature.)
 432     #
 433     # Plugging in the calibration constant doesn't slow down the
 434     # profiler very much, and the accuracy goes way up.
 435     #**************************************************************
 436
 437     def calibrate(self, m):
 438         # Modified by Tim Peters
 439         n = m
 440         s = self.get_time()
 441         while n:
 442             self.simple()
 443             n = n - 1
 444         f = self.get_time()
 445         my_simple = f - s
 446         #print "Simple =", my_simple,
 447
 448         n = m
 449         s = self.get_time()
 450         while n:
 451             self.instrumented()
 452             n = n - 1
 453         f = self.get_time()
 454         my_inst = f - s
 455         # print "Instrumented =", my_inst
 456         avg_cost = (my_inst - my_simple)/m
 457         #print "Delta/call =", avg_cost, "(profiler fixup constant)"
 458         return avg_cost
 459
 460     # simulate a program with no profiler activity
 461     def simple(self):
 462         a = 1
 463         pass
 464
 465     # simulate a program with call/return event processing
 466     def instrumented(self):
 467         a = 1
 468         self.profiler_simulation(a, a, a)
 469
 470     # simulate an event processing activity (from user's perspective)
 471     def profiler_simulation(self, x, y, z):
 472         t = self.timer()
 473         ## t = t[0] + t[1]
 474         self.ut = t
 475
 476
 477
 478 class OldProfile(Profile):
 479     """A derived profiler that simulates the old style profile, providing
 480     errant results on recursive functions. The reason for the usefulness of
 481     this profiler is that it runs faster (i.e., less overhead).  It still
 482     creates all the caller stats, and is quite useful when there is *no*
 483     recursion in the user's code.
 484
 485     This code also shows how easy it is to create a modified profiler.
 486     """
 487
 488     def trace_dispatch_exception(self, frame, t):
 489         rt, rtt, rct, rfn, rframe, rcur = self.cur
 490         if rcur and not rframe is frame:
 491             return self.trace_dispatch_return(rframe, t)
 492         return 0
 493
 494     def trace_dispatch_call(self, frame, t):
 495         fn = `frame.f_code`
 496
 497         self.cur = (t, 0, 0, fn, frame, self.cur)
 498         if self.timings.has_key(fn):
 499             tt, ct, callers = self.timings[fn]
 500             self.timings[fn] = tt, ct, callers
 501         else:
 502             self.timings[fn] = 0, 0, {}
 503         return 1
 504
 505     def trace_dispatch_return(self, frame, t):
 506         rt, rtt, rct, rfn, frame, rcur = self.cur
 507         rtt = rtt + t
 508         sft = rtt + rct
 509
 510         pt, ptt, pct, pfn, pframe, pcur = rcur
 511         self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 512
 513         tt, ct, callers = self.timings[rfn]
 514         if callers.has_key(pfn):
 515             callers[pfn] = callers[pfn] + 1
 516         else:
 517             callers[pfn] = 1
 518         self.timings[rfn] = tt+rtt, ct + sft, callers
 519
 520         return 1
 521
 522
 523     def snapshot_stats(self):
 524         self.stats = {}
 525         for func in self.timings.keys():
 526             tt, ct, callers = self.timings[func]
 527             callers = callers.copy()
 528             nc = 0
 529             for func_caller in callers.keys():
 530                 nc = nc + callers[func_caller]
 531             self.stats[func] = nc, nc, tt, ct, callers
 532
 533
 534
 535 class HotProfile(Profile):
 536     """The fastest derived profile example.  It does not calculate
 537     caller-callee relationships, and does not calculate cumulative
 538     time under a function.  It only calculates time spent in a
 539     function, so it runs very quickly due to its very low overhead.
 540     """
 541
 542     def trace_dispatch_exception(self, frame, t):
 543         rt, rtt, rfn, rframe, rcur = self.cur
 544         if rcur and not rframe is frame:
 545             return self.trace_dispatch_return(rframe, t)
 546         return 0
 547
 548     def trace_dispatch_call(self, frame, t):
 549         self.cur = (t, 0, frame, self.cur)
 550         return 1
 551
 552     def trace_dispatch_return(self, frame, t):
 553         rt, rtt, frame, rcur = self.cur
 554
 555         rfn = `frame.f_code`
 556
 557         pt, ptt, pframe, pcur = rcur
 558         self.cur = pt, ptt+rt, pframe, pcur
 559
 560         if self.timings.has_key(rfn):
 561             nc, tt = self.timings[rfn]
 562             self.timings[rfn] = nc + 1, rt + rtt + tt
 563         else:
 564             self.timings[rfn] =      1, rt + rtt
 565
 566         return 1
 567
 568
 569     def snapshot_stats(self):
 570         self.stats = {}
 571         for func in self.timings.keys():
 572             nc, tt = self.timings[func]
 573             self.stats[func] = nc, nc, tt, 0, {}
 574
 575
 576
 577 #****************************************************************************
 578 def Stats(*args):
 579     print 'Report generating functions are in the "pstats" module\a'
 580
 581
 582 # When invoked as main program, invoke the profiler on a script
 583 if __name__ == '__main__':
 584     import sys
 585     import os
 586     if not sys.argv[1:]:
 587         print "usage: profile.py scriptfile [arg] ..."
 588         sys.exit(2)
 589
 590     filename = sys.argv[1]  # Get script filename
 591
 592     del sys.argv[0]         # Hide "profile.py" from argument list
 593
 594     # Insert script directory in front of module search path
 595     sys.path.insert(0, os.path.dirname(filename))
 596
 597     run('execfile(' + `filename` + ')')