Lib/profile.py

   1 #! /usr/bin/env python
   2 #
   3 # Class for profiling python code. rev 1.0  6/2/94
   4 #
   5 # Based on prior profile module by Sjoerd Mullender...
   6 #   which was hacked somewhat by: Guido van Rossum
   7 #
   8 # See profile.doc for more information
   9
  10 """Class for profiling Python code."""
  11
  12 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  13 # Written by James Roskind
  14 #
  15 # Permission to use, copy, modify, and distribute this Python software
  16 # and its associated documentation for any purpose (subject to the
  17 # restriction in the following sentence) without fee is hereby granted,
  18 # provided that the above copyright notice appears in all copies, and
  19 # that both that copyright notice and this permission notice appear in
  20 # supporting documentation, and that the name of InfoSeek not be used in
  21 # advertising or publicity pertaining to distribution of the software
  22 # without specific, written prior permission.  This permission is
  23 # explicitly restricted to the copying and modification of the software
  24 # to remain in Python, compiled Python, or other languages (such as C)
  25 # wherein the modified or derived code is exclusively imported into a
  26 # Python module.
  27 #
  28 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  29 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  30 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  31 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  32 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  33 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  34 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  35
  36
  37
  38 import sys
  39 import os
  40 import time
  41 import marshal
  42
  43 __all__ = ["run","help","Profile"]
  44
# Sample timer for use with the profiler (a counter that returns integers)
  46 #i_count = 0
  47 #def integer_timer():
  48 #       global i_count
  49 #       i_count = i_count + 1
  50 #       return i_count
  51 #itimes = integer_timer # replace with C coded timer returning integers
  52
  53 #**************************************************************************
  54 # The following are the static member functions for the profiler class
  55 # Note that an instance of Profile() is *not* needed to call them.
  56 #**************************************************************************
  57
  58 def run(statement, filename=None):
  59     """Run statement under profiler optionally saving results in filename
  60
  61     This function takes a single argument that can be passed to the
  62     "exec" statement, and an optional file name.  In all cases this
  63     routine attempts to "exec" its first argument and gather profiling
  64     statistics from the execution. If no file name is present, then this
  65     function automatically prints a simple profiling report, sorted by the
  66     standard name string (file/line/function-name) that is presented in
  67     each line.
  68     """
  69     prof = Profile()
  70     try:
  71         prof = prof.run(statement)
  72     except SystemExit:
  73         pass
  74     if filename is not None:
  75         prof.dump_stats(filename)
  76     else:
  77         return prof.print_stats()
  78
  79 # print help
  80 def help():
  81     for dirname in sys.path:
  82         fullname = os.path.join(dirname, 'profile.doc')
  83         if os.path.exists(fullname):
  84             sts = os.system('${PAGER-more} '+fullname)
  85             if sts: print '*** Pager exit status:', sts
  86             break
  87     else:
  88         print 'Sorry, can\'t find the help file "profile.doc"',
  89         print 'along the Python search path'
  90
  91
  92 if os.name == "mac":
  93     import MacOS
  94     def _get_time_mac(timer=MacOS.GetTicks):
  95         return timer() / 60.0
  96
  97 if hasattr(os, "times"):
  98     def _get_time_times(timer=os.times):
  99         t = timer()
 100         return t[0] + t[1]
 101
 102
 103 class Profile:
 104     """Profiler class.
 105
 106     self.cur is always a tuple.  Each such tuple corresponds to a stack
 107     frame that is currently active (self.cur[-2]).  The following are the
 108     definitions of its members.  We use this external "parallel stack" to
 109     avoid contaminating the program that we are profiling. (old profiler
 110     used to write into the frames local dictionary!!) Derived classes
 111     can change the definition of some entries, as long as they leave
 112     [-2:] intact.
 113
 114     [ 0] = Time that needs to be charged to the parent frame's function.
 115            It is used so that a function call will not have to access the
 116            timing data for the parent frame.
 117     [ 1] = Total time spent in this frame's function, excluding time in
 118            subfunctions
 119     [ 2] = Cumulative time spent in this frame's function, including time in
 120            all subfunctions to this frame.
 121     [-3] = Name of the function that corresponds to this frame.
 122     [-2] = Actual frame that we correspond to (used to sync exception handling)
 123     [-1] = Our parent 6-tuple (corresponds to frame.f_back)
 124
 125     Timing data for each function is stored as a 5-tuple in the dictionary
 126     self.timings[].  The index is always the name stored in self.cur[4].
 127     The following are the definitions of the members:
 128
 129     [0] = The number of times this function was called, not counting direct
 130           or indirect recursion,
 131     [1] = Number of times this function appears on the stack, minus one
 132     [2] = Total time spent internal to this function
 133     [3] = Cumulative time that this function was present on the stack.  In
 134           non-recursive functions, this is the total execution time from start
 135           to finish of each invocation of a function, including time spent in
 136           all subfunctions.
 137     [5] = A dictionary indicating for each function name, the number of times
 138           it was called by us.
 139     """
 140
 141     def __init__(self, timer=None):
 142         self.timings = {}
 143         self.cur = None
 144         self.cmd = ""
 145
 146         if not timer:
 147             if os.name == 'mac':
 148                 self.timer = MacOS.GetTicks
 149                 self.dispatcher = self.trace_dispatch_mac
 150                 self.get_time = _get_time_mac
 151             elif hasattr(time, 'clock'):
 152                 self.timer = self.get_time = time.clock
 153                 self.dispatcher = self.trace_dispatch_i
 154             elif hasattr(os, 'times'):
 155                 self.timer = os.times
 156                 self.dispatcher = self.trace_dispatch
 157                 self.get_time = _get_time_times
 158             else:
 159                 self.timer = self.get_time = time.time
 160                 self.dispatcher = self.trace_dispatch_i
 161         else:
 162             self.timer = timer
 163             t = self.timer() # test out timer function
 164             try:
 165                 length = len(t)
 166             except TypeError:
 167                 self.get_time = timer
 168                 self.dispatcher = self.trace_dispatch_i
 169             else:
 170                 if length == 2:
 171                     self.dispatcher = self.trace_dispatch
 172                 else:
 173                     self.dispatcher = self.trace_dispatch_l
 174                 # This get_time() implementation needs to be defined
 175                 # here to capture the passed-in timer in the parameter
 176                 # list (for performance).  Note that we can't assume
 177                 # the timer() result contains two values in all
 178                 # cases.
 179                 import operator
 180                 def get_time_timer(timer=timer,
 181                                    reduce=reduce, reducer=operator.add):
 182                     return reduce(reducer, timer(), 0)
 183                 self.get_time = get_time_timer
 184         self.t = self.get_time()
 185         self.simulate_call('profiler')
 186
 187     # Heavily optimized dispatch routine for os.times() timer
 188
 189     def trace_dispatch(self, frame, event, arg):
 190         timer = self.timer
 191         t = timer()
 192         t = t[0] + t[1] - self.t        # No Calibration constant
 193         # t = t[0] + t[1] - self.t - .00053 # Calibration constant
 194
 195         if self.dispatch[event](self, frame,t):
 196             t = timer()
 197             self.t = t[0] + t[1]
 198         else:
 199             r = timer()
 200             self.t = r[0] + r[1] - t # put back unrecorded delta
 201         return
 202
 203
 204
 205     # Dispatch routine for best timer program (return = scalar integer)
 206
 207     def trace_dispatch_i(self, frame, event, arg):
 208         timer = self.timer
 209         t = timer() - self.t # - 1 # Integer calibration constant
 210         if self.dispatch[event](self, frame,t):
 211             self.t = timer()
 212         else:
 213             self.t = timer() - t  # put back unrecorded delta
 214         return
 215
 216     # Dispatch routine for macintosh (timer returns time in ticks of
 217     # 1/60th second)
 218
 219     def trace_dispatch_mac(self, frame, event, arg):
 220         timer = self.timer
 221         t = timer()/60.0 - self.t # - 1 # Integer calibration constant
 222         if self.dispatch[event](self, frame,t):
 223             self.t = timer()/60.0
 224         else:
 225             self.t = timer()/60.0 - t  # put back unrecorded delta
 226         return
 227
 228
 229     # SLOW generic dispatch routine for timer returning lists of numbers
 230
 231     def trace_dispatch_l(self, frame, event, arg):
 232         get_time = self.get_time
 233         t = get_time() - self.t
 234
 235         if self.dispatch[event](self, frame,t):
 236             self.t = get_time()
 237         else:
 238             self.t = get_time() - t # put back unrecorded delta
 239         return
 240
 241
 242     def trace_dispatch_exception(self, frame, t):
 243         rt, rtt, rct, rfn, rframe, rcur = self.cur
 244         if (not rframe is frame) and rcur:
 245             return self.trace_dispatch_return(rframe, t)
 246         return 0
 247
 248
 249     def trace_dispatch_call(self, frame, t):
 250         fcode = frame.f_code
 251         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
 252         self.cur = (t, 0, 0, fn, frame, self.cur)
 253         timings = self.timings
 254         if timings.has_key(fn):
 255             cc, ns, tt, ct, callers = timings[fn]
 256             timings[fn] = cc, ns + 1, tt, ct, callers
 257         else:
 258             timings[fn] = 0, 0, 0, 0, {}
 259         return 1
 260
 261     def trace_dispatch_return(self, frame, t):
 262         # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
 263
 264         # Prefix "r" means part of the Returning or exiting frame
 265         # Prefix "p" means part of the Previous or older frame
 266
 267         rt, rtt, rct, rfn, frame, rcur = self.cur
 268         rtt = rtt + t
 269         sft = rtt + rct
 270
 271         pt, ptt, pct, pfn, pframe, pcur = rcur
 272         self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 273
 274         timings = self.timings
 275         cc, ns, tt, ct, callers = timings[rfn]
 276         if not ns:
 277             ct = ct + sft
 278             cc = cc + 1
 279         if callers.has_key(pfn):
 280             callers[pfn] = callers[pfn] + 1  # hack: gather more
 281             # stats such as the amount of time added to ct courtesy
 282             # of this specific call, and the contribution to cc
 283             # courtesy of this call.
 284         else:
 285             callers[pfn] = 1
 286         timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
 287
 288         return 1
 289
 290
 291     dispatch = {
 292         "call": trace_dispatch_call,
 293         "exception": trace_dispatch_exception,
 294         "return": trace_dispatch_return,
 295         }
 296
 297
 298     # The next few function play with self.cmd. By carefully preloading
 299     # our parallel stack, we can force the profiled result to include
 300     # an arbitrary string as the name of the calling function.
 301     # We use self.cmd as that string, and the resulting stats look
 302     # very nice :-).
 303
 304     def set_cmd(self, cmd):
 305         if self.cur[-1]: return   # already set
 306         self.cmd = cmd
 307         self.simulate_call(cmd)
 308
 309     class fake_code:
 310         def __init__(self, filename, line, name):
 311             self.co_filename = filename
 312             self.co_line = line
 313             self.co_name = name
 314             self.co_firstlineno = 0
 315
 316         def __repr__(self):
 317             return repr((self.co_filename, self.co_line, self.co_name))
 318
 319     class fake_frame:
 320         def __init__(self, code, prior):
 321             self.f_code = code
 322             self.f_back = prior
 323
 324     def simulate_call(self, name):
 325         code = self.fake_code('profile', 0, name)
 326         if self.cur:
 327             pframe = self.cur[-2]
 328         else:
 329             pframe = None
 330         frame = self.fake_frame(code, pframe)
 331         a = self.dispatch['call'](self, frame, 0)
 332         return
 333
 334     # collect stats from pending stack, including getting final
 335     # timings for self.cmd frame.
 336
 337     def simulate_cmd_complete(self):
 338         get_time = self.get_time
 339         t = get_time() - self.t
 340         while self.cur[-1]:
 341             # We *can* cause assertion errors here if
 342             # dispatch_trace_return checks for a frame match!
 343             a = self.dispatch['return'](self, self.cur[-2], t)
 344             t = 0
 345         self.t = get_time() - t
 346
 347
 348     def print_stats(self):
 349         import pstats
 350         pstats.Stats(self).strip_dirs().sort_stats(-1). \
 351                   print_stats()
 352
 353     def dump_stats(self, file):
 354         f = open(file, 'wb')
 355         self.create_stats()
 356         marshal.dump(self.stats, f)
 357         f.close()
 358
 359     def create_stats(self):
 360         self.simulate_cmd_complete()
 361         self.snapshot_stats()
 362
 363     def snapshot_stats(self):
 364         self.stats = {}
 365         for func in self.timings.keys():
 366             cc, ns, tt, ct, callers = self.timings[func]
 367             callers = callers.copy()
 368             nc = 0
 369             for func_caller in callers.keys():
 370                 nc = nc + callers[func_caller]
 371             self.stats[func] = cc, nc, tt, ct, callers
 372
 373
 374     # The following two methods can be called by clients to use
 375     # a profiler to profile a statement, given as a string.
 376
 377     def run(self, cmd):
 378         import __main__
 379         dict = __main__.__dict__
 380         return self.runctx(cmd, dict, dict)
 381
 382     def runctx(self, cmd, globals, locals):
 383         self.set_cmd(cmd)
 384         sys.setprofile(self.dispatcher)
 385         try:
 386             exec cmd in globals, locals
 387         finally:
 388             sys.setprofile(None)
 389         return self
 390
 391     # This method is more useful to profile a single function call.
 392     def runcall(self, func, *args, **kw):
 393         self.set_cmd(`func`)
 394         sys.setprofile(self.dispatcher)
 395         try:
 396             return apply(func, args, kw)
 397         finally:
 398             sys.setprofile(None)
 399
 400
 401     #******************************************************************
 402     # The following calculates the overhead for using a profiler.  The
 403     # problem is that it takes a fair amount of time for the profiler
 404     # to stop the stopwatch (from the time it receives an event).
 405     # Similarly, there is a delay from the time that the profiler
 406     # re-starts the stopwatch before the user's code really gets to
 407     # continue.  The following code tries to measure the difference on
 408     # a per-event basis. The result can the be placed in the
 409     # Profile.dispatch_event() routine for the given platform.  Note
 410     # that this difference is only significant if there are a lot of
 411     # events, and relatively little user code per event.  For example,
 412     # code with small functions will typically benefit from having the
 413     # profiler calibrated for the current platform.  This *could* be
 414     # done on the fly during init() time, but it is not worth the
 415     # effort.  Also note that if too large a value specified, then
 416     # execution time on some functions will actually appear as a
 417     # negative number.  It is *normal* for some functions (with very
 418     # low call counts) to have such negative stats, even if the
 419     # calibration figure is "correct."
 420     #
 421     # One alternative to profile-time calibration adjustments (i.e.,
 422     # adding in the magic little delta during each event) is to track
 423     # more carefully the number of events (and cumulatively, the number
 424     # of events during sub functions) that are seen.  If this were
 425     # done, then the arithmetic could be done after the fact (i.e., at
 426     # display time).  Currently, we track only call/return events.
 427     # These values can be deduced by examining the callees and callers
 428     # vectors for each functions.  Hence we *can* almost correct the
 429     # internal time figure at print time (note that we currently don't
 430     # track exception event processing counts).  Unfortunately, there
 431     # is currently no similar information for cumulative sub-function
 432     # time.  It would not be hard to "get all this info" at profiler
 433     # time.  Specifically, we would have to extend the tuples to keep
 434     # counts of this in each frame, and then extend the defs of timing
 435     # tuples to include the significant two figures. I'm a bit fearful
 436     # that this additional feature will slow the heavily optimized
 437     # event/time ratio (i.e., the profiler would run slower, fur a very
 438     # low "value added" feature.)
 439     #
 440     # Plugging in the calibration constant doesn't slow down the
 441     # profiler very much, and the accuracy goes way up.
 442     #**************************************************************
 443
 444     def calibrate(self, m):
 445         # Modified by Tim Peters
 446         get_time = self.get_time
 447         n = m
 448         s = get_time()
 449         while n:
 450             self.simple()
 451             n = n - 1
 452         f = get_time()
 453         my_simple = f - s
 454         #print "Simple =", my_simple,
 455
 456         n = m
 457         s = get_time()
 458         while n:
 459             self.instrumented()
 460             n = n - 1
 461         f = get_time()
 462         my_inst = f - s
 463         # print "Instrumented =", my_inst
 464         avg_cost = (my_inst - my_simple)/m
 465         #print "Delta/call =", avg_cost, "(profiler fixup constant)"
 466         return avg_cost
 467
 468     # simulate a program with no profiler activity
 469     def simple(self):
 470         a = 1
 471         pass
 472
 473     # simulate a program with call/return event processing
 474     def instrumented(self):
 475         a = 1
 476         self.profiler_simulation(a, a, a)
 477
 478     # simulate an event processing activity (from user's perspective)
 479     def profiler_simulation(self, x, y, z):
 480         t = self.timer()
 481         ## t = t[0] + t[1]
 482         self.ut = t
 483
 484
 485
 486 class OldProfile(Profile):
 487     """A derived profiler that simulates the old style profile, providing
 488     errant results on recursive functions. The reason for the usefulness of
 489     this profiler is that it runs faster (i.e., less overhead).  It still
 490     creates all the caller stats, and is quite useful when there is *no*
 491     recursion in the user's code.
 492
 493     This code also shows how easy it is to create a modified profiler.
 494     """
 495
 496     def trace_dispatch_exception(self, frame, t):
 497         rt, rtt, rct, rfn, rframe, rcur = self.cur
 498         if rcur and not rframe is frame:
 499             return self.trace_dispatch_return(rframe, t)
 500         return 0
 501
 502     def trace_dispatch_call(self, frame, t):
 503         fn = `frame.f_code`
 504
 505         self.cur = (t, 0, 0, fn, frame, self.cur)
 506         if self.timings.has_key(fn):
 507             tt, ct, callers = self.timings[fn]
 508             self.timings[fn] = tt, ct, callers
 509         else:
 510             self.timings[fn] = 0, 0, {}
 511         return 1
 512
 513     def trace_dispatch_return(self, frame, t):
 514         rt, rtt, rct, rfn, frame, rcur = self.cur
 515         rtt = rtt + t
 516         sft = rtt + rct
 517
 518         pt, ptt, pct, pfn, pframe, pcur = rcur
 519         self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 520
 521         tt, ct, callers = self.timings[rfn]
 522         if callers.has_key(pfn):
 523             callers[pfn] = callers[pfn] + 1
 524         else:
 525             callers[pfn] = 1
 526         self.timings[rfn] = tt+rtt, ct + sft, callers
 527
 528         return 1
 529
 530
 531     dispatch = {
 532         "call": trace_dispatch_call,
 533         "exception": trace_dispatch_exception,
 534         "return": trace_dispatch_return,
 535         }
 536
 537
 538     def snapshot_stats(self):
 539         self.stats = {}
 540         for func in self.timings.keys():
 541             tt, ct, callers = self.timings[func]
 542             callers = callers.copy()
 543             nc = 0
 544             for func_caller in callers.keys():
 545                 nc = nc + callers[func_caller]
 546             self.stats[func] = nc, nc, tt, ct, callers
 547
 548
 549
 550 class HotProfile(Profile):
 551     """The fastest derived profile example.  It does not calculate
 552     caller-callee relationships, and does not calculate cumulative
 553     time under a function.  It only calculates time spent in a
 554     function, so it runs very quickly due to its very low overhead.
 555     """
 556
 557     def trace_dispatch_exception(self, frame, t):
 558         rt, rtt, rfn, rframe, rcur = self.cur
 559         if rcur and not rframe is frame:
 560             return self.trace_dispatch_return(rframe, t)
 561         return 0
 562
 563     def trace_dispatch_call(self, frame, t):
 564         self.cur = (t, 0, frame, self.cur)
 565         return 1
 566
 567     def trace_dispatch_return(self, frame, t):
 568         rt, rtt, frame, rcur = self.cur
 569
 570         rfn = `frame.f_code`
 571
 572         pt, ptt, pframe, pcur = rcur
 573         self.cur = pt, ptt+rt, pframe, pcur
 574
 575         if self.timings.has_key(rfn):
 576             nc, tt = self.timings[rfn]
 577             self.timings[rfn] = nc + 1, rt + rtt + tt
 578         else:
 579             self.timings[rfn] =      1, rt + rtt
 580
 581         return 1
 582
 583
 584     dispatch = {
 585         "call": trace_dispatch_call,
 586         "exception": trace_dispatch_exception,
 587         "return": trace_dispatch_return,
 588         }
 589
 590
 591     def snapshot_stats(self):
 592         self.stats = {}
 593         for func in self.timings.keys():
 594             nc, tt = self.timings[func]
 595             self.stats[func] = nc, nc, tt, 0, {}
 596
 597
 598
 599 #****************************************************************************
 600 def Stats(*args):
 601     print 'Report generating functions are in the "pstats" module\a'
 602
 603
 604 # When invoked as main program, invoke the profiler on a script
 605 if __name__ == '__main__':
 606     if not sys.argv[1:]:
 607         print "usage: profile.py scriptfile [arg] ..."
 608         sys.exit(2)
 609
 610     filename = sys.argv[1]  # Get script filename
 611
 612     del sys.argv[0]         # Hide "profile.py" from argument list
 613
 614     # Insert script directory in front of module search path
 615     sys.path.insert(0, os.path.dirname(filename))
 616
 617     run('execfile(' + `filename` + ')')