Lib/profile.py

   1 #
   2 # Class for profiling python code. rev 1.0  6/2/94
   3 #
   4 # Based on prior profile module by Sjoerd Mullender...
   5 #   which was hacked somewhat by: Guido van Rossum
   6 #
   7 # See profile.doc for more information
   8
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35
  36 import sys
  37 import os
  38 import time
  39 import string
  40 import marshal
  41
  42
  43 # Global variables
  44 func_norm_dict = {}
  45 func_norm_counter = 0
  46 if hasattr(os, 'getpid'):
  47         pid_string = `os.getpid()`
  48 else:
  49         pid_string = ''
  50
  51
  52 # Sample timer for use with
  53 #i_count = 0
  54 #def integer_timer():
  55 #       global i_count
  56 #       i_count = i_count + 1
  57 #       return i_count
  58 #itimes = integer_timer # replace with C coded timer returning integers
  59
  60 #**************************************************************************
  61 # The following are the static member functions for the profiler class
  62 # Note that an instance of Profile() is *not* needed to call them.
  63 #**************************************************************************
  64
  65
  66 # simplified user interface
  67 def run(statement, *args):
  68         prof = Profile()
  69         try:
  70                 prof = prof.run(statement)
  71         except SystemExit:
  72                 pass
  73         if args:
  74                 prof.dump_stats(args[0])
  75         else:
  76                 return prof.print_stats()
  77
  78 # print help
  79 def help():
  80         for dirname in sys.path:
  81                 fullname = os.path.join(dirname, 'profile.doc')
  82                 if os.path.exists(fullname):
  83                         sts = os.system('${PAGER-more} '+fullname)
  84                         if sts: print '*** Pager exit status:', sts
  85                         break
  86         else:
  87                 print 'Sorry, can\'t find the help file "profile.doc"',
  88                 print 'along the Python search path'
  89
  90
  91 #**************************************************************************
  92 # class Profile documentation:
  93 #**************************************************************************
  94 # self.cur is always a tuple.  Each such tuple corresponds to a stack
  95 # frame that is currently active (self.cur[-2]).  The following are the
  96 # definitions of its members.  We use this external "parallel stack" to
  97 # avoid contaminating the program that we are profiling. (old profiler
  98 # used to write into the frames local dictionary!!) Derived classes
  99 # can change the definition of some entries, as long as they leave
 100 # [-2:] intact.
 101 #
 102 # [ 0] = Time that needs to be charged to the parent frame's function.  It is
 103 #        used so that a function call will not have to access the timing data
 104 #        for the parents frame.
 105 # [ 1] = Total time spent in this frame's function, excluding time in
 106 #        subfunctions
 107 # [ 2] = Cumulative time spent in this frame's function, including time in
 108 #        all subfunctions to this frame.
 109 # [-3] = Name of the function that corresponds to this frame.
 110 # [-2] = Actual frame that we correspond to (used to sync exception handling)
 111 # [-1] = Our parent 6-tuple (corresponds to frame.f_back)
 112 #**************************************************************************
 113 # Timing data for each function is stored as a 5-tuple in the dictionary
 114 # self.timings[].  The index is always the name stored in self.cur[-3].
 115 # The following are the definitions of the members:
 116 #
 117 # [0] = The number of times this function was called, not counting direct
 118 #       or indirect recursion,
 119 # [1] = Number of times this function appears on the stack, minus one
 120 # [2] = Total time spent internal to this function
 121 # [3] = Cumulative time that this function was present on the stack.  In
 122 #       non-recursive functions, this is the total execution time from start
 123 #       to finish of each invocation of a function, including time spent in
 124 #       all subfunctions.
 125 # [4] = A dictionary indicating for each function name, the number of times
 126 #       it was called by us.
 127 #**************************************************************************
 128 # We produce function names via a repr() call on the f_code object during
 129 # profiling. This saves a *lot* of CPU time.  This results in a string that
 130 # always looks like:
 131 #   <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
 132 # After we "normalize" it, it is a tuple of filename, line, function-name.
 133 # We wait till we are done profiling to do the normalization.
 134 # *IF* this repr format changes, then only the normalization routine should
 135 # need to be fixed.
 136 #**************************************************************************
 137 class Profile:
 138
 139         def __init__(self, timer=None):
 140                 self.timings = {}
 141                 self.cur = None
 142                 self.cmd = ""
 143
 144                 self.dispatch = {  \
 145                           'call'     : self.trace_dispatch_call, \
 146                           'return'   : self.trace_dispatch_return, \
 147                           'exception': self.trace_dispatch_exception, \
 148                           }
 149
 150                 if not timer:
 151                         if hasattr(os, 'times'):
 152                                 self.timer = os.times
 153                                 self.dispatcher = self.trace_dispatch
 154                         else:
 155                                 self.timer = time.time
 156                                 self.dispatcher = self.trace_dispatch_i
 157                 else:
 158                         self.timer = timer
 159                         t = self.timer() # test out timer function
 160                         try:
 161                                 if len(t) == 2:
 162                                         self.dispatcher = self.trace_dispatch
 163                                 else:
 164                                         self.dispatcher = self.trace_dispatch_l
 165                         except TypeError:
 166                                 self.dispatcher = self.trace_dispatch_i
 167                 self.t = self.get_time()
 168                 self.simulate_call('profiler')
 169
 170
 171         def get_time(self): # slow simulation of method to acquire time
 172                 t = self.timer()
 173                 if type(t) == type(()) or type(t) == type([]):
 174                         t = reduce(lambda x,y: x+y, t, 0)
 175                 return t
 176
 177
 178         # Heavily optimized dispatch routine for os.times() timer
 179
 180         def trace_dispatch(self, frame, event, arg):
 181                 t = self.timer()
 182                 t = t[0] + t[1] - self.t        # No Calibration constant
 183                 # t = t[0] + t[1] - self.t - .00053 # Calibration constant
 184
 185                 if self.dispatch[event](frame,t):
 186                         t = self.timer()
 187                         self.t = t[0] + t[1]
 188                 else:
 189                         r = self.timer()
 190                         self.t = r[0] + r[1] - t # put back unrecorded delta
 191                 return
 192
 193
 194
 195         # Dispatch routine for best timer program (return = scalar integer)
 196
 197         def trace_dispatch_i(self, frame, event, arg):
 198                 t = self.timer() - self.t # - 1 # Integer calibration constant
 199                 if self.dispatch[event](frame,t):
 200                         self.t = self.timer()
 201                 else:
 202                         self.t = self.timer() - t  # put back unrecorded delta
 203                 return
 204
 205
 206         # SLOW generic dispatch rountine for timer returning lists of numbers
 207
 208         def trace_dispatch_l(self, frame, event, arg):
 209                 t = self.get_time() - self.t
 210
 211                 if self.dispatch[event](frame,t):
 212                         self.t = self.get_time()
 213                 else:
 214                         self.t = self.get_time()-t # put back unrecorded delta
 215                 return
 216
 217
 218         def trace_dispatch_exception(self, frame, t):
 219                 rt, rtt, rct, rfn, rframe, rcur = self.cur
 220                 if (not rframe is frame) and rcur:
 221                         return self.trace_dispatch_return(rframe, t)
 222                 return 0
 223
 224
 225         def trace_dispatch_call(self, frame, t):
 226                 fn = `frame.f_code`
 227
 228                 # The following should be about the best approach, but
 229                 # we would need a function that maps from id() back to
 230                 # the actual code object.
 231                 #     fn = id(frame.f_code)
 232                 # Note we would really use our own function, which would
 233                 # return the code address, *and* bump the ref count.  We
 234                 # would then fix up the normalize function to do the
 235                 # actualy repr(fn) call.
 236
 237                 # The following is an interesting alternative
 238                 # It doesn't do as good a job, and it doesn't run as
 239                 # fast 'cause repr() is written in C, and this is Python.
 240                 #fcode = frame.f_code
 241                 #code = fcode.co_code
 242                 #if ord(code[0]) == 127: #  == SET_LINENO
 243                 #       # see "opcode.h" in the Python source
 244                 #       fn = (fcode.co_filename, ord(code[1]) | \
 245                 #                 ord(code[2]) << 8, fcode.co_name)
 246                 #else:
 247                 #       fn = (fcode.co_filename, 0, fcode.co_name)
 248
 249                 self.cur = (t, 0, 0, fn, frame, self.cur)
 250                 if self.timings.has_key(fn):
 251                         cc, ns, tt, ct, callers = self.timings[fn]
 252                         self.timings[fn] = cc, ns + 1, tt, ct, callers
 253                 else:
 254                         self.timings[fn] = 0, 0, 0, 0, {}
 255                 return 1
 256
 257         def trace_dispatch_return(self, frame, t):
 258                 # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
 259
 260                 # Prefix "r" means part of the Returning or exiting frame
 261                 # Prefix "p" means part of the Previous or older frame
 262
 263                 rt, rtt, rct, rfn, frame, rcur = self.cur
 264                 rtt = rtt + t
 265                 sft = rtt + rct
 266
 267                 pt, ptt, pct, pfn, pframe, pcur = rcur
 268                 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 269
 270                 cc, ns, tt, ct, callers = self.timings[rfn]
 271                 if not ns:
 272                         ct = ct + sft
 273                         cc = cc + 1
 274                 if callers.has_key(pfn):
 275                         callers[pfn] = callers[pfn] + 1  # hack: gather more
 276                         # stats such as the amount of time added to ct courtesy
 277                         # of this specific call, and the contribution to cc
 278                         # courtesy of this call.
 279                 else:
 280                         callers[pfn] = 1
 281                 self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
 282
 283                 return 1
 284
 285         # The next few function play with self.cmd. By carefully preloading
 286         # our paralell stack, we can force the profiled result to include
 287         # an arbitrary string as the name of the calling function.
 288         # We use self.cmd as that string, and the resulting stats look
 289         # very nice :-).
 290
 291         def set_cmd(self, cmd):
 292                 if self.cur[-1]: return   # already set
 293                 self.cmd = cmd
 294                 self.simulate_call(cmd)
 295
 296         class fake_code:
 297                 def __init__(self, filename, line, name):
 298                         self.co_filename = filename
 299                         self.co_line = line
 300                         self.co_name = name
 301                         self.co_code = '\0'  # anything but 127
 302
 303                 def __repr__(self):
 304                         return (self.co_filename, self.co_line, self.co_name)
 305
 306         class fake_frame:
 307                 def __init__(self, code, prior):
 308                         self.f_code = code
 309                         self.f_back = prior
 310
 311         def simulate_call(self, name):
 312                 code = self.fake_code('profile', 0, name)
 313                 if self.cur:
 314                         pframe = self.cur[-2]
 315                 else:
 316                         pframe = None
 317                 frame = self.fake_frame(code, pframe)
 318                 a = self.dispatch['call'](frame, 0)
 319                 return
 320
 321         # collect stats from pending stack, including getting final
 322         # timings for self.cmd frame.
 323
 324         def simulate_cmd_complete(self):
 325                 t = self.get_time() - self.t
 326                 while self.cur[-1]:
 327                         # We *can* cause assertion errors here if
 328                         # dispatch_trace_return checks for a frame match!
 329                         a = self.dispatch['return'](self.cur[-2], t)
 330                         t = 0
 331                 self.t = self.get_time() - t
 332
 333
 334         def print_stats(self):
 335                 import pstats
 336                 pstats.Stats(self).strip_dirs().sort_stats(-1). \
 337                           print_stats()
 338
 339         def dump_stats(self, file):
 340                 f = open(file, 'w')
 341                 self.create_stats()
 342                 marshal.dump(self.stats, f)
 343                 f.close()
 344
 345         def create_stats(self):
 346                 self.simulate_cmd_complete()
 347                 self.snapshot_stats()
 348
 349         def snapshot_stats(self):
 350                 self.stats = {}
 351                 for func in self.timings.keys():
 352                         cc, ns, tt, ct, callers = self.timings[func]
 353                         nor_func = self.func_normalize(func)
 354                         nor_callers = {}
 355                         nc = 0
 356                         for func_caller in callers.keys():
 357                                 nor_callers[self.func_normalize(func_caller)]=\
 358                                           callers[func_caller]
 359                                 nc = nc + callers[func_caller]
 360                         self.stats[nor_func] = cc, nc, tt, ct, nor_callers
 361
 362
 363         # Override the following function if you can figure out
 364         # a better name for the binary f_code entries.  I just normalize
 365         # them sequentially in a dictionary.  It would be nice if we could
 366         # *really* see the name of the underlying C code :-).  Sometimes
 367         #  you can figure out what-is-what by looking at caller and callee
 368         # lists (and knowing what your python code does).
 369
 370         def func_normalize(self, func_name):
 371                 global func_norm_dict
 372                 global func_norm_counter
 373                 global func_sequence_num
 374
 375                 if func_norm_dict.has_key(func_name):
 376                         return func_norm_dict[func_name]
 377                 if type(func_name) == type(""):
 378                         long_name = string.split(func_name)
 379                         file_name = long_name[-3][1:-2]
 380                         func = long_name[2]
 381                         lineno = long_name[-1][:-1]
 382                         if '?' == func:   # Until I find out how to may 'em...
 383                                 file_name = 'python'
 384                                 func_norm_counter = func_norm_counter + 1
 385                                 func = pid_string + ".C." + `func_norm_counter`
 386                         result =  file_name ,  string.atoi(lineno) , func
 387                 else:
 388                         result = func_name
 389                 func_norm_dict[func_name] = result
 390                 return result
 391
 392
 393         # The following two methods can be called by clients to use
 394         # a profiler to profile a statement, given as a string.
 395
 396         def run(self, cmd):
 397                 import __main__
 398                 dict = __main__.__dict__
 399                 return self.runctx(cmd, dict, dict)
 400
 401         def runctx(self, cmd, globals, locals):
 402                 self.set_cmd(cmd)
 403                 sys.setprofile(self.dispatcher)
 404                 try:
 405                         exec cmd in globals, locals
 406                 finally:
 407                         sys.setprofile(None)
 408                 return self
 409
 410         # This method is more useful to profile a single function call.
 411         def runcall(self, func, *args):
 412                 self.set_cmd(`func`)
 413                 sys.setprofile(self.dispatcher)
 414                 try:
 415                         return apply(func, args)
 416                 finally:
 417                         sys.setprofile(None)
 418
 419
 420         #******************************************************************
 421         # The following calculates the overhead for using a profiler.  The
 422         # problem is that it takes a fair amount of time for the profiler
 423         # to stop the stopwatch (from the time it recieves an event).
 424         # Similarly, there is a delay from the time that the profiler
 425         # re-starts the stopwatch before the user's code really gets to
 426         # continue.  The following code tries to measure the difference on
 427         # a per-event basis. The result can the be placed in the
 428         # Profile.dispatch_event() routine for the given platform.  Note
 429         # that this difference is only significant if there are a lot of
 430         # events, and relatively little user code per event.  For example,
 431         # code with small functions will typically benefit from having the
 432         # profiler calibrated for the current platform.  This *could* be
 433         # done on the fly during init() time, but it is not worth the
 434         # effort.  Also note that if too large a value specified, then
 435         # execution time on some functions will actually appear as a
 436         # negative number.  It is *normal* for some functions (with very
 437         # low call counts) to have such negative stats, even if the
 438         # calibration figure is "correct."
 439         #
 440         # One alternative to profile-time calibration adjustments (i.e.,
 441         # adding in the magic little delta during each event) is to track
 442         # more carefully the number of events (and cumulatively, the number
 443         # of events during sub functions) that are seen.  If this were
 444         # done, then the arithmetic could be done after the fact (i.e., at
 445         # display time).  Currintly, we track only call/return events.
 446         # These values can be deduced by examining the callees and callers
 447         # vectors for each functions.  Hence we *can* almost correct the
 448         # internal time figure at print time (note that we currently don't
 449         # track exception event processing counts).  Unfortunately, there
 450         # is currently no similar information for cumulative sub-function
 451         # time.  It would not be hard to "get all this info" at profiler
 452         # time.  Specifically, we would have to extend the tuples to keep
 453         # counts of this in each frame, and then extend the defs of timing
 454         # tuples to include the significant two figures. I'm a bit fearful
 455         # that this additional feature will slow the heavily optimized
 456         # event/time ratio (i.e., the profiler would run slower, fur a very
 457         # low "value added" feature.)
 458         #
 459         # Plugging in the calibration constant doesn't slow down the
 460         # profiler very much, and the accuracy goes way up.
 461         #**************************************************************
 462
 463         def calibrate(self, m):
 464                 n = m
 465                 s = self.timer()
 466                 while n:
 467                         self.simple()
 468                         n = n - 1
 469                 f = self.timer()
 470                 my_simple = f[0]+f[1]-s[0]-s[1]
 471                 #print "Simple =", my_simple,
 472
 473                 n = m
 474                 s = self.timer()
 475                 while n:
 476                         self.instrumented()
 477                         n = n - 1
 478                 f = self.timer()
 479                 my_inst = f[0]+f[1]-s[0]-s[1]
 480                 # print "Instrumented =", my_inst
 481                 avg_cost = (my_inst - my_simple)/m
 482                 #print "Delta/call =", avg_cost, "(profiler fixup constant)"
 483                 return avg_cost
 484
 485         # simulate a program with no profiler activity
 486         def simple(self):
 487                 a = 1
 488                 pass
 489
 490         # simulate a program with call/return event processing
 491         def instrumented(self):
 492                 a = 1
 493                 self.profiler_simulation(a, a, a)
 494
 495         # simulate an event processing activity (from user's perspective)
 496         def profiler_simulation(self, x, y, z):
 497                 t = self.timer()
 498                 t = t[0] + t[1]
 499                 self.ut = t
 500
 501
 502
 503 #****************************************************************************
 504 # OldProfile class documentation
 505 #****************************************************************************
 506 #
 507 # The following derived profiler simulates the old style profile, providing
 508 # errant results on recursive functions. The reason for the usefulness of this
 509 # profiler is that it runs faster (i.e., less overhead).  It still creates
 510 # all the caller stats, and is quite useful when there is *no* recursion
 511 # in the user's code.
 512 #
 513 # This code also shows how easy it is to create a modified profiler.
 514 #****************************************************************************
 515 class OldProfile(Profile):
 516         def trace_dispatch_exception(self, frame, t):
 517                 rt, rtt, rct, rfn, rframe, rcur = self.cur
 518                 if rcur and not rframe is frame:
 519                         return self.trace_dispatch_return(rframe, t)
 520                 return 0
 521
 522         def trace_dispatch_call(self, frame, t):
 523                 fn = `frame.f_code`
 524
 525                 self.cur = (t, 0, 0, fn, frame, self.cur)
 526                 if self.timings.has_key(fn):
 527                         tt, ct, callers = self.timings[fn]
 528                         self.timings[fn] = tt, ct, callers
 529                 else:
 530                         self.timings[fn] = 0, 0, {}
 531                 return 1
 532
 533         def trace_dispatch_return(self, frame, t):
 534                 rt, rtt, rct, rfn, frame, rcur = self.cur
 535                 rtt = rtt + t
 536                 sft = rtt + rct
 537
 538                 pt, ptt, pct, pfn, pframe, pcur = rcur
 539                 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 540
 541                 tt, ct, callers = self.timings[rfn]
 542                 if callers.has_key(pfn):
 543                         callers[pfn] = callers[pfn] + 1
 544                 else:
 545                         callers[pfn] = 1
 546                 self.timings[rfn] = tt+rtt, ct + sft, callers
 547
 548                 return 1
 549
 550
 551         def snapshot_stats(self):
 552                 self.stats = {}
 553                 for func in self.timings.keys():
 554                         tt, ct, callers = self.timings[func]
 555                         nor_func = self.func_normalize(func)
 556                         nor_callers = {}
 557                         nc = 0
 558                         for func_caller in callers.keys():
 559                                 nor_callers[self.func_normalize(func_caller)]=\
 560                                           callers[func_caller]
 561                                 nc = nc + callers[func_caller]
 562                         self.stats[nor_func] = nc, nc, tt, ct, nor_callers
 563
 564
 565
 566 #****************************************************************************
 567 # HotProfile class documentation
 568 #****************************************************************************
 569 #
 570 # This profiler is the fastest derived profile example.  It does not
 571 # calculate caller-callee relationships, and does not calculate cumulative
 572 # time under a function.  It only calculates time spent in a function, so
 573 # it runs very quickly (re: very low overhead)
 574 #****************************************************************************
 575 class HotProfile(Profile):
 576         def trace_dispatch_exception(self, frame, t):
 577                 rt, rtt, rfn, rframe, rcur = self.cur
 578                 if rcur and not rframe is frame:
 579                         return self.trace_dispatch_return(rframe, t)
 580                 return 0
 581
 582         def trace_dispatch_call(self, frame, t):
 583                 self.cur = (t, 0, frame, self.cur)
 584                 return 1
 585
 586         def trace_dispatch_return(self, frame, t):
 587                 rt, rtt, frame, rcur = self.cur
 588
 589                 rfn = `frame.f_code`
 590
 591                 pt, ptt, pframe, pcur = rcur
 592                 self.cur = pt, ptt+rt, pframe, pcur
 593
 594                 if self.timings.has_key(rfn):
 595                         nc, tt = self.timings[rfn]
 596                         self.timings[rfn] = nc + 1, rt + rtt + tt
 597                 else:
 598                         self.timings[rfn] =      1, rt + rtt
 599
 600                 return 1
 601
 602
 603         def snapshot_stats(self):
 604                 self.stats = {}
 605                 for func in self.timings.keys():
 606                         nc, tt = self.timings[func]
 607                         nor_func = self.func_normalize(func)
 608                         self.stats[nor_func] = nc, nc, tt, 0, {}
 609
 610
 611
 612 #****************************************************************************
 613 def Stats(*args):
 614         print 'Report generating functions are in the "pstats" module\a'