Lib/profile.py

   1 #
   2 # Class for profiling python code. rev 1.0  6/2/94
   3 #
   4 # Based on prior profile module by Sjoerd Mullender...
   5 #   which was hacked somewhat by: Guido van Rossum
   6 #
   7 # See profile.doc for more information
   8
   9
  10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
  11 # Written by James Roskind
  12 #
  13 # Permission to use, copy, modify, and distribute this Python software
  14 # and its associated documentation for any purpose (subject to the
  15 # restriction in the following sentence) without fee is hereby granted,
  16 # provided that the above copyright notice appears in all copies, and
  17 # that both that copyright notice and this permission notice appear in
  18 # supporting documentation, and that the name of InfoSeek not be used in
  19 # advertising or publicity pertaining to distribution of the software
  20 # without specific, written prior permission.  This permission is
  21 # explicitly restricted to the copying and modification of the software
  22 # to remain in Python, compiled Python, or other languages (such as C)
  23 # wherein the modified or derived code is exclusively imported into a
  24 # Python module.
  25 #
  26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
  29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
  30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
  31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  33
  34
  35
  36 import sys
  37 import os
  38 import time
  39 import string
  40 import marshal
  41
  42
  43 # Global variables
  44 func_norm_dict = {}
  45 func_norm_counter = 0
  46 pid_string = `os.getpid()`
  47
  48
  49 # Optimized intermodule references
  50 ostimes = os.times
  51
  52
  53 # Sample timer for use with
  54 #i_count = 0
  55 #def integer_timer():
  56 #       global i_count
  57 #       i_count = i_count + 1
  58 #       return i_count
  59 #itimes = integer_timer # replace with C coded timer returning integers
  60
  61 #**************************************************************************
  62 # The following are the static member functions for the profiler class
  63 # Note that an instance of Profile() is *not* needed to call them.
  64 #**************************************************************************
  65
  66
  67 # simplified user interface
  68 def run(statement, *args):
  69         prof = Profile()
  70         try:
  71                 prof = prof.run(statement)
  72         except SystemExit:
  73                 pass
  74         if args:
  75                 prof.dump_stats(args[0])
  76         else:
  77                 return prof.print_stats()
  78
  79 # print help
  80 def help():
  81         for dirname in sys.path:
  82                 fullname = os.path.join(dirname, 'profile.doc')
  83                 if os.path.exists(fullname):
  84                         sts = os.system('${PAGER-more} '+fullname)
  85                         if sts: print '*** Pager exit status:', sts
  86                         break
  87         else:
  88                 print 'Sorry, can\'t find the help file "profile.doc"',
  89                 print 'along the Python search path'
  90
  91
  92 #**************************************************************************
  93 # class Profile documentation:
  94 #**************************************************************************
  95 # self.cur is always a tuple.  Each such tuple corresponds to a stack
  96 # frame that is currently active (self.cur[-2]).  The following are the
  97 # definitions of its members.  We use this external "parallel stack" to
  98 # avoid contaminating the program that we are profiling. (old profiler
  99 # used to write into the frames local dictionary!!) Derived classes
 100 # can change the definition of some entries, as long as they leave
 101 # [-2:] intact.
 102 #
 103 # [ 0] = Time that needs to be charged to the parent frame's function.  It is
 104 #        used so that a function call will not have to access the timing data
 105 #        for the parents frame.
 106 # [ 1] = Total time spent in this frame's function, excluding time in
 107 #        subfunctions
 108 # [ 2] = Cumulative time spent in this frame's function, including time in
 109 #        all subfunctions to this frame.
 110 # [-3] = Name of the function that corresonds to this frame.
 111 # [-2] = Actual frame that we correspond to (used to sync exception handling)
 112 # [-1] = Our parent 6-tuple (corresonds to frame.f_back)
 113 #**************************************************************************
 114 # Timing data for each function is stored as a 5-tuple in the dictionary
 115 # self.timings[].  The index is always the name stored in self.cur[4].
 116 # The following are the definitions of the members:
 117 #
 118 # [0] = The number of times this function was called, not counting direct
 119 #       or indirect recursion,
 120 # [1] = Number of times this function appears on the stack, minus one
 121 # [2] = Total time spent internal to this function
 122 # [3] = Cumulative time that this function was present on the stack.  In
 123 #       non-recursive functions, this is the total execution time from start
 124 #       to finish of each invocation of a function, including time spent in
 125 #       all subfunctions.
 126 # [5] = A dictionary indicating for each function name, the number of times
 127 #       it was called by us.
 128 #**************************************************************************
 129 # We produce function names via a repr() call on the f_code object during
 130 # profiling. This save a *lot* of CPU time.  This results in a string that
 131 # always looks like:
 132 #   <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
 133 # After we "normalize it, it is a tuple of filename, line, function-name.
 134 # We wait till we are done profiling to do the normalization.
 135 # *IF* this repr format changes, then only the normalization routine should
 136 # need to be fixed.
 137 #**************************************************************************
 138 class Profile:
 139
 140         def __init__(self, *arg):
 141                 self.timings = {}
 142                 self.cur = None
 143                 self.cmd = ""
 144
 145                 self.dispatch = {  \
 146                           'call'     : self.trace_dispatch_call, \
 147                           'return'   : self.trace_dispatch_return, \
 148                           'exception': self.trace_dispatch_exception, \
 149                           }
 150
 151                 if not arg:
 152                         self.timer = os.times
 153                         self.dispatcher = self.trace_dispatch
 154                 else:
 155                         self.timer = arg[0]
 156                         t = self.timer() # test out timer function
 157                         try:
 158                                 if len(t) == 2:
 159                                         self.dispatcher = self.trace_dispatch
 160                                 else:
 161                                         self.dispatcher = self.trace_dispatch_r
 162                         except:
 163                                 self.dispatcher = self.trace_dispatch_i
 164                 self.t = self.get_time()
 165                 self.simulate_call('profiler')
 166
 167
 168         def get_time(self): # slow simulation of method to acquire time
 169                 t = self.timer()
 170                 if type(t) == type(()) or type(t) == type([]):
 171                         t = reduce(lambda x,y: x+y, t, 0)
 172                 return t
 173
 174
 175         # Heavily optimized dispatch routine for os.times() timer
 176
 177         def trace_dispatch(self, frame, event, arg):
 178                 t = self.timer()
 179                 t = t[0] + t[1] - self.t        # No Calibration constant
 180                 # t = t[0] + t[1] - self.t - .00053 # Calibration constant
 181
 182                 if self.dispatch[event](frame,t):
 183                         t = self.timer()
 184                         self.t = t[0] + t[1]
 185                 else:
 186                         r = self.timer()
 187                         self.t = r[0] + r[1] - t # put back unrecorded delta
 188                 return
 189
 190
 191
 192         # Dispatch routine for best timer program (return = scalar integer)
 193
 194         def trace_dispatch_i(self, frame, event, arg):
 195                 t = self.timer() - self.t # - 1 # Integer calibration constant
 196                 if self.dispatch[event](frame,t):
 197                         self.t = self.timer()
 198                 else:
 199                         self.t = self.timer() - t  # put back unrecorded delta
 200                 return
 201
 202
 203         # SLOW generic dispatch rountine for timer returning lists of numbers
 204
 205         def trace_dispatch_l(self, frame, event, arg):
 206                 t = self.get_time() - self.t
 207
 208                 if self.dispatch[event](frame,t):
 209                         self.t = self.get_time()
 210                 else:
 211                         self.t = self.get_time()-t # put back unrecorded delta
 212                 return
 213
 214
 215         def trace_dispatch_exception(self, frame, t):
 216                 rt, rtt, rct, rfn, rframe, rcur = self.cur
 217                 if (not rframe is frame) and rcur:
 218                         return self.trace_dispatch_return(rframe, t)
 219                 return 0
 220
 221
 222         def trace_dispatch_call(self, frame, t):
 223                 fn = `frame.f_code`
 224
 225                 # The following should be about the best approach, but
 226                 # we would need a function that maps from id() back to
 227                 # the actual code object.
 228                 #     fn = id(frame.f_code)
 229                 # Note we would really use our own function, which would
 230                 # return the code address, *and* bump the ref count.  We
 231                 # would then fix up the normalize function to do the
 232                 # actualy repr(fn) call.
 233
 234                 # The following is an interesting alternative
 235                 # It doesn't do as good a job, and it doesn't run as
 236                 # fast 'cause repr() is written in C, and this is Python.
 237                 #fcode = frame.f_code
 238                 #code = fcode.co_code
 239                 #if ord(code[0]) == 127: #  == SET_LINENO
 240                 #       # see "opcode.h" in the Python source
 241                 #       fn = (fcode.co_filename, ord(code[1]) | \
 242                 #                 ord(code[2]) << 8, fcode.co_name)
 243                 #else:
 244                 #       fn = (fcode.co_filename, 0, fcode.co_name)
 245
 246                 self.cur = (t, 0, 0, fn, frame, self.cur)
 247                 if self.timings.has_key(fn):
 248                         cc, ns, tt, ct, callers = self.timings[fn]
 249                         self.timings[fn] = cc, ns + 1, tt, ct, callers
 250                 else:
 251                         self.timings[fn] = 0, 0, 0, 0, {}
 252                 return 1
 253
 254         def trace_dispatch_return(self, frame, t):
 255                 # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
 256
 257                 # Prefix "r" means part of the Returning or exiting frame
 258                 # Prefix "p" means part of the Previous or older frame
 259
 260                 rt, rtt, rct, rfn, frame, rcur = self.cur
 261                 rtt = rtt + t
 262                 sft = rtt + rct
 263
 264                 pt, ptt, pct, pfn, pframe, pcur = rcur
 265                 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 266
 267                 cc, ns, tt, ct, callers = self.timings[rfn]
 268                 if not ns:
 269                         ct = ct + sft
 270                         cc = cc + 1
 271                 if callers.has_key(pfn):
 272                         callers[pfn] = callers[pfn] + 1  # hack: gather more
 273                         # stats such as the amount of time added to ct courtesy
 274                         # of this specific call, and the contribution to cc
 275                         # courtesy of this call.
 276                 else:
 277                         callers[pfn] = 1
 278                 self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
 279
 280                 return 1
 281
 282         # The next few function play with self.cmd. By carefully preloading
 283         # our paralell stack, we can force the profiled result to include
 284         # an arbitrary string as the name of the calling function.
 285         # We use self.cmd as that string, and the resulting stats look
 286         # very nice :-).
 287
 288         def set_cmd(self, cmd):
 289                 if self.cur[-1]: return   # already set
 290                 self.cmd = cmd
 291                 self.simulate_call(cmd)
 292
 293         class fake_code:
 294                 def __init__(self, filename, line, name):
 295                         self.co_filename = filename
 296                         self.co_line = line
 297                         self.co_name = name
 298                         self.co_code = '\0'  # anything but 127
 299
 300                 def __repr__(self):
 301                         return (self.co_filename, self.co_line, self.co_name)
 302
 303         class fake_frame:
 304                 def __init__(self, code, prior):
 305                         self.f_code = code
 306                         self.f_back = prior
 307
 308         def simulate_call(self, name):
 309                 code = self.fake_code('profile', 0, name)
 310                 if self.cur:
 311                         pframe = self.cur[-2]
 312                 else:
 313                         pframe = None
 314                 frame = self.fake_frame(code, pframe)
 315                 a = self.dispatch['call'](frame, 0)
 316                 return
 317
 318         # collect stats from pending stack, including getting final
 319         # timings for self.cmd frame.
 320
 321         def simulate_cmd_complete(self):
 322                 t = self.get_time() - self.t
 323                 while self.cur[-1]:
 324                         # We *can* cause assertion errors here if
 325                         # dispatch_trace_return checks for a frame match!
 326                         a = self.dispatch['return'](self.cur[-2], t)
 327                         t = 0
 328                 self.t = self.get_time() - t
 329
 330
 331         def print_stats(self):
 332                 import pstats
 333                 pstats.Stats(self).strip_dirs().sort_stats(-1). \
 334                           print_stats()
 335
 336         def dump_stats(self, file):
 337                 f = open(file, 'w')
 338                 self.create_stats()
 339                 marshal.dump(self.stats, f)
 340                 f.close()
 341
 342         def create_stats(self):
 343                 self.simulate_cmd_complete()
 344                 self.snapshot_stats()
 345
 346         def snapshot_stats(self):
 347                 self.stats = {}
 348                 for func in self.timings.keys():
 349                         cc, ns, tt, ct, callers = self.timings[func]
 350                         nor_func = self.func_normalize(func)
 351                         nor_callers = {}
 352                         nc = 0
 353                         for func_caller in callers.keys():
 354                                 nor_callers[self.func_normalize(func_caller)]=\
 355                                           callers[func_caller]
 356                                 nc = nc + callers[func_caller]
 357                         self.stats[nor_func] = cc, nc, tt, ct, nor_callers
 358
 359
 360         # Override the following function if you can figure out
 361         # a better name for the binary f_code entries.  I just normalize
 362         # them sequentially in a dictionary.  It would be nice if we could
 363         # *really* see the name of the underlying C code :-).  Sometimes
 364         #  you can figure out what-is-what by looking at caller and callee
 365         # lists (and knowing what your python code does).
 366
 367         def func_normalize(self, func_name):
 368                 global func_norm_dict
 369                 global func_norm_counter
 370                 global func_sequence_num
 371
 372                 if func_norm_dict.has_key(func_name):
 373                         return func_norm_dict[func_name]
 374                 if type(func_name) == type(""):
 375                         long_name = string.split(func_name)
 376                         file_name = long_name[6][1:-2]
 377                         func = long_name[2]
 378                         lineno = long_name[8][:-1]
 379                         if '?' == func:   # Until I find out how to may 'em...
 380                                 file_name = 'python'
 381                                 func_norm_counter = func_norm_counter + 1
 382                                 func = pid_string + ".C." + `func_norm_counter`
 383                         result =  file_name ,  string.atoi(lineno) , func
 384                 else:
 385                         result = func_name
 386                 func_norm_dict[func_name] = result
 387                 return result
 388
 389
 390         # The following two methods can be called by clients to use
 391         # a profiler to profile a statement, given as a string.
 392
 393         def run(self, cmd):
 394                 import __main__
 395                 dict = __main__.__dict__
 396                 self.runctx(cmd, dict, dict)
 397                 return self
 398
 399         def runctx(self, cmd, globals, locals):
 400                 self.set_cmd(cmd)
 401                 sys.setprofile(self.trace_dispatch)
 402                 try:
 403                         exec(cmd, globals, locals)
 404                 finally:
 405                         sys.setprofile(None)
 406
 407         # This method is more useful to profile a single function call.
 408         def runcall(self, func, *args):
 409                 self.set_cmd(func.__name__)
 410                 sys.setprofile(self.trace_dispatch)
 411                 try:
 412                         apply(func, args)
 413                 finally:
 414                         sys.setprofile(None)
 415                 return self
 416
 417
 418         #******************************************************************
 419         # The following calculates the overhead for using a profiler.  The
 420         # problem is that it takes a fair amount of time for the profiler
 421         # to stop the stopwatch (from the time it recieves an event).
 422         # Similarly, there is a delay from the time that the profiler
 423         # re-starts the stopwatch before the user's code really gets to
 424         # continue.  The following code tries to measure the difference on
 425         # a per-event basis. The result can the be placed in the
 426         # Profile.dispatch_event() routine for the given platform.  Note
 427         # that this difference is only significant if there are a lot of
 428         # events, and relatively little user code per event.  For example,
 429         # code with small functions will typically benefit from having the
 430         # profiler calibrated for the current platform.  This *could* be
 431         # done on the fly during init() time, but it is not worth the
 432         # effort.  Also note that if too large a value specified, then
 433         # execution time on some functions will actually appear as a
 434         # negative number.  It is *normal* for some functions (with very
 435         # low call counts) to have such negative stats, even if the
 436         # calibration figure is "correct."
 437         #
 438         # One alternative to profile-time calibration adjustments (i.e.,
 439         # adding in the magic little delta during each event) is to track
 440         # more carefully the number of events (and cumulatively, the number
 441         # of events during sub functions) that are seen.  If this were
 442         # done, then the arithmetic could be done after the fact (i.e., at
 443         # display time).  Currintly, we track only call/return events.
 444         # These values can be deduced by examining the callees and callers
 445         # vectors for each functions.  Hence we *can* almost correct the
 446         # internal time figure at print time (note that we currently don't
 447         # track exception event processing counts).  Unfortunately, there
 448         # is currently no similar information for cumulative sub-function
 449         # time.  It would not be hard to "get all this info" at profiler
 450         # time.  Specifically, we would have to extend the tuples to keep
 451         # counts of this in each frame, and then extend the defs of timing
 452         # tuples to include the significant two figures. I'm a bit fearful
 453         # that this additional feature will slow the heavily optimized
 454         # event/time ratio (i.e., the profiler would run slower, fur a very
 455         # low "value added" feature.)
 456         #
 457         # Plugging in the calibration constant doesn't slow down the
 458         # profiler very much, and the accuracy goes way up.
 459         #**************************************************************
 460
 461         def calibrate(self, m):
 462                 n = m
 463                 s = self.timer()
 464                 while n:
 465                         self.simple()
 466                         n = n - 1
 467                 f = self.timer()
 468                 my_simple = f[0]+f[1]-s[0]-s[1]
 469                 #print "Simple =", my_simple,
 470
 471                 n = m
 472                 s = self.timer()
 473                 while n:
 474                         self.instrumented()
 475                         n = n - 1
 476                 f = self.timer()
 477                 my_inst = f[0]+f[1]-s[0]-s[1]
 478                 # print "Instrumented =", my_inst
 479                 avg_cost = (my_inst - my_simple)/m
 480                 #print "Delta/call =", avg_cost, "(profiler fixup constant)"
 481                 return avg_cost
 482
 483         # simulate a program with no profiler activity
 484         def simple(self):
 485                 a = 1
 486                 pass
 487
 488         # simulate a program with call/return event processing
 489         def instrumented(self):
 490                 a = 1
 491                 self.profiler_simulation(a, a, a)
 492
 493         # simulate an event processing activity (from user's perspective)
 494         def profiler_simulation(self, x, y, z):
 495                 t = self.timer()
 496                 t = t[0] + t[1]
 497                 self.ut = t
 498
 499
 500
 501 #****************************************************************************
 502 # OldProfile class documentation
 503 #****************************************************************************
 504 #
 505 # The following derived profiler simulates the old style profile, providing
 506 # errant results on recursive functions. The reason for the usefulnes of this
 507 # profiler is that it runs faster (i.e., less overhead).  It still creates
 508 # all the caller stats, and is quite useful when there is *no* recursion
 509 # in the user's code.
 510 #
 511 # This code also shows how easy it is to create a modified profiler.
 512 #****************************************************************************
 513 class OldProfile(Profile):
 514         def trace_dispatch_exception(self, frame, t):
 515                 rt, rtt, rct, rfn, rframe, rcur = self.cur
 516                 if rcur and not rframe is frame:
 517                         return self.trace_dispatch_return(rframe, t)
 518                 return 0
 519
 520         def trace_dispatch_call(self, frame, t):
 521                 fn = `frame.f_code`
 522
 523                 self.cur = (t, 0, 0, fn, frame, self.cur)
 524                 if self.timings.has_key(fn):
 525                         tt, ct, callers = self.timings[fn]
 526                         self.timings[fn] = tt, ct, callers
 527                 else:
 528                         self.timings[fn] = 0, 0, {}
 529                 return 1
 530
 531         def trace_dispatch_return(self, frame, t):
 532                 rt, rtt, rct, rfn, frame, rcur = self.cur
 533                 rtt = rtt + t
 534                 sft = rtt + rct
 535
 536                 pt, ptt, pct, pfn, pframe, pcur = rcur
 537                 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
 538
 539                 tt, ct, callers = self.timings[rfn]
 540                 if callers.has_key(pfn):
 541                         callers[pfn] = callers[pfn] + 1
 542                 else:
 543                         callers[pfn] = 1
 544                 self.timings[rfn] = tt+rtt, ct + sft, callers
 545
 546                 return 1
 547
 548
 549         def snapshot_stats(self):
 550                 self.stats = {}
 551                 for func in self.timings.keys():
 552                         tt, ct, callers = self.timings[func]
 553                         nor_func = self.func_normalize(func)
 554                         nor_callers = {}
 555                         nc = 0
 556                         for func_caller in callers.keys():
 557                                 nor_callers[self.func_normalize(func_caller)]=\
 558                                           callers[func_caller]
 559                                 nc = nc + callers[func_caller]
 560                         self.stats[nor_func] = nc, nc, tt, ct, nor_callers
 561
 562
 563
 564 #****************************************************************************
 565 # HotProfile class documentation
 566 #****************************************************************************
 567 #
 568 # This profiler is the fastest derived profile example.  It does not
 569 # calculate caller-callee relationships, and does not calculate cumulative
 570 # time under a function.  It only calculates time spent in a function, so
 571 # it runs very quickly (re: very low overhead)
 572 #****************************************************************************
 573 class HotProfile(Profile):
 574         def trace_dispatch_exception(self, frame, t):
 575                 rt, rtt, rfn, rframe, rcur = self.cur
 576                 if rcur and not rframe is frame:
 577                         return self.trace_dispatch_return(rframe, t)
 578                 return 0
 579
 580         def trace_dispatch_call(self, frame, t):
 581                 self.cur = (t, 0, frame, self.cur)
 582                 return 1
 583
 584         def trace_dispatch_return(self, frame, t):
 585                 rt, rtt, frame, rcur = self.cur
 586
 587                 rfn = `frame.f_code`
 588
 589                 pt, ptt, pframe, pcur = rcur
 590                 self.cur = pt, ptt+rt, pframe, pcur
 591
 592                 if self.timings.has_key(rfn):
 593                         nc, tt = self.timings[rfn]
 594                         self.timings[rfn] = nc + 1, rt + rtt + tt
 595                 else:
 596                         self.timings[rfn] =      1, rt + rtt
 597
 598                 return 1
 599
 600
 601         def snapshot_stats(self):
 602                 self.stats = {}
 603                 for func in self.timings.keys():
 604                         nc, tt = self.timings[func]
 605                         nor_func = self.func_normalize(func)
 606                         self.stats[nor_func] = nc, nc, tt, 0, {}
 607
 608
 609
 610 #****************************************************************************
 611 def Stats(*args):
 612         print 'Report generating functions are in the "pstats" module\a'