really the last log entry for 1.1
[python/dscho.git] / Lib / profile.py
blob35ed63e51817824093b816e61843220b5e39d83b
2 # Class for profiling python code. rev 1.0 6/2/94
4 # Based on prior profile module by Sjoerd Mullender...
5 # which was hacked somewhat by: Guido van Rossum
7 # See profile.doc for more information
10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
11 # Written by James Roskind
13 # Permission to use, copy, modify, and distribute this Python software
14 # and its associated documentation for any purpose (subject to the
15 # restriction in the following sentence) without fee is hereby granted,
16 # provided that the above copyright notice appears in all copies, and
17 # that both that copyright notice and this permission notice appear in
18 # supporting documentation, and that the name of InfoSeek not be used in
19 # advertising or publicity pertaining to distribution of the software
20 # without specific, written prior permission. This permission is
21 # explicitly restricted to the copying and modification of the software
22 # to remain in Python, compiled Python, or other languages (such as C)
23 # wherein the modified or derived code is exclusively imported into a
24 # Python module.
26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
36 import sys
37 import os
38 import time
39 import string
40 import marshal
43 # Global variables
44 func_norm_dict = {}
45 func_norm_counter = 0
46 pid_string = `os.getpid()`
49 # Optimized intermodule references
50 ostimes = os.times
53 # Sample timer for use with
54 #i_count = 0
55 #def integer_timer():
56 # global i_count
57 # i_count = i_count + 1
58 # return i_count
59 #itimes = integer_timer # replace with C coded timer returning integers
61 #**************************************************************************
62 # The following are the static member functions for the profiler class
63 # Note that an instance of Profile() is *not* needed to call them.
64 #**************************************************************************
67 # simplified user interface
68 def run(statement, *args):
69 prof = Profile()
70 try:
71 prof = prof.run(statement)
72 except SystemExit:
73 pass
74 if args:
75 prof.dump_stats(args[0])
76 else:
77 return prof.print_stats()
79 # print help
80 def help():
81 for dirname in sys.path:
82 fullname = os.path.join(dirname, 'profile.doc')
83 if os.path.exists(fullname):
84 sts = os.system('${PAGER-more} '+fullname)
85 if sts: print '*** Pager exit status:', sts
86 break
87 else:
88 print 'Sorry, can\'t find the help file "profile.doc"',
89 print 'along the Python search path'
92 #**************************************************************************
93 # class Profile documentation:
94 #**************************************************************************
95 # self.cur is always a tuple. Each such tuple corresponds to a stack
96 # frame that is currently active (self.cur[-2]). The following are the
97 # definitions of its members. We use this external "parallel stack" to
98 # avoid contaminating the program that we are profiling. (old profiler
99 # used to write into the frames local dictionary!!) Derived classes
100 # can change the definition of some entries, as long as they leave
101 # [-2:] intact.
103 # [ 0] = Time that needs to be charged to the parent frame's function. It is
104 # used so that a function call will not have to access the timing data
105 # for the parents frame.
106 # [ 1] = Total time spent in this frame's function, excluding time in
107 # subfunctions
108 # [ 2] = Cumulative time spent in this frame's function, including time in
109 # all subfunctions to this frame.
110 # [-3] = Name of the function that corresonds to this frame.
111 # [-2] = Actual frame that we correspond to (used to sync exception handling)
112 # [-1] = Our parent 6-tuple (corresonds to frame.f_back)
113 #**************************************************************************
114 # Timing data for each function is stored as a 5-tuple in the dictionary
115 # self.timings[]. The index is always the name stored in self.cur[4].
116 # The following are the definitions of the members:
118 # [0] = The number of times this function was called, not counting direct
119 # or indirect recursion,
120 # [1] = Number of times this function appears on the stack, minus one
121 # [2] = Total time spent internal to this function
122 # [3] = Cumulative time that this function was present on the stack. In
123 # non-recursive functions, this is the total execution time from start
124 # to finish of each invocation of a function, including time spent in
125 # all subfunctions.
126 # [5] = A dictionary indicating for each function name, the number of times
127 # it was called by us.
128 #**************************************************************************
129 # We produce function names via a repr() call on the f_code object during
130 # profiling. This save a *lot* of CPU time. This results in a string that
131 # always looks like:
132 # <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
133 # After we "normalize it, it is a tuple of filename, line, function-name.
134 # We wait till we are done profiling to do the normalization.
135 # *IF* this repr format changes, then only the normalization routine should
136 # need to be fixed.
137 #**************************************************************************
138 class Profile:
140 def __init__(self, *arg):
141 self.timings = {}
142 self.cur = None
143 self.cmd = ""
145 self.dispatch = { \
146 'call' : self.trace_dispatch_call, \
147 'return' : self.trace_dispatch_return, \
148 'exception': self.trace_dispatch_exception, \
151 if not arg:
152 self.timer = os.times
153 self.dispatcher = self.trace_dispatch
154 else:
155 self.timer = arg[0]
156 t = self.timer() # test out timer function
157 try:
158 if len(t) == 2:
159 self.dispatcher = self.trace_dispatch
160 else:
161 self.dispatcher = self.trace_dispatch_r
162 except:
163 self.dispatcher = self.trace_dispatch_i
164 self.t = self.get_time()
165 self.simulate_call('profiler')
168 def get_time(self): # slow simulation of method to acquire time
169 t = self.timer()
170 if type(t) == type(()) or type(t) == type([]):
171 t = reduce(lambda x,y: x+y, t, 0)
172 return t
175 # Heavily optimized dispatch routine for os.times() timer
177 def trace_dispatch(self, frame, event, arg):
178 t = self.timer()
179 t = t[0] + t[1] - self.t # No Calibration constant
180 # t = t[0] + t[1] - self.t - .00053 # Calibration constant
182 if self.dispatch[event](frame,t):
183 t = self.timer()
184 self.t = t[0] + t[1]
185 else:
186 r = self.timer()
187 self.t = r[0] + r[1] - t # put back unrecorded delta
188 return
192 # Dispatch routine for best timer program (return = scalar integer)
194 def trace_dispatch_i(self, frame, event, arg):
195 t = self.timer() - self.t # - 1 # Integer calibration constant
196 if self.dispatch[event](frame,t):
197 self.t = self.timer()
198 else:
199 self.t = self.timer() - t # put back unrecorded delta
200 return
203 # SLOW generic dispatch rountine for timer returning lists of numbers
205 def trace_dispatch_l(self, frame, event, arg):
206 t = self.get_time() - self.t
208 if self.dispatch[event](frame,t):
209 self.t = self.get_time()
210 else:
211 self.t = self.get_time()-t # put back unrecorded delta
212 return
215 def trace_dispatch_exception(self, frame, t):
216 rt, rtt, rct, rfn, rframe, rcur = self.cur
217 if (not rframe is frame) and rcur:
218 return self.trace_dispatch_return(rframe, t)
219 return 0
222 def trace_dispatch_call(self, frame, t):
223 fn = `frame.f_code`
225 # The following should be about the best approach, but
226 # we would need a function that maps from id() back to
227 # the actual code object.
228 # fn = id(frame.f_code)
229 # Note we would really use our own function, which would
230 # return the code address, *and* bump the ref count. We
231 # would then fix up the normalize function to do the
232 # actualy repr(fn) call.
234 # The following is an interesting alternative
235 # It doesn't do as good a job, and it doesn't run as
236 # fast 'cause repr() is written in C, and this is Python.
237 #fcode = frame.f_code
238 #code = fcode.co_code
239 #if ord(code[0]) == 127: # == SET_LINENO
240 # # see "opcode.h" in the Python source
241 # fn = (fcode.co_filename, ord(code[1]) | \
242 # ord(code[2]) << 8, fcode.co_name)
243 #else:
244 # fn = (fcode.co_filename, 0, fcode.co_name)
246 self.cur = (t, 0, 0, fn, frame, self.cur)
247 if self.timings.has_key(fn):
248 cc, ns, tt, ct, callers = self.timings[fn]
249 self.timings[fn] = cc, ns + 1, tt, ct, callers
250 else:
251 self.timings[fn] = 0, 0, 0, 0, {}
252 return 1
254 def trace_dispatch_return(self, frame, t):
255 # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
257 # Prefix "r" means part of the Returning or exiting frame
258 # Prefix "p" means part of the Previous or older frame
260 rt, rtt, rct, rfn, frame, rcur = self.cur
261 rtt = rtt + t
262 sft = rtt + rct
264 pt, ptt, pct, pfn, pframe, pcur = rcur
265 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
267 cc, ns, tt, ct, callers = self.timings[rfn]
268 if not ns:
269 ct = ct + sft
270 cc = cc + 1
271 if callers.has_key(pfn):
272 callers[pfn] = callers[pfn] + 1 # hack: gather more
273 # stats such as the amount of time added to ct courtesy
274 # of this specific call, and the contribution to cc
275 # courtesy of this call.
276 else:
277 callers[pfn] = 1
278 self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
280 return 1
282 # The next few function play with self.cmd. By carefully preloading
283 # our paralell stack, we can force the profiled result to include
284 # an arbitrary string as the name of the calling function.
285 # We use self.cmd as that string, and the resulting stats look
286 # very nice :-).
288 def set_cmd(self, cmd):
289 if self.cur[-1]: return # already set
290 self.cmd = cmd
291 self.simulate_call(cmd)
293 class fake_code:
294 def __init__(self, filename, line, name):
295 self.co_filename = filename
296 self.co_line = line
297 self.co_name = name
298 self.co_code = '\0' # anything but 127
300 def __repr__(self):
301 return (self.co_filename, self.co_line, self.co_name)
303 class fake_frame:
304 def __init__(self, code, prior):
305 self.f_code = code
306 self.f_back = prior
308 def simulate_call(self, name):
309 code = self.fake_code('profile', 0, name)
310 if self.cur:
311 pframe = self.cur[-2]
312 else:
313 pframe = None
314 frame = self.fake_frame(code, pframe)
315 a = self.dispatch['call'](frame, 0)
316 return
318 # collect stats from pending stack, including getting final
319 # timings for self.cmd frame.
321 def simulate_cmd_complete(self):
322 t = self.get_time() - self.t
323 while self.cur[-1]:
324 # We *can* cause assertion errors here if
325 # dispatch_trace_return checks for a frame match!
326 a = self.dispatch['return'](self.cur[-2], t)
327 t = 0
328 self.t = self.get_time() - t
331 def print_stats(self):
332 import pstats
333 pstats.Stats(self).strip_dirs().sort_stats(-1). \
334 print_stats()
336 def dump_stats(self, file):
337 f = open(file, 'w')
338 self.create_stats()
339 marshal.dump(self.stats, f)
340 f.close()
342 def create_stats(self):
343 self.simulate_cmd_complete()
344 self.snapshot_stats()
346 def snapshot_stats(self):
347 self.stats = {}
348 for func in self.timings.keys():
349 cc, ns, tt, ct, callers = self.timings[func]
350 nor_func = self.func_normalize(func)
351 nor_callers = {}
352 nc = 0
353 for func_caller in callers.keys():
354 nor_callers[self.func_normalize(func_caller)]=\
355 callers[func_caller]
356 nc = nc + callers[func_caller]
357 self.stats[nor_func] = cc, nc, tt, ct, nor_callers
360 # Override the following function if you can figure out
361 # a better name for the binary f_code entries. I just normalize
362 # them sequentially in a dictionary. It would be nice if we could
363 # *really* see the name of the underlying C code :-). Sometimes
364 # you can figure out what-is-what by looking at caller and callee
365 # lists (and knowing what your python code does).
367 def func_normalize(self, func_name):
368 global func_norm_dict
369 global func_norm_counter
370 global func_sequence_num
372 if func_norm_dict.has_key(func_name):
373 return func_norm_dict[func_name]
374 if type(func_name) == type(""):
375 long_name = string.split(func_name)
376 file_name = long_name[6][1:-2]
377 func = long_name[2]
378 lineno = long_name[8][:-1]
379 if '?' == func: # Until I find out how to may 'em...
380 file_name = 'python'
381 func_norm_counter = func_norm_counter + 1
382 func = pid_string + ".C." + `func_norm_counter`
383 result = file_name , string.atoi(lineno) , func
384 else:
385 result = func_name
386 func_norm_dict[func_name] = result
387 return result
390 # The following two methods can be called by clients to use
391 # a profiler to profile a statement, given as a string.
393 def run(self, cmd):
394 import __main__
395 dict = __main__.__dict__
396 self.runctx(cmd, dict, dict)
397 return self
399 def runctx(self, cmd, globals, locals):
400 self.set_cmd(cmd)
401 sys.setprofile(self.trace_dispatch)
402 try:
403 exec(cmd, globals, locals)
404 finally:
405 sys.setprofile(None)
407 # This method is more useful to profile a single function call.
408 def runcall(self, func, *args):
409 self.set_cmd(func.__name__)
410 sys.setprofile(self.trace_dispatch)
411 try:
412 apply(func, args)
413 finally:
414 sys.setprofile(None)
415 return self
418 #******************************************************************
419 # The following calculates the overhead for using a profiler. The
420 # problem is that it takes a fair amount of time for the profiler
421 # to stop the stopwatch (from the time it recieves an event).
422 # Similarly, there is a delay from the time that the profiler
423 # re-starts the stopwatch before the user's code really gets to
424 # continue. The following code tries to measure the difference on
425 # a per-event basis. The result can the be placed in the
426 # Profile.dispatch_event() routine for the given platform. Note
427 # that this difference is only significant if there are a lot of
428 # events, and relatively little user code per event. For example,
429 # code with small functions will typically benefit from having the
430 # profiler calibrated for the current platform. This *could* be
431 # done on the fly during init() time, but it is not worth the
432 # effort. Also note that if too large a value specified, then
433 # execution time on some functions will actually appear as a
434 # negative number. It is *normal* for some functions (with very
435 # low call counts) to have such negative stats, even if the
436 # calibration figure is "correct."
438 # One alternative to profile-time calibration adjustments (i.e.,
439 # adding in the magic little delta during each event) is to track
440 # more carefully the number of events (and cumulatively, the number
441 # of events during sub functions) that are seen. If this were
442 # done, then the arithmetic could be done after the fact (i.e., at
443 # display time). Currintly, we track only call/return events.
444 # These values can be deduced by examining the callees and callers
445 # vectors for each functions. Hence we *can* almost correct the
446 # internal time figure at print time (note that we currently don't
447 # track exception event processing counts). Unfortunately, there
448 # is currently no similar information for cumulative sub-function
449 # time. It would not be hard to "get all this info" at profiler
450 # time. Specifically, we would have to extend the tuples to keep
451 # counts of this in each frame, and then extend the defs of timing
452 # tuples to include the significant two figures. I'm a bit fearful
453 # that this additional feature will slow the heavily optimized
454 # event/time ratio (i.e., the profiler would run slower, fur a very
455 # low "value added" feature.)
457 # Plugging in the calibration constant doesn't slow down the
458 # profiler very much, and the accuracy goes way up.
459 #**************************************************************
461 def calibrate(self, m):
462 n = m
463 s = self.timer()
464 while n:
465 self.simple()
466 n = n - 1
467 f = self.timer()
468 my_simple = f[0]+f[1]-s[0]-s[1]
469 #print "Simple =", my_simple,
471 n = m
472 s = self.timer()
473 while n:
474 self.instrumented()
475 n = n - 1
476 f = self.timer()
477 my_inst = f[0]+f[1]-s[0]-s[1]
478 # print "Instrumented =", my_inst
479 avg_cost = (my_inst - my_simple)/m
480 #print "Delta/call =", avg_cost, "(profiler fixup constant)"
481 return avg_cost
483 # simulate a program with no profiler activity
484 def simple(self):
485 a = 1
486 pass
488 # simulate a program with call/return event processing
489 def instrumented(self):
490 a = 1
491 self.profiler_simulation(a, a, a)
493 # simulate an event processing activity (from user's perspective)
494 def profiler_simulation(self, x, y, z):
495 t = self.timer()
496 t = t[0] + t[1]
497 self.ut = t
501 #****************************************************************************
502 # OldProfile class documentation
503 #****************************************************************************
505 # The following derived profiler simulates the old style profile, providing
506 # errant results on recursive functions. The reason for the usefulnes of this
507 # profiler is that it runs faster (i.e., less overhead). It still creates
508 # all the caller stats, and is quite useful when there is *no* recursion
509 # in the user's code.
511 # This code also shows how easy it is to create a modified profiler.
512 #****************************************************************************
513 class OldProfile(Profile):
514 def trace_dispatch_exception(self, frame, t):
515 rt, rtt, rct, rfn, rframe, rcur = self.cur
516 if rcur and not rframe is frame:
517 return self.trace_dispatch_return(rframe, t)
518 return 0
520 def trace_dispatch_call(self, frame, t):
521 fn = `frame.f_code`
523 self.cur = (t, 0, 0, fn, frame, self.cur)
524 if self.timings.has_key(fn):
525 tt, ct, callers = self.timings[fn]
526 self.timings[fn] = tt, ct, callers
527 else:
528 self.timings[fn] = 0, 0, {}
529 return 1
531 def trace_dispatch_return(self, frame, t):
532 rt, rtt, rct, rfn, frame, rcur = self.cur
533 rtt = rtt + t
534 sft = rtt + rct
536 pt, ptt, pct, pfn, pframe, pcur = rcur
537 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
539 tt, ct, callers = self.timings[rfn]
540 if callers.has_key(pfn):
541 callers[pfn] = callers[pfn] + 1
542 else:
543 callers[pfn] = 1
544 self.timings[rfn] = tt+rtt, ct + sft, callers
546 return 1
549 def snapshot_stats(self):
550 self.stats = {}
551 for func in self.timings.keys():
552 tt, ct, callers = self.timings[func]
553 nor_func = self.func_normalize(func)
554 nor_callers = {}
555 nc = 0
556 for func_caller in callers.keys():
557 nor_callers[self.func_normalize(func_caller)]=\
558 callers[func_caller]
559 nc = nc + callers[func_caller]
560 self.stats[nor_func] = nc, nc, tt, ct, nor_callers
564 #****************************************************************************
565 # HotProfile class documentation
566 #****************************************************************************
568 # This profiler is the fastest derived profile example. It does not
569 # calculate caller-callee relationships, and does not calculate cumulative
570 # time under a function. It only calculates time spent in a function, so
571 # it runs very quickly (re: very low overhead)
572 #****************************************************************************
573 class HotProfile(Profile):
574 def trace_dispatch_exception(self, frame, t):
575 rt, rtt, rfn, rframe, rcur = self.cur
576 if rcur and not rframe is frame:
577 return self.trace_dispatch_return(rframe, t)
578 return 0
580 def trace_dispatch_call(self, frame, t):
581 self.cur = (t, 0, frame, self.cur)
582 return 1
584 def trace_dispatch_return(self, frame, t):
585 rt, rtt, frame, rcur = self.cur
587 rfn = `frame.f_code`
589 pt, ptt, pframe, pcur = rcur
590 self.cur = pt, ptt+rt, pframe, pcur
592 if self.timings.has_key(rfn):
593 nc, tt = self.timings[rfn]
594 self.timings[rfn] = nc + 1, rt + rtt + tt
595 else:
596 self.timings[rfn] = 1, rt + rtt
598 return 1
601 def snapshot_stats(self):
602 self.stats = {}
603 for func in self.timings.keys():
604 nc, tt = self.timings[func]
605 nor_func = self.func_normalize(func)
606 self.stats[nor_func] = nc, nc, tt, 0, {}
610 #****************************************************************************
611 def Stats(*args):
612 print 'Report generating functions are in the "pstats" module\a'