2 # Class for profiling python code. rev 1.0 6/2/94
4 # Based on prior profile module by Sjoerd Mullender...
5 # which was hacked somewhat by: Guido van Rossum
7 # See profile.doc for more information
10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
11 # Written by James Roskind
13 # Permission to use, copy, modify, and distribute this Python software
14 # and its associated documentation for any purpose (subject to the
15 # restriction in the following sentence) without fee is hereby granted,
16 # provided that the above copyright notice appears in all copies, and
17 # that both that copyright notice and this permission notice appear in
18 # supporting documentation, and that the name of InfoSeek not be used in
19 # advertising or publicity pertaining to distribution of the software
20 # without specific, written prior permission. This permission is
21 # explicitly restricted to the copying and modification of the software
22 # to remain in Python, compiled Python, or other languages (such as C)
23 # wherein the modified or derived code is exclusively imported into a
26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
46 pid_string
= `os
.getpid()`
49 # Optimized intermodule references
53 # Sample timer for use with
57 # i_count = i_count + 1
59 #itimes = integer_timer # replace with C coded timer returning integers
61 #**************************************************************************
62 # The following are the static member functions for the profiler class
63 # Note that an instance of Profile() is *not* needed to call them.
64 #**************************************************************************
67 # simplified user interface
68 def run(statement
, *args
):
71 prof
= prof
.run(statement
)
75 prof
.dump_stats(args
[0])
77 return prof
.print_stats()
81 for dirname
in sys
.path
:
82 fullname
= os
.path
.join(dirname
, 'profile.doc')
83 if os
.path
.exists(fullname
):
84 sts
= os
.system('${PAGER-more} '+fullname
)
85 if sts
: print '*** Pager exit status:', sts
88 print 'Sorry, can\'t find the help file "profile.doc"',
89 print 'along the Python search path'
92 #**************************************************************************
93 # class Profile documentation:
94 #**************************************************************************
95 # self.cur is always a tuple. Each such tuple corresponds to a stack
96 # frame that is currently active (self.cur[-2]). The following are the
97 # definitions of its members. We use this external "parallel stack" to
98 # avoid contaminating the program that we are profiling. (old profiler
99 # used to write into the frames local dictionary!!) Derived classes
100 # can change the definition of some entries, as long as they leave
103 # [ 0] = Time that needs to be charged to the parent frame's function. It is
104 # used so that a function call will not have to access the timing data
105 # for the parent's frame.
106 # [ 1] = Total time spent in this frame's function, excluding time in
108 # [ 2] = Cumulative time spent in this frame's function, including time in
109 # all subfunctions to this frame.
110 # [-3] = Name of the function that corresponds to this frame.
111 # [-2] = Actual frame that we correspond to (used to sync exception handling)
112 # [-1] = Our parent 6-tuple (corresponds to frame.f_back)
113 #**************************************************************************
114 # Timing data for each function is stored as a 5-tuple in the dictionary
115 # self.timings[]. The index is always the name stored in self.cur[-3].
116 # The following are the definitions of the members:
118 # [0] = The number of times this function was called, not counting direct
119 # or indirect recursion,
120 # [1] = Number of times this function appears on the stack, minus one
121 # [2] = Total time spent internal to this function
122 # [3] = Cumulative time that this function was present on the stack. In
123 # non-recursive functions, this is the total execution time from start
124 # to finish of each invocation of a function, including time spent in
126 # [4] = A dictionary indicating, for each calling function name, the number
127 # of times it called us.
128 #**************************************************************************
129 # We produce function names via a repr() call on the f_code object during
130 # profiling. This saves a *lot* of CPU time. This results in a string that
132 # <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
133 # After we "normalize" it, it is a tuple of filename, line, function-name.
134 # We wait till we are done profiling to do the normalization.
135 # *IF* this repr format changes, then only the normalization routine should
137 #**************************************************************************
140 def __init__(self
, *arg
):
146 'call' : self
.trace_dispatch_call
, \
147 'return' : self
.trace_dispatch_return
, \
148 'exception': self
.trace_dispatch_exception
, \
152 self
.timer
= os
.times
153 self
.dispatcher
= self
.trace_dispatch
156 t
= self
.timer() # test out timer function
159 self
.dispatcher
= self
.trace_dispatch
161 self
.dispatcher
= self
.trace_dispatch_r
163 self
.dispatcher
= self
.trace_dispatch_i
164 self
.t
= self
.get_time()
165 self
.simulate_call('profiler')
168 def get_time(self
): # slow simulation of method to acquire time
170 if type(t
) == type(()) or type(t
) == type([]):
171 t
= reduce(lambda x
,y
: x
+y
, t
, 0)
175 # Heavily optimized dispatch routine for os.times() timer
177 def trace_dispatch(self
, frame
, event
, arg
):
179 t
= t
[0] + t
[1] - self
.t
# No Calibration constant
180 # t = t[0] + t[1] - self.t - .00053 # Calibration constant
182 if self
.dispatch
[event
](frame
,t
):
187 self
.t
= r
[0] + r
[1] - t
# put back unrecorded delta
192 # Dispatch routine for best timer program (return = scalar integer)
194 def trace_dispatch_i(self
, frame
, event
, arg
):
195 t
= self
.timer() - self
.t
# - 1 # Integer calibration constant
196 if self
.dispatch
[event
](frame
,t
):
197 self
.t
= self
.timer()
199 self
.t
= self
.timer() - t
# put back unrecorded delta
203 # SLOW generic dispatch routine for timer returning lists of numbers
205 def trace_dispatch_l(self
, frame
, event
, arg
):
206 t
= self
.get_time() - self
.t
208 if self
.dispatch
[event
](frame
,t
):
209 self
.t
= self
.get_time()
211 self
.t
= self
.get_time()-t
# put back unrecorded delta
215 def trace_dispatch_exception(self
, frame
, t
):
216 rt
, rtt
, rct
, rfn
, rframe
, rcur
= self
.cur
217 if (not rframe
is frame
) and rcur
:
218 return self
.trace_dispatch_return(rframe
, t
)
222 def trace_dispatch_call(self
, frame
, t
):
225 # The following should be about the best approach, but
226 # we would need a function that maps from id() back to
227 # the actual code object.
228 # fn = id(frame.f_code)
229 # Note we would really use our own function, which would
230 # return the code address, *and* bump the ref count. We
231 # would then fix up the normalize function to do the
232 # actual repr(fn) call.
234 # The following is an interesting alternative
235 # It doesn't do as good a job, and it doesn't run as
236 # fast 'cause repr() is written in C, and this is Python.
237 #fcode = frame.f_code
238 #code = fcode.co_code
239 #if ord(code[0]) == 127: # == SET_LINENO
240 # # see "opcode.h" in the Python source
241 # fn = (fcode.co_filename, ord(code[1]) | \
242 # ord(code[2]) << 8, fcode.co_name)
244 # fn = (fcode.co_filename, 0, fcode.co_name)
246 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
247 if self
.timings
.has_key(fn
):
248 cc
, ns
, tt
, ct
, callers
= self
.timings
[fn
]
249 self
.timings
[fn
] = cc
, ns
+ 1, tt
, ct
, callers
251 self
.timings
[fn
] = 0, 0, 0, 0, {}
254 def trace_dispatch_return(self
, frame
, t
):
255 # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
257 # Prefix "r" means part of the Returning or exiting frame
258 # Prefix "p" means part of the Previous or older frame
260 rt
, rtt
, rct
, rfn
, frame
, rcur
= self
.cur
264 pt
, ptt
, pct
, pfn
, pframe
, pcur
= rcur
265 self
.cur
= pt
, ptt
+rt
, pct
+sft
, pfn
, pframe
, pcur
267 cc
, ns
, tt
, ct
, callers
= self
.timings
[rfn
]
271 if callers
.has_key(pfn
):
272 callers
[pfn
] = callers
[pfn
] + 1 # hack: gather more
273 # stats such as the amount of time added to ct courtesy
274 # of this specific call, and the contribution to cc
275 # courtesy of this call.
278 self
.timings
[rfn
] = cc
, ns
- 1, tt
+rtt
, ct
, callers
282 # The next few functions play with self.cmd. By carefully preloading
283 # our parallel stack, we can force the profiled result to include
284 # an arbitrary string as the name of the calling function.
285 # We use self.cmd as that string, and the resulting stats look
288 def set_cmd(self
, cmd
):
289 if self
.cur
[-1]: return # already set
291 self
.simulate_call(cmd
)
294 def __init__(self
, filename
, line
, name
):
295 self
.co_filename
= filename
298 self
.co_code
= '\0' # anything but 127
301 return (self
.co_filename
, self
.co_line
, self
.co_name
)
304 def __init__(self
, code
, prior
):
308 def simulate_call(self
, name
):
309 code
= self
.fake_code('profile', 0, name
)
311 pframe
= self
.cur
[-2]
314 frame
= self
.fake_frame(code
, pframe
)
315 a
= self
.dispatch
['call'](frame
, 0)
318 # collect stats from pending stack, including getting final
319 # timings for self.cmd frame.
321 def simulate_cmd_complete(self
):
322 t
= self
.get_time() - self
.t
324 # We *can* cause assertion errors here if
325 # trace_dispatch_return checks for a frame match!
326 a
= self
.dispatch
['return'](self
.cur
[-2], t
)
328 self
.t
= self
.get_time() - t
331 def print_stats(self
):
333 pstats
.Stats(self
).strip_dirs().sort_stats(-1). \
336 def dump_stats(self
, file):
339 marshal
.dump(self
.stats
, f
)
342 def create_stats(self
):
343 self
.simulate_cmd_complete()
344 self
.snapshot_stats()
346 def snapshot_stats(self
):
348 for func
in self
.timings
.keys():
349 cc
, ns
, tt
, ct
, callers
= self
.timings
[func
]
350 nor_func
= self
.func_normalize(func
)
353 for func_caller
in callers
.keys():
354 nor_callers
[self
.func_normalize(func_caller
)]=\
356 nc
= nc
+ callers
[func_caller
]
357 self
.stats
[nor_func
] = cc
, nc
, tt
, ct
, nor_callers
360 # Override the following function if you can figure out
361 # a better name for the binary f_code entries. I just normalize
362 # them sequentially in a dictionary. It would be nice if we could
363 # *really* see the name of the underlying C code :-). Sometimes
364 # you can figure out what-is-what by looking at caller and callee
365 # lists (and knowing what your python code does).
367 def func_normalize(self
, func_name
):
368 global func_norm_dict
369 global func_norm_counter
370 global func_sequence_num
372 if func_norm_dict
.has_key(func_name
):
373 return func_norm_dict
[func_name
]
374 if type(func_name
) == type(""):
375 long_name
= string
.split(func_name
)
376 file_name
= long_name
[6][1:-2]
378 lineno
= long_name
[8][:-1]
379 if '?' == func
: # Until I find out how to may 'em...
381 func_norm_counter
= func_norm_counter
+ 1
382 func
= pid_string
+ ".C." + `func_norm_counter`
383 result
= file_name
, string
.atoi(lineno
) , func
386 func_norm_dict
[func_name
] = result
390 # The following two methods can be called by clients to use
391 # a profiler to profile a statement, given as a string.
395 dict = __main__
.__dict
__
396 self
.runctx(cmd
, dict, dict)
399 def runctx(self
, cmd
, globals, locals):
401 sys
.setprofile(self
.trace_dispatch
)
403 exec(cmd
, globals, locals)
407 # This method is more useful to profile a single function call.
408 def runcall(self
, func
, *args
):
409 self
.set_cmd(func
.__name
__)
410 sys
.setprofile(self
.trace_dispatch
)
418 #******************************************************************
419 # The following calculates the overhead for using a profiler. The
420 # problem is that it takes a fair amount of time for the profiler
421 # to stop the stopwatch (from the time it receives an event).
422 # Similarly, there is a delay from the time that the profiler
423 # re-starts the stopwatch before the user's code really gets to
424 # continue. The following code tries to measure the difference on
425 # a per-event basis. The result can then be placed in the
426 # Profile.dispatch_event() routine for the given platform. Note
427 # that this difference is only significant if there are a lot of
428 # events, and relatively little user code per event. For example,
429 # code with small functions will typically benefit from having the
430 # profiler calibrated for the current platform. This *could* be
431 # done on the fly during init() time, but it is not worth the
432 # effort. Also note that if too large a value is specified, then
433 # execution time on some functions will actually appear as a
434 # negative number. It is *normal* for some functions (with very
435 # low call counts) to have such negative stats, even if the
436 # calibration figure is "correct."
438 # One alternative to profile-time calibration adjustments (i.e.,
439 # adding in the magic little delta during each event) is to track
440 # more carefully the number of events (and cumulatively, the number
441 # of events during sub functions) that are seen. If this were
442 # done, then the arithmetic could be done after the fact (i.e., at
443 # display time). Currently, we track only call/return events.
444 # These values can be deduced by examining the callees and callers
445 # vectors for each function. Hence we *can* almost correct the
446 # internal time figure at print time (note that we currently don't
447 # track exception event processing counts). Unfortunately, there
448 # is currently no similar information for cumulative sub-function
449 # time. It would not be hard to "get all this info" at profiler
450 # time. Specifically, we would have to extend the tuples to keep
451 # counts of this in each frame, and then extend the defs of timing
452 # tuples to include the significant two figures. I'm a bit fearful
453 # that this additional feature will slow the heavily optimized
454 # event/time ratio (i.e., the profiler would run slower, for a very
455 # low "value added" feature.)
457 # Plugging in the calibration constant doesn't slow down the
458 # profiler very much, and the accuracy goes way up.
459 #**************************************************************
461 def calibrate(self
, m
):
468 my_simple
= f
[0]+f
[1]-s
[0]-s
[1]
469 #print "Simple =", my_simple,
477 my_inst
= f
[0]+f
[1]-s
[0]-s
[1]
478 # print "Instrumented =", my_inst
479 avg_cost
= (my_inst
- my_simple
)/m
480 #print "Delta/call =", avg_cost, "(profiler fixup constant)"
483 # simulate a program with no profiler activity
488 # simulate a program with call/return event processing
489 def instrumented(self
):
491 self
.profiler_simulation(a
, a
, a
)
493 # simulate an event processing activity (from user's perspective)
494 def profiler_simulation(self
, x
, y
, z
):
501 #****************************************************************************
502 # OldProfile class documentation
503 #****************************************************************************
505 # The following derived profiler simulates the old style profile, providing
506 # errant results on recursive functions. The reason for the usefulness of this
507 # profiler is that it runs faster (i.e., less overhead). It still creates
508 # all the caller stats, and is quite useful when there is *no* recursion
509 # in the user's code.
511 # This code also shows how easy it is to create a modified profiler.
512 #****************************************************************************
513 class OldProfile(Profile
):
514 def trace_dispatch_exception(self
, frame
, t
):
515 rt
, rtt
, rct
, rfn
, rframe
, rcur
= self
.cur
516 if rcur
and not rframe
is frame
:
517 return self
.trace_dispatch_return(rframe
, t
)
520 def trace_dispatch_call(self
, frame
, t
):
523 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
524 if self
.timings
.has_key(fn
):
525 tt
, ct
, callers
= self
.timings
[fn
]
526 self
.timings
[fn
] = tt
, ct
, callers
528 self
.timings
[fn
] = 0, 0, {}
531 def trace_dispatch_return(self
, frame
, t
):
532 rt
, rtt
, rct
, rfn
, frame
, rcur
= self
.cur
536 pt
, ptt
, pct
, pfn
, pframe
, pcur
= rcur
537 self
.cur
= pt
, ptt
+rt
, pct
+sft
, pfn
, pframe
, pcur
539 tt
, ct
, callers
= self
.timings
[rfn
]
540 if callers
.has_key(pfn
):
541 callers
[pfn
] = callers
[pfn
] + 1
544 self
.timings
[rfn
] = tt
+rtt
, ct
+ sft
, callers
549 def snapshot_stats(self
):
551 for func
in self
.timings
.keys():
552 tt
, ct
, callers
= self
.timings
[func
]
553 nor_func
= self
.func_normalize(func
)
556 for func_caller
in callers
.keys():
557 nor_callers
[self
.func_normalize(func_caller
)]=\
559 nc
= nc
+ callers
[func_caller
]
560 self
.stats
[nor_func
] = nc
, nc
, tt
, ct
, nor_callers
564 #****************************************************************************
565 # HotProfile class documentation
566 #****************************************************************************
568 # This profiler is the fastest derived profile example. It does not
569 # calculate caller-callee relationships, and does not calculate cumulative
570 # time under a function. It only calculates time spent in a function, so
571 # it runs very quickly (re: very low overhead)
572 #****************************************************************************
573 class HotProfile(Profile
):
574 def trace_dispatch_exception(self
, frame
, t
):
575 rt
, rtt
, rfn
, rframe
, rcur
= self
.cur
576 if rcur
and not rframe
is frame
:
577 return self
.trace_dispatch_return(rframe
, t
)
580 def trace_dispatch_call(self
, frame
, t
):
581 self
.cur
= (t
, 0, frame
, self
.cur
)
584 def trace_dispatch_return(self
, frame
, t
):
585 rt
, rtt
, frame
, rcur
= self
.cur
589 pt
, ptt
, pframe
, pcur
= rcur
590 self
.cur
= pt
, ptt
+rt
, pframe
, pcur
592 if self
.timings
.has_key(rfn
):
593 nc
, tt
= self
.timings
[rfn
]
594 self
.timings
[rfn
] = nc
+ 1, rt
+ rtt
+ tt
596 self
.timings
[rfn
] = 1, rt
+ rtt
601 def snapshot_stats(self
):
603 for func
in self
.timings
.keys():
604 nc
, tt
= self
.timings
[func
]
605 nor_func
= self
.func_normalize(func
)
606 self
.stats
[nor_func
] = nc
, nc
, tt
, 0, {}
610 #****************************************************************************
612 print 'Report generating functions are in the "pstats" module\a'