3 # Class for profiling python code. rev 1.0 6/2/94
5 # Based on prior profile module by Sjoerd Mullender...
6 # which was hacked somewhat by: Guido van Rossum
8 # See profile.doc for more information
10 """Class for profiling Python code."""
12 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
13 # Written by James Roskind
15 # Permission to use, copy, modify, and distribute this Python software
16 # and its associated documentation for any purpose (subject to the
17 # restriction in the following sentence) without fee is hereby granted,
18 # provided that the above copyright notice appears in all copies, and
19 # that both that copyright notice and this permission notice appear in
20 # supporting documentation, and that the name of InfoSeek not be used in
21 # advertising or publicity pertaining to distribution of the software
22 # without specific, written prior permission. This permission is
23 # explicitly restricted to the copying and modification of the software
24 # to remain in Python, compiled Python, or other languages (such as C)
25 # wherein the modified or derived code is exclusively imported into a
28 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
29 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
30 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
31 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
32 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
33 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
34 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 __all__
= ["run","help","Profile"]
45 # Sample timer for use with
49 # i_count = i_count + 1
51 #itimes = integer_timer # replace with C coded timer returning integers
53 #**************************************************************************
54 # The following are the static member functions for the profiler class
55 # Note that an instance of Profile() is *not* needed to call them.
56 #**************************************************************************
58 def run(statement
, filename
=None):
59 """Run statement under profiler optionally saving results in filename
61 This function takes a single argument that can be passed to the
62 "exec" statement, and an optional file name. In all cases this
63 routine attempts to "exec" its first argument and gather profiling
64 statistics from the execution. If no file name is present, then this
65 function automatically prints a simple profiling report, sorted by the
66 standard name string (file/line/function-name) that is presented in
71 prof
= prof
.run(statement
)
74 if filename
is not None:
75 prof
.dump_stats(filename
)
77 return prof
.print_stats()
81 for dirname
in sys
.path
:
82 fullname
= os
.path
.join(dirname
, 'profile.doc')
83 if os
.path
.exists(fullname
):
84 sts
= os
.system('${PAGER-more} ' + fullname
)
85 if sts
: print '*** Pager exit status:', sts
88 print 'Sorry, can\'t find the help file "profile.doc"',
89 print 'along the Python search path.'
94 def _get_time_mac(timer
=MacOS
.GetTicks
):
97 if hasattr(os
, "times"):
98 def _get_time_times(timer
=os
.times
):
106 self.cur is always a tuple. Each such tuple corresponds to a stack
107 frame that is currently active (self.cur[-2]). The following are the
108 definitions of its members. We use this external "parallel stack" to
109 avoid contaminating the program that we are profiling. (old profiler
110 used to write into the frames local dictionary!!) Derived classes
111 can change the definition of some entries, as long as they leave
112 [-2:] intact (frame and previous tuple). In case an internal error is
113 detected, the -3 element is used as the function name.
115 [ 0] = Time that needs to be charged to the parent frame's function.
116 It is used so that a function call will not have to access the
117 timing data for the parent frame.
118 [ 1] = Total time spent in this frame's function, excluding time in
119 subfunctions (this latter is tallied in cur[2]).
120 [ 2] = Total time spent in subfunctions, excluding time executing the
121 frame's function (this latter is tallied in cur[1]).
122 [-3] = Name of the function that corresponds to this frame.
123 [-2] = Actual frame that we correspond to (used to sync exception handling).
124 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
126 Timing data for each function is stored as a 5-tuple in the dictionary
127 self.timings[]. The index is always the name stored in self.cur[-3].
128 The following are the definitions of the members:
130 [0] = The number of times this function was called, not counting direct
131 or indirect recursion,
132 [1] = Number of times this function appears on the stack, minus one
133 [2] = Total time spent internal to this function
134 [3] = Cumulative time that this function was present on the stack. In
135 non-recursive functions, this is the total execution time from start
136 to finish of each invocation of a function, including time spent in
138 [4] = A dictionary indicating for each function name, the number of times
142 bias
= 0 # calibration constant
144 def __init__(self
, timer
=None, bias
=None):
151 self
.bias
= bias
# Materialize in local dict for lookup speed.
155 self
.timer
= MacOS
.GetTicks
156 self
.dispatcher
= self
.trace_dispatch_mac
157 self
.get_time
= _get_time_mac
158 elif hasattr(time
, 'clock'):
159 self
.timer
= self
.get_time
= time
.clock
160 self
.dispatcher
= self
.trace_dispatch_i
161 elif hasattr(os
, 'times'):
162 self
.timer
= os
.times
163 self
.dispatcher
= self
.trace_dispatch
164 self
.get_time
= _get_time_times
166 self
.timer
= self
.get_time
= time
.time
167 self
.dispatcher
= self
.trace_dispatch_i
170 t
= self
.timer() # test out timer function
174 self
.get_time
= timer
175 self
.dispatcher
= self
.trace_dispatch_i
178 self
.dispatcher
= self
.trace_dispatch
180 self
.dispatcher
= self
.trace_dispatch_l
181 # This get_time() implementation needs to be defined
182 # here to capture the passed-in timer in the parameter
183 # list (for performance). Note that we can't assume
184 # the timer() result contains two values in all
187 def get_time_timer(timer
=timer
,
188 reduce=reduce, reducer
=operator
.add
):
189 return reduce(reducer
, timer(), 0)
190 self
.get_time
= get_time_timer
191 self
.t
= self
.get_time()
192 self
.simulate_call('profiler')
194 # Heavily optimized dispatch routine for os.times() timer
196 def trace_dispatch(self
, frame
, event
, arg
):
199 t
= t
[0] + t
[1] - self
.t
- self
.bias
201 if self
.dispatch
[event
](self
, frame
,t
):
206 self
.t
= r
[0] + r
[1] - t
# put back unrecorded delta
208 # Dispatch routine for best timer program (return = scalar, fastest if
209 # an integer but float works too -- and time.clock() relies on that).
211 def trace_dispatch_i(self
, frame
, event
, arg
):
213 t
= timer() - self
.t
- self
.bias
214 if self
.dispatch
[event
](self
, frame
,t
):
217 self
.t
= timer() - t
# put back unrecorded delta
219 # Dispatch routine for macintosh (timer returns time in ticks of
222 def trace_dispatch_mac(self
, frame
, event
, arg
):
224 t
= timer()/60.0 - self
.t
- self
.bias
225 if self
.dispatch
[event
](self
, frame
, t
):
226 self
.t
= timer()/60.0
228 self
.t
= timer()/60.0 - t
# put back unrecorded delta
230 # SLOW generic dispatch routine for timer returning lists of numbers
232 def trace_dispatch_l(self
, frame
, event
, arg
):
233 get_time
= self
.get_time
234 t
= get_time() - self
.t
- self
.bias
236 if self
.dispatch
[event
](self
, frame
, t
):
239 self
.t
= get_time() - t
# put back unrecorded delta
241 # In the event handlers, the first 3 elements of self.cur are unpacked
242 # into vrbls w/ 3-letter names. The last two characters are meant to be
244 # _pt self.cur[0] "parent time" time to be charged to parent frame
245 # _it self.cur[1] "internal time" time spent directly in the function
246 # _et self.cur[2] "external time" time spent in subfunctions
248 def trace_dispatch_exception(self
, frame
, t
):
249 rpt
, rit
, ret
, rfn
, rframe
, rcur
= self
.cur
250 if (rframe
is not frame
) and rcur
:
251 return self
.trace_dispatch_return(rframe
, t
)
252 self
.cur
= rpt
, rit
+t
, ret
, rfn
, rframe
, rcur
256 def trace_dispatch_call(self
, frame
, t
):
257 if self
.cur
and frame
.f_back
is not self
.cur
[-2]:
258 rpt
, rit
, ret
, rfn
, rframe
, rcur
= self
.cur
259 if not isinstance(rframe
, Profile
.fake_frame
):
260 assert rframe
.f_back
is frame
.f_back
, ("Bad call", rfn
,
261 rframe
, rframe
.f_back
,
263 self
.trace_dispatch_return(rframe
, 0)
264 assert (self
.cur
is None or \
265 frame
.f_back
is self
.cur
[-2]), ("Bad call",
268 fn
= (fcode
.co_filename
, fcode
.co_firstlineno
, fcode
.co_name
)
269 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
270 timings
= self
.timings
272 cc
, ns
, tt
, ct
, callers
= timings
[fn
]
273 timings
[fn
] = cc
, ns
+ 1, tt
, ct
, callers
275 timings
[fn
] = 0, 0, 0, 0, {}
278 def trace_dispatch_return(self
, frame
, t
):
279 if frame
is not self
.cur
[-2]:
280 assert frame
is self
.cur
[-2].f_back
, ("Bad return", self
.cur
[-3])
281 self
.trace_dispatch_return(self
.cur
[-2], 0)
283 # Prefix "r" means part of the Returning or exiting frame.
284 # Prefix "p" means part of the Previous or Parent or older frame.
286 rpt
, rit
, ret
, rfn
, frame
, rcur
= self
.cur
288 frame_total
= rit
+ ret
290 ppt
, pit
, pet
, pfn
, pframe
, pcur
= rcur
291 self
.cur
= ppt
, pit
+ rpt
, pet
+ frame_total
, pfn
, pframe
, pcur
293 timings
= self
.timings
294 cc
, ns
, tt
, ct
, callers
= timings
[rfn
]
296 # This is the only occurrence of the function on the stack.
297 # Else this is a (directly or indirectly) recursive call, and
298 # its cumulative time will get updated when the topmost call to
300 ct
= ct
+ frame_total
304 callers
[pfn
] = callers
[pfn
] + 1 # hack: gather more
305 # stats such as the amount of time added to ct courtesy
306 # of this specific call, and the contribution to cc
307 # courtesy of this call.
311 timings
[rfn
] = cc
, ns
- 1, tt
+ rit
, ct
, callers
317 "call": trace_dispatch_call
,
318 "exception": trace_dispatch_exception
,
319 "return": trace_dispatch_return
,
323 # The next few functions play with self.cmd. By carefully preloading
324 # our parallel stack, we can force the profiled result to include
325 # an arbitrary string as the name of the calling function.
326 # We use self.cmd as that string, and the resulting stats look
329 def set_cmd(self
, cmd
):
330 if self
.cur
[-1]: return # already set
332 self
.simulate_call(cmd
)
335 def __init__(self
, filename
, line
, name
):
336 self
.co_filename
= filename
339 self
.co_firstlineno
= 0
342 return repr((self
.co_filename
, self
.co_line
, self
.co_name
))
345 def __init__(self
, code
, prior
):
349 def simulate_call(self
, name
):
350 code
= self
.fake_code('profile', 0, name
)
352 pframe
= self
.cur
[-2]
355 frame
= self
.fake_frame(code
, pframe
)
356 self
.dispatch
['call'](self
, frame
, 0)
358 # collect stats from pending stack, including getting final
359 # timings for self.cmd frame.
361 def simulate_cmd_complete(self
):
362 get_time
= self
.get_time
363 t
= get_time() - self
.t
365 # We *can* cause assertion errors here if
366 # dispatch_trace_return checks for a frame match!
367 self
.dispatch
['return'](self
, self
.cur
[-2], t
)
369 self
.t
= get_time() - t
372 def print_stats(self
):
374 pstats
.Stats(self
).strip_dirs().sort_stats(-1). \
377 def dump_stats(self
, file):
380 marshal
.dump(self
.stats
, f
)
383 def create_stats(self
):
384 self
.simulate_cmd_complete()
385 self
.snapshot_stats()
387 def snapshot_stats(self
):
389 for func
, (cc
, ns
, tt
, ct
, callers
) in self
.timings
.iteritems():
390 callers
= callers
.copy()
392 for callcnt
in callers
.itervalues():
394 self
.stats
[func
] = cc
, nc
, tt
, ct
, callers
397 # The following two methods can be called by clients to use
398 # a profiler to profile a statement, given as a string.
402 dict = __main__
.__dict
__
403 return self
.runctx(cmd
, dict, dict)
405 def runctx(self
, cmd
, globals, locals):
407 sys
.setprofile(self
.dispatcher
)
409 exec cmd
in globals, locals
414 # This method is more useful to profile a single function call.
415 def runcall(self
, func
, *args
, **kw
):
417 sys
.setprofile(self
.dispatcher
)
419 return apply(func
, args
, kw
)
424 #******************************************************************
425 # The following calculates the overhead for using a profiler. The
426 # problem is that it takes a fair amount of time for the profiler
427 # to stop the stopwatch (from the time it receives an event).
428 # Similarly, there is a delay from the time that the profiler
429 # re-starts the stopwatch before the user's code really gets to
430 # continue. The following code tries to measure the difference on
433 # Note that this difference is only significant if there are a lot of
434 # events, and relatively little user code per event. For example,
435 # code with small functions will typically benefit from having the
436 # profiler calibrated for the current platform. This *could* be
437 # done on the fly during init() time, but it is not worth the
438 # effort. Also note that if too large a value specified, then
439 # execution time on some functions will actually appear as a
440 # negative number. It is *normal* for some functions (with very
441 # low call counts) to have such negative stats, even if the
442 # calibration figure is "correct."
444 # One alternative to profile-time calibration adjustments (i.e.,
445 # adding in the magic little delta during each event) is to track
446 # more carefully the number of events (and cumulatively, the number
447 # of events during sub functions) that are seen. If this were
448 # done, then the arithmetic could be done after the fact (i.e., at
449 # display time). Currently, we track only call/return events.
450 # These values can be deduced by examining the callees and callers
451 # vectors for each functions. Hence we *can* almost correct the
452 # internal time figure at print time (note that we currently don't
453 # track exception event processing counts). Unfortunately, there
454 # is currently no similar information for cumulative sub-function
455 # time. It would not be hard to "get all this info" at profiler
456 # time. Specifically, we would have to extend the tuples to keep
457 # counts of this in each frame, and then extend the defs of timing
458 # tuples to include the significant two figures. I'm a bit fearful
459 # that this additional feature will slow the heavily optimized
460 # event/time ratio (i.e., the profiler would run slower, fur a very
461 # low "value added" feature.)
462 #**************************************************************
464 def calibrate(self
, m
, verbose
=0):
465 if self
.__class
__ is not Profile
:
466 raise TypeError("Subclasses must override .calibrate().")
468 saved_bias
= self
.bias
471 return self
._calibrate
_inner
(m
, verbose
)
473 self
.bias
= saved_bias
475 def _calibrate_inner(self
, m
, verbose
):
476 get_time
= self
.get_time
478 # Set up a test case to be run with and without profiling. Include
479 # lots of calls, because we're trying to quantify stopwatch overhead.
480 # Do not raise any exceptions, though, because we want to know
481 # exactly how many profile events are generated (one call event, +
482 # one return event, per Python-level call).
492 f(m
) # warm up the cache
494 # elapsed_noprofile <- time f(m) takes without profiling.
498 elapsed_noprofile
= t1
- t0
500 print "elapsed time without profiling =", elapsed_noprofile
502 # elapsed_profile <- time f(m) takes with profiling. The difference
503 # is profiling overhead, only some of which the profiler subtracts
507 p
.runctx('f(m)', globals(), locals())
509 elapsed_profile
= t1
- t0
511 print "elapsed time with profiling =", elapsed_profile
513 # reported_time <- "CPU seconds" the profiler charged to f and f1.
516 for (filename
, line
, funcname
), (cc
, ns
, tt
, ct
, callers
) in \
518 if funcname
in ("f", "f1"):
523 print "'CPU seconds' profiler reported =", reported_time
524 print "total # calls =", total_calls
525 if total_calls
!= m
+ 1:
526 raise ValueError("internal error: total calls = %d" % total_calls
)
528 # reported_time - elapsed_noprofile = overhead the profiler wasn't
529 # able to measure. Divide by twice the number of calls (since there
530 # are two profiler events per call in this test) to get the hidden
531 # overhead per event.
532 mean
= (reported_time
- elapsed_noprofile
) / 2.0 / total_calls
534 print "mean stopwatch overhead per profile event =", mean
537 #****************************************************************************
539 print 'Report generating functions are in the "pstats" module\a'
542 # When invoked as main program, invoke the profiler on a script
543 if __name__
== '__main__':
545 print "usage: profile.py scriptfile [arg] ..."
548 filename
= sys
.argv
[1] # Get script filename
550 del sys
.argv
[0] # Hide "profile.py" from argument list
552 # Insert script directory in front of module search path
553 sys
.path
.insert(0, os
.path
.dirname(filename
))
555 run('execfile(' + `filename`
+ ')')