3 # Class for profiling python code. rev 1.0 6/2/94
5 # Based on prior profile module by Sjoerd Mullender...
6 # which was hacked somewhat by: Guido van Rossum
8 """Class for profiling Python code."""
10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.
11 # Written by James Roskind
13 # Permission to use, copy, modify, and distribute this Python software
14 # and its associated documentation for any purpose (subject to the
15 # restriction in the following sentence) without fee is hereby granted,
16 # provided that the above copyright notice appears in all copies, and
17 # that both that copyright notice and this permission notice appear in
18 # supporting documentation, and that the name of InfoSeek not be used in
19 # advertising or publicity pertaining to distribution of the software
20 # without specific, written prior permission. This permission is
21 # explicitly restricted to the copying and modification of the software
22 # to remain in Python, compiled Python, or other languages (such as C)
23 # wherein the modified or derived code is exclusively imported into a
26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
40 from optparse
import OptionParser
42 __all__
= ["run", "runctx", "help", "Profile"]
44 # Sample timer for use with
48 # i_count = i_count + 1
50 #itimes = integer_timer # replace with C coded timer returning integers
52 #**************************************************************************
53 # The following are the static member functions for the profiler class
54 # Note that an instance of Profile() is *not* needed to call them.
55 #**************************************************************************
57 def run(statement
, filename
=None, sort
=-1):
58 """Run statement under profiler optionally saving results in filename
60 This function takes a single argument that can be passed to the
61 "exec" statement, and an optional file name. In all cases this
62 routine attempts to "exec" its first argument and gather profiling
63 statistics from the execution. If no file name is present, then this
64 function automatically prints a simple profiling report, sorted by the
65 standard name string (file/line/function-name) that is presented in
70 prof
= prof
.run(statement
)
73 if filename
is not None:
74 prof
.dump_stats(filename
)
76 return prof
.print_stats(sort
)
78 def runctx(statement
, globals, locals, filename
=None):
79 """Run statement under profiler, supplying your own globals and locals,
80 optionally saving results in filename.
82 statement and filename have the same semantics as profile.run
86 prof
= prof
.runctx(statement
, globals, locals)
90 if filename
is not None:
91 prof
.dump_stats(filename
)
93 return prof
.print_stats()
95 # Backwards compatibility.
97 print "Documentation for the profile module can be found "
98 print "in the Python Library Reference, section 'The Python Profiler'."
100 if hasattr(os
, "times"):
101 def _get_time_times(timer
=os
.times
):
105 # Using getrusage(3) is better than clock(3) if available:
106 # on some systems (e.g. FreeBSD), getrusage has a higher resolution
107 # Furthermore, on a POSIX system, returns microseconds, which
108 # wrap around after 36min.
112 resgetrusage
= lambda: resource
.getrusage(resource
.RUSAGE_SELF
)
113 def _get_time_resource(timer
=resgetrusage
):
123 self.cur is always a tuple. Each such tuple corresponds to a stack
124 frame that is currently active (self.cur[-2]). The following are the
125 definitions of its members. We use this external "parallel stack" to
126 avoid contaminating the program that we are profiling. (old profiler
127 used to write into the frames local dictionary!!) Derived classes
128 can change the definition of some entries, as long as they leave
129 [-2:] intact (frame and previous tuple). In case an internal error is
130 detected, the -3 element is used as the function name.
132 [ 0] = Time that needs to be charged to the parent frame's function.
133 It is used so that a function call will not have to access the
134 timing data for the parent frame.
135 [ 1] = Total time spent in this frame's function, excluding time in
136 subfunctions (this latter is tallied in cur[2]).
137 [ 2] = Total time spent in subfunctions, excluding time executing the
138 frame's function (this latter is tallied in cur[1]).
139 [-3] = Name of the function that corresponds to this frame.
140 [-2] = Actual frame that we correspond to (used to sync exception handling).
141 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
143 Timing data for each function is stored as a 5-tuple in the dictionary
144 self.timings[]. The index is always the name stored in self.cur[-3].
145 The following are the definitions of the members:
147 [0] = The number of times this function was called, not counting direct
148 or indirect recursion,
149 [1] = Number of times this function appears on the stack, minus one
150 [2] = Total time spent internal to this function
151 [3] = Cumulative time that this function was present on the stack. In
152 non-recursive functions, this is the total execution time from start
153 to finish of each invocation of a function, including time spent in
155 [4] = A dictionary indicating for each function name, the number of times
159 bias
= 0 # calibration constant
161 def __init__(self
, timer
=None, bias
=None):
165 self
.c_func_name
= ""
169 self
.bias
= bias
# Materialize in local dict for lookup speed.
173 self
.timer
= resgetrusage
174 self
.dispatcher
= self
.trace_dispatch
175 self
.get_time
= _get_time_resource
176 elif hasattr(time
, 'clock'):
177 self
.timer
= self
.get_time
= time
.clock
178 self
.dispatcher
= self
.trace_dispatch_i
179 elif hasattr(os
, 'times'):
180 self
.timer
= os
.times
181 self
.dispatcher
= self
.trace_dispatch
182 self
.get_time
= _get_time_times
184 self
.timer
= self
.get_time
= time
.time
185 self
.dispatcher
= self
.trace_dispatch_i
188 t
= self
.timer() # test out timer function
192 self
.get_time
= timer
193 self
.dispatcher
= self
.trace_dispatch_i
196 self
.dispatcher
= self
.trace_dispatch
198 self
.dispatcher
= self
.trace_dispatch_l
199 # This get_time() implementation needs to be defined
200 # here to capture the passed-in timer in the parameter
201 # list (for performance). Note that we can't assume
202 # the timer() result contains two values in all
204 def get_time_timer(timer
=timer
, sum=sum):
206 self
.get_time
= get_time_timer
207 self
.t
= self
.get_time()
208 self
.simulate_call('profiler')
210 # Heavily optimized dispatch routine for os.times() timer
212 def trace_dispatch(self
, frame
, event
, arg
):
215 t
= t
[0] + t
[1] - self
.t
- self
.bias
217 if event
== "c_call":
218 self
.c_func_name
= arg
.__name
__
220 if self
.dispatch
[event
](self
, frame
,t
):
225 self
.t
= r
[0] + r
[1] - t
# put back unrecorded delta
227 # Dispatch routine for best timer program (return = scalar, fastest if
228 # an integer but float works too -- and time.clock() relies on that).
230 def trace_dispatch_i(self
, frame
, event
, arg
):
232 t
= timer() - self
.t
- self
.bias
234 if event
== "c_call":
235 self
.c_func_name
= arg
.__name
__
237 if self
.dispatch
[event
](self
, frame
, t
):
240 self
.t
= timer() - t
# put back unrecorded delta
242 # Dispatch routine for macintosh (timer returns time in ticks of
245 def trace_dispatch_mac(self
, frame
, event
, arg
):
247 t
= timer()/60.0 - self
.t
- self
.bias
249 if event
== "c_call":
250 self
.c_func_name
= arg
.__name
__
252 if self
.dispatch
[event
](self
, frame
, t
):
253 self
.t
= timer()/60.0
255 self
.t
= timer()/60.0 - t
# put back unrecorded delta
257 # SLOW generic dispatch routine for timer returning lists of numbers
259 def trace_dispatch_l(self
, frame
, event
, arg
):
260 get_time
= self
.get_time
261 t
= get_time() - self
.t
- self
.bias
263 if event
== "c_call":
264 self
.c_func_name
= arg
.__name
__
266 if self
.dispatch
[event
](self
, frame
, t
):
269 self
.t
= get_time() - t
# put back unrecorded delta
271 # In the event handlers, the first 3 elements of self.cur are unpacked
272 # into vrbls w/ 3-letter names. The last two characters are meant to be
274 # _pt self.cur[0] "parent time" time to be charged to parent frame
275 # _it self.cur[1] "internal time" time spent directly in the function
276 # _et self.cur[2] "external time" time spent in subfunctions
278 def trace_dispatch_exception(self
, frame
, t
):
279 rpt
, rit
, ret
, rfn
, rframe
, rcur
= self
.cur
280 if (rframe
is not frame
) and rcur
:
281 return self
.trace_dispatch_return(rframe
, t
)
282 self
.cur
= rpt
, rit
+t
, ret
, rfn
, rframe
, rcur
286 def trace_dispatch_call(self
, frame
, t
):
287 if self
.cur
and frame
.f_back
is not self
.cur
[-2]:
288 rpt
, rit
, ret
, rfn
, rframe
, rcur
= self
.cur
289 if not isinstance(rframe
, Profile
.fake_frame
):
290 assert rframe
.f_back
is frame
.f_back
, ("Bad call", rfn
,
291 rframe
, rframe
.f_back
,
293 self
.trace_dispatch_return(rframe
, 0)
294 assert (self
.cur
is None or \
295 frame
.f_back
is self
.cur
[-2]), ("Bad call",
298 fn
= (fcode
.co_filename
, fcode
.co_firstlineno
, fcode
.co_name
)
299 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
300 timings
= self
.timings
302 cc
, ns
, tt
, ct
, callers
= timings
[fn
]
303 timings
[fn
] = cc
, ns
+ 1, tt
, ct
, callers
305 timings
[fn
] = 0, 0, 0, 0, {}
308 def trace_dispatch_c_call (self
, frame
, t
):
309 fn
= ("", 0, self
.c_func_name
)
310 self
.cur
= (t
, 0, 0, fn
, frame
, self
.cur
)
311 timings
= self
.timings
313 cc
, ns
, tt
, ct
, callers
= timings
[fn
]
314 timings
[fn
] = cc
, ns
+1, tt
, ct
, callers
316 timings
[fn
] = 0, 0, 0, 0, {}
319 def trace_dispatch_return(self
, frame
, t
):
320 if frame
is not self
.cur
[-2]:
321 assert frame
is self
.cur
[-2].f_back
, ("Bad return", self
.cur
[-3])
322 self
.trace_dispatch_return(self
.cur
[-2], 0)
324 # Prefix "r" means part of the Returning or exiting frame.
325 # Prefix "p" means part of the Previous or Parent or older frame.
327 rpt
, rit
, ret
, rfn
, frame
, rcur
= self
.cur
329 frame_total
= rit
+ ret
331 ppt
, pit
, pet
, pfn
, pframe
, pcur
= rcur
332 self
.cur
= ppt
, pit
+ rpt
, pet
+ frame_total
, pfn
, pframe
, pcur
334 timings
= self
.timings
335 cc
, ns
, tt
, ct
, callers
= timings
[rfn
]
337 # This is the only occurrence of the function on the stack.
338 # Else this is a (directly or indirectly) recursive call, and
339 # its cumulative time will get updated when the topmost call to
341 ct
= ct
+ frame_total
345 callers
[pfn
] = callers
[pfn
] + 1 # hack: gather more
346 # stats such as the amount of time added to ct courtesy
347 # of this specific call, and the contribution to cc
348 # courtesy of this call.
352 timings
[rfn
] = cc
, ns
- 1, tt
+ rit
, ct
, callers
358 "call": trace_dispatch_call
,
359 "exception": trace_dispatch_exception
,
360 "return": trace_dispatch_return
,
361 "c_call": trace_dispatch_c_call
,
362 "c_exception": trace_dispatch_return
, # the C function returned
363 "c_return": trace_dispatch_return
,
367 # The next few functions play with self.cmd. By carefully preloading
368 # our parallel stack, we can force the profiled result to include
369 # an arbitrary string as the name of the calling function.
370 # We use self.cmd as that string, and the resulting stats look
373 def set_cmd(self
, cmd
):
374 if self
.cur
[-1]: return # already set
376 self
.simulate_call(cmd
)
379 def __init__(self
, filename
, line
, name
):
380 self
.co_filename
= filename
383 self
.co_firstlineno
= 0
386 return repr((self
.co_filename
, self
.co_line
, self
.co_name
))
389 def __init__(self
, code
, prior
):
393 def simulate_call(self
, name
):
394 code
= self
.fake_code('profile', 0, name
)
396 pframe
= self
.cur
[-2]
399 frame
= self
.fake_frame(code
, pframe
)
400 self
.dispatch
['call'](self
, frame
, 0)
402 # collect stats from pending stack, including getting final
403 # timings for self.cmd frame.
405 def simulate_cmd_complete(self
):
406 get_time
= self
.get_time
407 t
= get_time() - self
.t
409 # We *can* cause assertion errors here if
410 # dispatch_trace_return checks for a frame match!
411 self
.dispatch
['return'](self
, self
.cur
[-2], t
)
413 self
.t
= get_time() - t
416 def print_stats(self
, sort
=-1):
418 pstats
.Stats(self
).strip_dirs().sort_stats(sort
). \
421 def dump_stats(self
, file):
424 marshal
.dump(self
.stats
, f
)
427 def create_stats(self
):
428 self
.simulate_cmd_complete()
429 self
.snapshot_stats()
431 def snapshot_stats(self
):
433 for func
, (cc
, ns
, tt
, ct
, callers
) in self
.timings
.iteritems():
434 callers
= callers
.copy()
436 for callcnt
in callers
.itervalues():
438 self
.stats
[func
] = cc
, nc
, tt
, ct
, callers
441 # The following two methods can be called by clients to use
442 # a profiler to profile a statement, given as a string.
446 dict = __main__
.__dict
__
447 return self
.runctx(cmd
, dict, dict)
449 def runctx(self
, cmd
, globals, locals):
451 sys
.setprofile(self
.dispatcher
)
453 exec cmd
in globals, locals
458 # This method is more useful to profile a single function call.
459 def runcall(self
, func
, *args
, **kw
):
460 self
.set_cmd(repr(func
))
461 sys
.setprofile(self
.dispatcher
)
463 return func(*args
, **kw
)
468 #******************************************************************
469 # The following calculates the overhead for using a profiler. The
470 # problem is that it takes a fair amount of time for the profiler
471 # to stop the stopwatch (from the time it receives an event).
472 # Similarly, there is a delay from the time that the profiler
473 # re-starts the stopwatch before the user's code really gets to
474 # continue. The following code tries to measure the difference on
477 # Note that this difference is only significant if there are a lot of
478 # events, and relatively little user code per event. For example,
479 # code with small functions will typically benefit from having the
480 # profiler calibrated for the current platform. This *could* be
481 # done on the fly during init() time, but it is not worth the
482 # effort. Also note that if too large a value specified, then
483 # execution time on some functions will actually appear as a
484 # negative number. It is *normal* for some functions (with very
485 # low call counts) to have such negative stats, even if the
486 # calibration figure is "correct."
488 # One alternative to profile-time calibration adjustments (i.e.,
489 # adding in the magic little delta during each event) is to track
490 # more carefully the number of events (and cumulatively, the number
491 # of events during sub functions) that are seen. If this were
492 # done, then the arithmetic could be done after the fact (i.e., at
493 # display time). Currently, we track only call/return events.
494 # These values can be deduced by examining the callees and callers
495 # vectors for each functions. Hence we *can* almost correct the
496 # internal time figure at print time (note that we currently don't
497 # track exception event processing counts). Unfortunately, there
498 # is currently no similar information for cumulative sub-function
499 # time. It would not be hard to "get all this info" at profiler
500 # time. Specifically, we would have to extend the tuples to keep
501 # counts of this in each frame, and then extend the defs of timing
502 # tuples to include the significant two figures. I'm a bit fearful
503 # that this additional feature will slow the heavily optimized
504 # event/time ratio (i.e., the profiler would run slower, fur a very
505 # low "value added" feature.)
506 #**************************************************************
508 def calibrate(self
, m
, verbose
=0):
509 if self
.__class
__ is not Profile
:
510 raise TypeError("Subclasses must override .calibrate().")
512 saved_bias
= self
.bias
515 return self
._calibrate
_inner
(m
, verbose
)
517 self
.bias
= saved_bias
519 def _calibrate_inner(self
, m
, verbose
):
520 get_time
= self
.get_time
522 # Set up a test case to be run with and without profiling. Include
523 # lots of calls, because we're trying to quantify stopwatch overhead.
524 # Do not raise any exceptions, though, because we want to know
525 # exactly how many profile events are generated (one call event, +
526 # one return event, per Python-level call).
536 f(m
) # warm up the cache
538 # elapsed_noprofile <- time f(m) takes without profiling.
542 elapsed_noprofile
= t1
- t0
544 print "elapsed time without profiling =", elapsed_noprofile
546 # elapsed_profile <- time f(m) takes with profiling. The difference
547 # is profiling overhead, only some of which the profiler subtracts
551 p
.runctx('f(m)', globals(), locals())
553 elapsed_profile
= t1
- t0
555 print "elapsed time with profiling =", elapsed_profile
557 # reported_time <- "CPU seconds" the profiler charged to f and f1.
560 for (filename
, line
, funcname
), (cc
, ns
, tt
, ct
, callers
) in \
562 if funcname
in ("f", "f1"):
567 print "'CPU seconds' profiler reported =", reported_time
568 print "total # calls =", total_calls
569 if total_calls
!= m
+ 1:
570 raise ValueError("internal error: total calls = %d" % total_calls
)
572 # reported_time - elapsed_noprofile = overhead the profiler wasn't
573 # able to measure. Divide by twice the number of calls (since there
574 # are two profiler events per call in this test) to get the hidden
575 # overhead per event.
576 mean
= (reported_time
- elapsed_noprofile
) / 2.0 / total_calls
578 print "mean stopwatch overhead per profile event =", mean
581 #****************************************************************************
583 print 'Report generating functions are in the "pstats" module\a'
586 usage
= "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
587 parser
= OptionParser(usage
=usage
)
588 parser
.allow_interspersed_args
= False
589 parser
.add_option('-o', '--outfile', dest
="outfile",
590 help="Save stats to <outfile>", default
=None)
591 parser
.add_option('-s', '--sort', dest
="sort",
592 help="Sort order when printing to stdout, based on pstats.Stats class", default
=-1)
598 (options
, args
) = parser
.parse_args()
602 sys
.path
.insert(0, os
.path
.dirname(sys
.argv
[0]))
603 run('execfile(%r)' % (sys
.argv
[0],), options
.outfile
, options
.sort
)
608 # When invoked as main program, invoke the profiler on a script
609 if __name__
== '__main__':