5 Note that this requires os.clock(), debug.sethook(),
6 and debug.getinfo() or your equivalent replacements to
7 be available if this is an embedded application.
11 profiler = newProfiler()
14 < call some functions that take time >
18 local outfile = io.open( "profile.txt", "w+" )
19 profiler:report( outfile )
22 == Optionally choosing profiling method ==
24 The rest of this comment can be ignored if you merely want a good profiler.
26 newProfiler(method, sampledelay):
28 If method is omitted or "time", will profile based on real performance.
29 optionally, frequency can be provided to control the number of opcodes
30 per profiling tick. By default this is 100000, which (on my system) provides
31 one tick approximately every 2ms and reduces system performance by about 10%.
32 This can be reduced to increase accuracy at the cost of performance, or
33 increased for the opposite effect.
35 If method is "call", will profile based on function calls. Frequency is
39 "time" may bias profiling somewhat towards large areas with "simple opcodes",
40 as the profiling function (which introduces a certain amount of unavoidable
41 overhead) will be called more often. This can be minimized by using a larger
42 sample delay - the default should leave any error largely overshadowed by
43 statistical noise. With a delay of 1000 I was able to achieve inaccuray of
44 approximately 25%. Increasing the delay to 100000 left inaccuracy below my
47 "call" may bias profiling heavily towards areas with many function calls.
48 Testing found a degenerate case giving a figure inaccurate by approximately
49 20,000%. (Yes, a multiple of 200.) This is, however, more directly comparable
50 to common profilers (such as gprof) and also gives accurate function call
51 counts, which cannot be retrieved from "time".
53 I strongly recommend "time" mode, and it is now the default.
57 2008-09-16 - Time-based profiling and conversion to Lua 5.1
58 by Ben Wilhelm ( zorba-pepperfish@pavlovian.net ).
59 Added the ability to optionally choose profiling methods, along with a new
62 Converted to Lua 5, a few improvements, and
63 additional documentation by Tom Spilman ( tom@sickheadgames.com )
65 Additional corrections and tidying by original author
66 Daniel Silverstone ( dsilvers@pepperfish.net )
70 Daniel Silverstone is no longer using this code, and judging by how long it's
71 been waiting for Lua 5.1 support, I don't think Tom Spilman is either. I'm
72 perfectly willing to take on maintenance, so if you have problems or
73 questions, go ahead and email me :)
74 -- Ben Wilhelm ( zorba-pepperfish@pavlovian.net ) '
78 Lua profiler - Copyright Pepperfish 2002,2003,2004
80 Permission is hereby granted, free of charge, to any person obtaining a copy
81 of this software and associated documentation files (the "Software"), to
82 deal in the Software without restriction, including without limitation the
83 rights to use, copy, modify, merge, publish, distribute, and/or sell copies
84 of the Software, and to permit persons to whom the Software is furnished to
85 do so, subject to the following conditions:
87 The above copyright notice and this permission notice shall be included in
88 all copies or substantial portions of the Software.
90 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
91 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
92 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
93 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
94 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
95 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
102 -- All profiler related stuff is stored in the top level table '_profiler'
108 -- newProfiler() creates a new profiler object for managing
109 -- the profiler and storing state. Note that only one profiler
110 -- object can be executing at one time.
112 function newProfiler(variant
, sampledelay
)
113 if _profiler
.running
then
114 print("Profiler already running.")
118 variant
= variant
or "time"
120 if variant
~= "time" and variant
~= "call" then
121 print("Profiler method must be 'time' or 'call'.")
126 for k
,v
in pairs(_profiler
) do
129 newprof
.variant
= variant
130 newprof
.sampledelay
= sampledelay
or 100000
136 -- This function starts the profiler. It will do nothing
137 -- if this (or any other) profiler is already running.
139 function _profiler
.start(self
)
140 if _profiler
.running
then
143 -- Start the profiler. This begins by setting up internal profiler state
144 _profiler
.running
= self
147 if self
.variant
== "time" then
148 self
.lastclock
= os
.clock()
149 debug
.sethook( _profiler_hook_wrapper_by_time
, "", self
.sampledelay
)
150 elseif self
.variant
== "call" then
151 debug
.sethook( _profiler_hook_wrapper_by_call
, "cr" )
153 print("Profiler method must be 'time' or 'call'.")
160 -- This function stops the profiler. It will do nothing
161 -- if a profiler is not running, and nothing if it isn't
162 -- the currently running profiler.
164 function _profiler
.stop(self
)
165 if _profiler
.running
~= self
then
168 -- Stop the profiler.
170 _profiler
.running
= nil
175 -- Simple wrapper to handle the hook. You should not
176 -- be calling this directly. Duplicated to reduce overhead.
178 function _profiler_hook_wrapper_by_call(action
)
179 if _profiler
.running
== nil then
182 _profiler
.running
:_internal_profile_by_call(action
)
184 function _profiler_hook_wrapper_by_time(action
)
185 if _profiler
.running
== nil then
188 _profiler
.running
:_internal_profile_by_time(action
)
193 -- This is the main by-function-call function of the profiler and should not
194 -- be called except by the hook wrapper
196 function _profiler
._internal_profile_by_call(self
,action
)
197 -- Since we can obtain the 'function' for the item we've had call us, we
199 local caller_info
= debug
.getinfo( 3 )
200 if caller_info
== nil then
201 print "No caller_info"
205 --SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>"))
207 -- Retrieve the most recent activation record...
208 local latest_ar
= nil
209 if table.getn(self
.callstack
) > 0 then
210 latest_ar
= self
.callstack
[table.getn(self
.callstack
)]
213 -- Are we allowed to profile this function?
214 local should_not_profile
= 0
215 for k
,v
in pairs(self
.prevented_functions
) do
216 if k
== caller_info
.func
then
217 should_not_profile
= v
220 -- Also check the top activation record...
222 if latest_ar
.should_not_profile
== 2 then
223 should_not_profile
= 2
227 -- Now then, are we in 'call' or 'return' ?
228 -- print("Profile:", caller_info.name, "SNP:", should_not_profile,
229 -- "Action:", action )
230 if action
== "call" then
233 this_ar
.should_not_profile
= should_not_profile
234 this_ar
.parent_ar
= latest_ar
235 this_ar
.anon_child
= 0
236 this_ar
.name_child
= 0
237 this_ar
.children
= {}
238 this_ar
.children_time
= {}
239 this_ar
.clock_start
= os
.clock()
240 -- Last thing to do on a call is to insert this onto the ar stack...
241 table.insert( self
.callstack
, this_ar
)
243 local this_ar
= latest_ar
244 if this_ar
== nil then
245 return -- No point in doing anything if no upper activation record
248 -- Right, calculate the time in this function...
249 this_ar
.clock_end
= os
.clock()
250 this_ar
.this_time
= this_ar
.clock_end
- this_ar
.clock_start
252 -- Now, if we have a parent, update its call info...
253 if this_ar
.parent_ar
then
254 this_ar
.parent_ar
.children
[caller_info
.func
] =
255 (this_ar
.parent_ar
.children
[caller_info
.func
] or 0) + 1
256 this_ar
.parent_ar
.children_time
[caller_info
.func
] =
257 (this_ar
.parent_ar
.children_time
[caller_info
.func
] or 0 ) +
259 if caller_info
.name
== nil then
260 this_ar
.parent_ar
.anon_child
=
261 this_ar
.parent_ar
.anon_child
+ this_ar
.this_time
263 this_ar
.parent_ar
.name_child
=
264 this_ar
.parent_ar
.name_child
+ this_ar
.this_time
267 -- Now if we're meant to record information about ourselves, do so...
268 if this_ar
.should_not_profile
== 0 then
269 local inforec
= self
:_get_func_rec(caller_info
.func
,1)
270 inforec
.count
= inforec
.count
+ 1
271 inforec
.time
= inforec
.time
+ this_ar
.this_time
272 inforec
.anon_child_time
= inforec
.anon_child_time
+ this_ar
.anon_child
273 inforec
.name_child_time
= inforec
.name_child_time
+ this_ar
.name_child
274 inforec
.func_info
= caller_info
275 for k
,v
in pairs(this_ar
.children
) do
276 inforec
.children
[k
] = (inforec
.children
[k
] or 0) + v
277 inforec
.children_time
[k
] =
278 (inforec
.children_time
[k
] or 0) + this_ar
.children_time
[k
]
282 -- Last thing to do on return is to drop the last activation record...
283 table.remove( self
.callstack
, table.getn( self
.callstack
) )
289 -- This is the main by-time internal function of the profiler and should not
290 -- be called except by the hook wrapper
292 function _profiler
._internal_profile_by_time(self
,action
)
293 -- we do this first so we add the minimum amount of extra time to this call
294 local timetaken
= os
.clock() - self
.lastclock
299 local caller
= debug
.getinfo(depth
)
301 if not caller
.func
then caller
.func
= "(tail call)" end
302 if self
.prevented_functions
[caller
.func
] == nil then
303 local info
= self
:_get_func_rec(caller
.func
, 1, caller
)
304 info
.count
= info
.count
+ 1
305 info
.time
= info
.time
+ timetaken
307 -- we're not the head, so update the "children" times also
308 if last_caller
.name
then
309 info
.name_child_time
= info
.name_child_time
+ timetaken
311 info
.anon_child_time
= info
.anon_child_time
+ timetaken
313 info
.children
[last_caller
.func
] =
314 (info
.children
[last_caller
.func
] or 0) + 1
315 info
.children_time
[last_caller
.func
] =
316 (info
.children_time
[last_caller
.func
] or 0) + timetaken
321 caller
= debug
.getinfo(depth
)
324 self
.lastclock
= os
.clock()
329 -- This returns a (possibly empty) function record for
330 -- the specified function. It is for internal profiler use.
332 function _profiler
._get_func_rec(self
,func
,force
,info
)
333 -- Find the function ref for 'func' (if force and not present, create one)
334 local ret
= self
.rawstats
[func
]
335 if ret
== nil and force
~= 1 then
339 -- Build a new function statistics table
344 ret
.anon_child_time
= 0
345 ret
.name_child_time
= 0
347 ret
.children_time
= {}
349 self
.rawstats
[func
] = ret
356 -- This writes a profile report to the output file object. If
357 -- sort_by_total_time is nil or false the output is sorted by
358 -- the function time minus the time in it's children.
360 function _profiler
.report( self
, outfile
, sort_by_total_time
)
363 [[Lua Profile output created by profiler.lua. Copyright Pepperfish 2002+
367 -- This is pretty awful.
369 if self
.variant
== "time" then
370 terms
.capitalized
= "Sample"
371 terms
.single
= "sample"
372 terms
.pastverb
= "sampled"
373 elseif self
.variant
== "call" then
374 terms
.capitalized
= "Call"
375 terms
.single
= "call"
376 terms
.pastverb
= "called"
383 for func
,record
in pairs(self
.rawstats
) do
384 table.insert(ordering
, func
)
387 if sort_by_total_time
then
388 table.sort( ordering
,
389 function(a
,b
) return self
.rawstats
[a
] > self
.rawstats
[b
] end
392 table.sort( ordering
,
394 local arec
= self
.rawstats
[a
]
395 local brec
= self
.rawstats
[b
]
396 local atime
= arec
.time
- (arec
.anon_child_time
+ arec
.name_child_time
)
397 local btime
= brec
.time
- (brec
.anon_child_time
+ brec
.name_child_time
)
403 for i
=1,table.getn(ordering
) do
404 local func
= ordering
[i
]
405 local record
= self
.rawstats
[func
]
406 local thisfuncname
= " " .. self
:_pretty_name(func
) .. " "
407 if string.len( thisfuncname
) < 42 then
409 string.rep( "-", (42 - string.len(thisfuncname
))/2 ) .. thisfuncname
411 thisfuncname
.. string.rep( "-", 42 - string.len(thisfuncname
) )
414 total_time
= total_time
+ ( record
.time
- ( record
.anon_child_time
+
415 record
.name_child_time
) )
416 outfile
:write( string.rep( "-", 19 ) .. thisfuncname
..
417 string.rep( "-", 19 ) .. "\n" )
418 outfile
:write( terms
.capitalized
.." count: " ..
419 string.format( "%4d", record
.count
) .. "\n" )
420 outfile
:write( "Time spend total: " ..
421 string.format( "%4.3f", record
.time
) .. "s\n" )
422 outfile
:write( "Time spent in children: " ..
423 string.format("%4.3f",record
.anon_child_time
+record
.name_child_time
) ..
426 record
.time
- (record
.anon_child_time
+ record
.name_child_time
)
427 outfile
:write( "Time spent in self: " ..
428 string.format("%4.3f", timeinself
) .. "s\n" )
429 outfile
:write( "Time spent per " .. terms
.single
.. ": " ..
430 string.format("%4.5f", record
.time
/record
.count
) ..
431 "s/" .. terms
.single
.. "\n" )
432 outfile
:write( "Time spent in self per "..terms
.single
..": " ..
433 string.format( "%4.5f", timeinself
/record
.count
) .. "s/" ..
436 -- Report on each child in the form
437 -- Child <funcname> called n times and took a.bs
438 local added_blank
= 0
439 for k
,v
in pairs(record
.children
) do
440 if self
.prevented_functions
[k
] == nil or
441 self
.prevented_functions
[k
] == 0
443 if added_blank
== 0 then
444 outfile
:write( "\n" ) -- extra separation line
447 outfile
:write( "Child " .. self
:_pretty_name(k
) ..
448 string.rep( " ", 41-string.len(self
:_pretty_name(k
)) ) .. " " ..
449 terms
.pastverb
.." " .. string.format("%6d", v
) )
450 outfile
:write( " times. Took " ..
451 string.format("%4.2f", record
.children_time
[k
] ) .. "s\n" )
455 outfile
:write( "\n" ) -- extra separation line
458 outfile
:write( "\n\n" )
459 outfile
:write( "Total time spent in profiled functions: " ..
460 string.format("%5.3g",total_time
) .. "s\n" )
470 -- This writes the profile to the output file object as
471 -- loadable Lua source.
473 function _profiler
.lua_report(self
,outfile
)
474 -- Purpose: Write out the entire raw state in a cross-referenceable form.
477 for func
,record
in pairs(self
.rawstats
) do
478 table.insert(ordering
, func
)
479 functonum
[func
] = table.getn(ordering
)
483 "-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
484 outfile
:write( "-- Function names\nfuncnames = {}\n" )
485 for i
=1,table.getn(ordering
) do
486 local thisfunc
= ordering
[i
]
487 outfile
:write( "funcnames[" .. i
.. "] = " ..
488 string.format("%q", self
:_pretty_name(thisfunc
)) .. "\n" )
490 outfile
:write( "\n" )
491 outfile
:write( "-- Function times\nfunctimes = {}\n" )
492 for i
=1,table.getn(ordering
) do
493 local thisfunc
= ordering
[i
]
494 local record
= self
.rawstats
[thisfunc
]
495 outfile
:write( "functimes[" .. i
.. "] = { " )
496 outfile
:write( "tot=" .. record
.time
.. ", " )
497 outfile
:write( "achild=" .. record
.anon_child_time
.. ", " )
498 outfile
:write( "nchild=" .. record
.name_child_time
.. ", " )
499 outfile
:write( "count=" .. record
.count
.. " }\n" )
501 outfile
:write( "\n" )
502 outfile
:write( "-- Child links\nchildren = {}\n" )
503 for i
=1,table.getn(ordering
) do
504 local thisfunc
= ordering
[i
]
505 local record
= self
.rawstats
[thisfunc
]
506 outfile
:write( "children[" .. i
.. "] = { " )
507 for k
,v
in pairs(record
.children
) do
508 if functonum
[k
] then -- non-recorded functions will be ignored now
509 outfile
:write( functonum
[k
] .. ", " )
512 outfile
:write( "}\n" )
514 outfile
:write( "\n" )
515 outfile
:write( "-- Child call counts\nchildcounts = {}\n" )
516 for i
=1,table.getn(ordering
) do
517 local thisfunc
= ordering
[i
]
518 local record
= self
.rawstats
[thisfunc
]
519 outfile
:write( "children[" .. i
.. "] = { " )
520 for k
,v
in record
.children
do
521 if functonum
[k
] then -- non-recorded functions will be ignored now
522 outfile
:write( v
.. ", " )
525 outfile
:write( "}\n" )
527 outfile
:write( "\n" )
528 outfile
:write( "-- Child call time\nchildtimes = {}\n" )
529 for i
=1,table.getn(ordering
) do
530 local thisfunc
= ordering
[i
]
531 local record
= self
.rawstats
[thisfunc
];
532 outfile
:write( "children[" .. i
.. "] = { " )
533 for k
,v
in pairs(record
.children
) do
534 if functonum
[k
] then -- non-recorded functions will be ignored now
535 outfile
:write( record
.children_time
[k
] .. ", " )
538 outfile
:write( "}\n" )
540 outfile
:write( "\n\n-- That is all.\n\n" )
544 -- Internal function to calculate a pretty name for the profile output
545 function _profiler
._pretty_name(self
,func
)
547 -- Only the data collected during the actual
548 -- run seems to be correct.... why?
549 local info
= self
.rawstats
[ func
].func_info
550 -- local info = debug.getinfo( func )
553 if info
.what
== "Lua" then
556 if info
.what
== "C" then
559 if info
.what
== "main" then
563 if info
.name
== nil then
564 name
= name
.. "<"..tostring(func
) .. ">"
566 name
= name
.. info
.name
570 name
= name
.. "@" .. info
.source
572 if info
.what
== "C" then
575 name
= name
.. "@<string>"
579 if info
.what
== "C" then
582 name
= name
.. info
.linedefined
590 -- This allows you to specify functions which you do
591 -- not want profiled. Setting level to 1 keeps the
592 -- function from being profiled. Setting level to 2
593 -- keeps both the function and its children from
596 function _profiler
.prevent(self
, func
, level
)
597 self
.prevented_functions
[func
] = (level
or 1)
601 _profiler
.prevented_functions
= {
602 [_profiler
.start
] = 2,
603 [_profiler
.stop
] = 2,
604 [_profiler
._internal_profile_by_time
] = 2,
605 [_profiler
._internal_profile_by_call
] = 2,
606 [_profiler_hook_wrapper_by_time
] = 2,
607 [_profiler_hook_wrapper_by_call
] = 2,
608 [_profiler
.prevent
] = 2,
609 [_profiler
._get_func_rec
] = 2,
610 [_profiler
.report
] = 2,
611 [_profiler
.lua_report
] = 2,
612 [_profiler
._pretty_name
] = 2