1 ;;; Copyright (C) 2003 Gerd Moellmann <gerd.moellmann@t-online.de>
2 ;;; All rights reserved.
4 ;;; Redistribution and use in source and binary forms, with or without
5 ;;; modification, are permitted provided that the following conditions
8 ;;; 1. Redistributions of source code must retain the above copyright
9 ;;; notice, this list of conditions and the following disclaimer.
10 ;;; 2. Redistributions in binary form must reproduce the above copyright
11 ;;; notice, this list of conditions and the following disclaimer in the
12 ;;; documentation and/or other materials provided with the distribution.
13 ;;; 3. The name of the author may not be used to endorse or promote
14 ;;; products derived from this software without specific prior written
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21 ;;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 ;;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
23 ;;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 ;;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27 ;;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
30 ;;; Statistical profiler.
34 ;;; This profiler arranges for SIGPROF interrupts to interrupt a
35 ;;; running program at regular intervals. Each time a SIGPROF occurs,
36 ;;; the current program counter and return address is recorded in a
37 ;;; vector, until a configurable maximum number of samples have been
40 ;;; A profiling report is generated from the samples array by
41 ;;; determining the Lisp functions corresponding to the recorded
42 ;;; addresses. Each program counter/return address pair forms one
43 ;;; edge in a call graph.
47 ;;; The code being generated on x86 makes determining callers reliably
48 ;;; something between extremely difficult and impossible. Example:
50 ;;; 10979F00: .entry eval::eval-stack-args(arg-count)
51 ;;; 18: pop dword ptr [ebp-8]
52 ;;; 1B: lea esp, [ebp-32]
57 ;;; 29: mov [ebp-12], edi
58 ;;; 2C: mov dword ptr [ebp-16], #x28F0000B ; nil
59 ;;; ; No-arg-parsing entry point
60 ;;; 33: mov dword ptr [ebp-20], 0
62 ;;; 3C: L0: mov edx, esp
64 ;;; 41: mov eax, [#x10979EF8] ; #<FDEFINITION object for eval::eval-stack-pop>
66 ;;; 49: mov [edx-4], ebp
68 ;;; 4E: call dword ptr [eax+5]
71 ;;; Suppose this function is interrupted by SIGPROF at 4E. At that
72 ;;; point, the frame pointer EBP has been modified so that the
73 ;;; original return address of the caller of eval-stack-args is no
74 ;;; longer where it can be found by x86-call-context, and the new
75 ;;; return address, for the call to eval-stack-pop, is not yet on the
76 ;;; stack. The effect is that x86-call-context returns something
77 ;;; bogus, which leads to wrong edges in the call graph.
79 ;;; One thing that one might try is filtering cases where the program
80 ;;; is interrupted at a call instruction. But since the above example
81 ;;; of an interrupt at a call instruction isn't the only case where
82 ;;; the stack is something x86-call-context can't really cope with,
83 ;;; this is not a general solution.
85 ;;; Random ideas for implementation:
87 ;;; * Space profiler. Sample when new pages are allocated instead of
90 ;;; * Record a configurable number of callers up the stack. That
91 ;;; could give a more complete graph when there are many small
94 ;;; * Print help strings for reports, include hints to the problem
97 ;;; * Make flat report the default since call-graph isn't that
100 (defpackage #:sb-sprof
101 (:use
#:cl
#:sb-ext
#:sb-unix
#:sb-alien
#:sb-sys
)
102 (:export
#:*sample-interval
* #:*max-samples
*
103 #:start-sampling
#:stop-sampling
#:with-sampling
104 #:with-profiling
#:start-profiling
#:stop-profiling
107 (in-package #:sb-sprof
)
112 (defstruct (vertex (:constructor make-vertex
)
113 (:constructor make-scc
(scc-vertices edges
)))
114 (visited nil
:type boolean
)
115 (root nil
:type
(or null vertex
))
117 (edges () :type list
)
118 (scc-vertices () :type list
))
121 (vertex (sb-impl::missing-arg
) :type vertex
))
124 (vertices () :type list
))
126 (declaim (inline scc-p
))
127 (defun scc-p (vertex)
128 (not (null (vertex-scc-vertices vertex
))))
130 (defmacro do-vertices
((vertex graph
) &body body
)
131 `(dolist (,vertex
(graph-vertices ,graph
))
134 (defmacro do-edges
((edge edge-to vertex
) &body body
)
135 `(dolist (,edge
(vertex-edges ,vertex
))
136 (let ((,edge-to
(edge-vertex ,edge
)))
139 (defun self-cycle-p (vertex)
140 (do-edges (e to vertex
)
144 (defun map-vertices (fn vertices
)
146 (setf (vertex-visited v
) nil
))
148 (unless (vertex-visited v
)
151 ;;; Eeko Nuutila, Eljas Soisalon-Soininen, around 1992. Improves on
152 ;;; Tarjan's original algorithm by not using the stack when processing
153 ;;; trivial components. Trivial components should appear frequently
154 ;;; in a call-graph such as ours, I think. Same complexity O(V+E) as
156 (defun strong-components (vertices)
157 (let ((in-component (make-array (length vertices
)
158 :element-type
'boolean
159 :initial-element nil
))
163 (labels ((min-root (x y
)
164 (let ((rx (vertex-root x
))
165 (ry (vertex-root y
)))
166 (if (< (vertex-dfn rx
) (vertex-dfn ry
))
170 (aref in-component
(vertex-dfn v
)))
171 ((setf in-component
) (in v
)
172 (setf (aref in-component
(vertex-dfn v
)) in
))
174 (> (vertex-dfn x
) (vertex-dfn y
)))
176 (setf (vertex-dfn v
) (incf dfn
)
179 (vertex-visited v
) t
)
181 (unless (vertex-visited w
)
183 (unless (in-component w
)
184 (setf (vertex-root v
) (min-root v w
))))
185 (if (eq v
(vertex-root v
))
186 (loop while
(and stack
(vertex-> (car stack
) v
))
188 collect w into this-component
189 do
(setf (in-component w
) t
)
191 (setf (in-component v
) t
)
192 (push (cons v this-component
) components
))
194 (map-vertices #'visit vertices
)
197 ;;; Given a dag as a list of vertices, return the list sorted
198 ;;; topologically, children first.
199 (defun topological-sort (dag)
202 (labels ((rec-sort (v)
203 (setf (vertex-visited v
) t
)
204 (setf (vertex-dfn v
) (incf dfn
))
205 (dolist (e (vertex-edges v
))
206 (unless (vertex-visited (edge-vertex e
))
207 (rec-sort (edge-vertex e
))))
209 (map-vertices #'rec-sort dag
)
212 ;;; Reduce graph G to a dag by coalescing strongly connected components
213 ;;; into vertices. Sort the result topologically.
214 (defun reduce-graph (graph &optional
(scc-constructor #'make-scc
))
215 (sb-int:collect
((sccs) (trivial))
216 (dolist (c (strong-components (graph-vertices graph
)))
217 (if (or (cdr c
) (self-cycle-p (car c
)))
218 (sb-int:collect
((outgoing))
223 (sccs (funcall scc-constructor c
(outgoing))))
226 (dolist (v (trivial))
228 (when (member w
(vertex-scc-vertices scc
))
229 (setf (edge-vertex e
) scc
)))))
230 (setf (graph-vertices graph
)
231 (topological-sort (nconc (sccs) (trivial))))))
236 ;;; An AA tree is a red-black tree with the extra condition that left
237 ;;; children may not be red. This condition simplifies the red-black
238 ;;; algorithm. It eliminates half of the restructuring cases, and
239 ;;; simplifies the delete algorithm.
241 (defstruct (aa-node (:conc-name aa-
))
242 (left nil
:type
(or null aa-node
))
243 (right nil
:type
(or null aa-node
))
244 (level 0 :type integer
)
248 (let ((node (make-aa-node)))
249 (setf (aa-left node
) node
)
250 (setf (aa-right node
) node
)
254 (root *null-node
* :type aa-node
))
256 (declaim (inline skew split rotate-with-left-child rotate-with-right-child
))
258 (defun rotate-with-left-child (k2)
259 (let ((k1 (aa-left k2
)))
260 (setf (aa-left k2
) (aa-right k1
))
261 (setf (aa-right k1
) k2
)
264 (defun rotate-with-right-child (k1)
265 (let ((k2 (aa-right k1
)))
266 (setf (aa-right k1
) (aa-left k2
))
267 (setf (aa-left k2
) k1
)
271 (if (= (aa-level (aa-left aa
)) (aa-level aa
))
272 (rotate-with-left-child aa
)
276 (when (= (aa-level (aa-right (aa-right aa
)))
278 (setq aa
(rotate-with-right-child aa
))
279 (incf (aa-level aa
)))
282 (macrolet ((def (name () &body body
)
283 (let ((name (sb-int::symbolicate
'aa- name
)))
284 `(defun ,name
(item tree
&key
285 (test-< #'<) (test-= #'=)
286 (node-key #'identity
) (item-key #'identity
))
287 (let ((.item-key.
(funcall item-key item
)))
288 (flet ((item-< (node)
289 (funcall test-
< .item-key.
290 (funcall node-key
(aa-data node
))))
292 (funcall test-
= .item-key.
293 (funcall node-key
(aa-data node
)))))
294 (declare (inline item-
< item-
=))
298 (labels ((insert-into (aa)
299 (cond ((eq aa
*null-node
*)
300 (setq aa
(make-aa-node :data item
302 :right
*null-node
*)))
304 (return-from insert-into aa
))
306 (setf (aa-left aa
) (insert-into (aa-left aa
))))
308 (setf (aa-right aa
) (insert-into (aa-right aa
)))))
310 (setf (aa-tree-root tree
)
311 (insert-into (aa-tree-root tree
)))))
314 (let ((deleted-node *null-node
*)
316 (labels ((remove-from (aa)
317 (unless (eq aa
*null-node
*)
320 (setf (aa-left aa
) (remove-from (aa-left aa
)))
322 (setq deleted-node aa
)
323 (setf (aa-right aa
) (remove-from (aa-right aa
)))))
324 (cond ((eq aa last-node
)
326 ;; If at the bottom of the tree, and item
327 ;; is present, delete it.
328 (when (and (not (eq deleted-node
*null-node
*))
329 (item-= deleted-node
))
330 (setf (aa-data deleted-node
) (aa-data aa
))
331 (setq deleted-node
*null-node
*)
332 (setq aa
(aa-right aa
))))
334 ;; Otherwise not at bottom of tree; rebalance.
335 ((or (< (aa-level (aa-left aa
))
337 (< (aa-level (aa-right aa
))
340 (when (> (aa-level (aa-right aa
)) (aa-level aa
))
341 (setf (aa-level (aa-right aa
)) (aa-level aa
)))
343 (setf (aa-right aa
) (skew (aa-right aa
)))
344 (setf (aa-right (aa-right aa
))
345 (skew (aa-right (aa-right aa
))))
347 (setf (aa-right aa
) (split (aa-right aa
))))))
349 (setf (aa-tree-root tree
)
350 (remove-from (aa-tree-root tree
))))))
353 (let ((current (aa-tree-root tree
)))
354 (setf (aa-data *null-node
*) item
)
356 (cond ((eq current
*null-node
*)
357 (return (values nil nil
)))
359 (return (values (aa-data current
) t
)))
361 (setq current
(aa-left current
)))
363 (setq current
(aa-right current
))))))))
368 ;;; Sort the subsequence of Vec in the interval [From To] using
369 ;;; comparison function Test. Assume each element to sort consists of
370 ;;; Element-Size array slots, and that the slot Key-Offset contains
372 (defun qsort (vec &key
(element-size 1) (key-offset 0)
373 (from 0) (to (- (length vec
) element-size
)))
374 (declare (type fixnum to from element-size key-offset
))
375 (declare (type (simple-array address
) vec
))
376 (labels ((rotate (i j
)
377 (declare (fixnum i j
))
378 (loop repeat element-size
379 for i from i and j from j do
380 (rotatef (aref vec i
) (aref vec j
))))
382 (aref vec
(+ i key-offset
)))
384 (declare (fixnum to from
))
386 (let* ((mid (* element-size
387 (round (+ (/ from element-size
)
391 (j (+ to element-size
))
393 (declare (fixnum mid i j
))
396 (loop do
(incf i element-size
)
398 ;; QSORT used to take a test
399 ;; parameter which was funcalled
400 ;; here. This caused some consing,
401 ;; which is problematic since
402 ;; QSORT is indirectly called in
403 ;; an after-gc-hook. So just
404 ;; hardcode >, which would've been
405 ;; used for the test anyway.
408 (loop do
(decf j element-size
)
409 until
(or (<= j from
)
412 (when (< j i
) (return))
415 (rec-sort from
(- j element-size
))
424 "Type used for addresses, for instance, program counters,
425 code start/end locations etc."
426 '(unsigned-byte #.sb-vm
::n-machine-word-bits
))
428 (defconstant +unknown-address
+ 0
429 "Constant representing an address that cannot be determined.")
431 ;;; A call graph. Vertices are NODE structures, edges are CALL
433 (defstruct (call-graph (:include graph
)
434 (:constructor %make-call-graph
))
435 ;; the value of *Sample-Interval* at the time the graph was created
436 (sample-interval (sb-impl::missing-arg
) :type number
)
437 ;; number of samples taken
438 (nsamples (sb-impl::missing-arg
) :type sb-impl
::index
)
439 ;; sample count for samples not in any function
440 (elsewhere-count (sb-impl::missing-arg
) :type sb-impl
::index
)
441 ;; a flat list of NODEs, sorted by sample count
442 (flat-nodes () :type list
))
444 ;;; A node in a call graph, representing a function that has been
445 ;;; sampled. The edges of a node are CALL structures that represent
446 ;;; functions called from a given node.
447 (defstruct (node (:include vertex
)
448 (:constructor %make-node
))
449 ;; A numeric label for the node. The most frequently called function
450 ;; gets label 1. This is just for identification purposes in the
452 (index 0 :type fixnum
)
453 ;; start and end address of the function's code
454 (start-pc 0 :type address
)
455 (end-pc 0 :type address
)
456 ;; the name of the function
458 ;; sample count for this function
459 (count 0 :type fixnum
)
460 ;; count including time spent in functions called from this one
461 (accrued-count 0 :type fixnum
)
462 ;; list of NODEs for functions calling this one
463 (callers () :type list
))
465 ;;; A cycle in a call graph. The functions forming the cycle are
466 ;;; found in the SCC-VERTICES slot of the VERTEX structure.
467 (defstruct (cycle (:include node
)))
469 ;;; An edge in a call graph. EDGE-VERTEX is the function being
471 (defstruct (call (:include edge
)
472 (:constructor make-call
(vertex)))
473 ;; number of times the call was sampled
474 (count 1 :type sb-impl
::index
))
476 ;;; Info about a function in dynamic-space. This is used to track
477 ;;; address changes of functions during GC.
478 (defstruct (dyninfo (:constructor make-dyninfo
(code start end
)))
479 ;; component this info is for
480 (code (sb-impl::missing-arg
) :type sb-kernel
::code-component
)
481 ;; current start and end address of the component
482 (start (sb-impl::missing-arg
) :type address
)
483 (end (sb-impl::missing-arg
) :type address
)
484 ;; new start address of the component, after GC.
485 (new-start 0 :type address
))
487 (defmethod print-object ((call-graph call-graph
) stream
)
488 (print-unreadable-object (call-graph stream
:type t
:identity t
)
489 (format stream
"~d samples" (call-graph-nsamples call-graph
))))
491 (defmethod print-object ((node node
) stream
)
492 (print-unreadable-object (node stream
:type t
:identity t
)
493 (format stream
"~s [~d]" (node-name node
) (node-index node
))))
495 (defmethod print-object ((call call
) stream
)
496 (print-unreadable-object (call stream
:type t
:identity t
)
497 (format stream
"~s [~d]" (node-name (call-vertex call
))
498 (node-index (call-vertex call
)))))
500 (deftype report-type
()
501 '(member nil
:flat
:graph
))
503 (defvar *sample-interval
* 0.01
504 "Default number of seconds between samples.")
505 (declaim (number *sample-interval
*))
507 (defvar *max-samples
* 50000
508 "Default number of samples taken.")
509 (declaim (type sb-impl
::index
*max-samples
*))
511 (defconstant +sample-size
+
515 (defvar *samples
* nil
)
516 (declaim (type (or null
(vector address
)) *samples
*))
518 (defvar *samples-index
* 0)
519 (declaim (type sb-impl
::index
*samples-index
*))
521 (defvar *profiling
* nil
)
522 (defvar *sampling
* nil
)
523 (declaim (type boolean
*profiling
* *sampling
*))
525 (defvar *dynamic-space-code-info
* ())
526 (declaim (type list
*dynamic-space-code-info
*))
528 (defvar *show-progress
* nil
)
530 (defvar *old-sampling
* nil
)
532 (defun turn-off-sampling ()
533 (setq *old-sampling
* *sampling
*)
534 (setq *sampling
* nil
))
536 (defun turn-on-sampling ()
537 (setq *sampling
* *old-sampling
*))
539 (defun show-progress (format-string &rest args
)
540 (when *show-progress
*
541 (apply #'format t format-string args
)
544 (defun start-sampling ()
545 "Switch on statistical sampling."
548 (defun stop-sampling ()
549 "Switch off statistical sampling."
550 (setq *sampling
* nil
))
552 (defmacro with-sampling
((&optional
(on t
)) &body body
)
553 "Evaluate body with statistical sampling turned on or off."
554 `(let ((*sampling
* ,on
))
557 (defun sort-samples (key-offset)
558 "Sort *Samples* using comparison Test. Key must be one of
559 :Pc or :Return-Pc for sorting by pc or return pc."
560 (when (plusp *samples-index
*)
563 :to
(- *samples-index
* +sample-size
+)
564 :element-size
+sample-size
+
565 :key-offset key-offset
)))
568 (declare (type address pc
))
569 (setf (aref *samples
* *samples-index
*) pc
)
570 (incf *samples-index
*))
572 ;;; SIGPROF handler. Record current PC and return address in
575 (defun sigprof-handler (signal code scp
)
576 (declare (ignore signal code
) (type system-area-pointer scp
))
577 (when (and *sampling
*
578 (< *samples-index
* (length *samples
*)))
579 (sb-sys:without-gcing
580 (locally (declare (optimize (inhibit-warnings 2)))
581 (with-alien ((scp (* os-context-t
) :local scp
))
582 ;; For some reason completely bogus small values for the
583 ;; frame pointer are returned every now and then, leading
584 ;; to segfaults. Try to avoid these cases.
586 ;; FIXME: Do a more thorough sanity check on ebp, or figure
587 ;; out why this is happening.
588 ;; -- JES, 2005-01-11
589 (when (< (sb-vm::context-register scp
#.sb-vm
::ebp-offset
)
591 (dotimes (i +sample-size
+)
593 (return-from sigprof-handler nil
))
594 (let* ((pc-ptr (sb-vm:context-pc scp
))
595 (fp (sb-vm::context-register scp
#.sb-vm
::ebp-offset
)))
596 (record (sap-int pc-ptr
))
597 (let ((fp (int-sap fp
))
599 (dotimes (i (1- +sample-size
+))
602 (sb-di::x86-call-context fp
:depth i
))
609 ;; FIXME: On non-x86 platforms we don't yet walk the call stack deeper
612 (defun sigprof-handler (signal code scp
)
613 (declare (ignore signal code
))
614 (when (and *sampling
*
615 (< *samples-index
* (length *samples
*)))
616 (sb-sys:without-gcing
617 (with-alien ((scp (* os-context-t
) :local scp
))
618 (locally (declare (optimize (inhibit-warnings 2)))
619 (let* ((pc-ptr (sb-vm:context-pc scp
))
620 (fp (sb-vm::context-register scp
#.sb-vm
::cfp-offset
))
623 (* sb-vm
::lra-save-offset sb-vm
::n-word-bytes
))))
624 (record (sap-int pc-ptr
))
627 ;;; Map function FN over code objects in dynamic-space. FN is called
628 ;;; with two arguments, the object and its size in bytes.
629 (defun map-dynamic-space-code (fn)
630 (flet ((call-if-code (obj obj-type size
)
631 (declare (ignore obj-type
))
632 (when (sb-kernel:code-component-p obj
)
633 (funcall fn obj size
))))
634 (sb-vm::map-allocated-objects
#'call-if-code
:dynamic
)))
636 ;;; Return the start address of CODE.
637 (defun code-start (code)
638 (declare (type sb-kernel
:code-component code
))
639 (sap-int (sb-kernel:code-instructions code
)))
641 ;;; Return start and end address of CODE as multiple values.
642 (defun code-bounds (code)
643 (declare (type sb-kernel
:code-component code
))
644 (let* ((start (code-start code
))
645 (end (+ start
(sb-kernel:%code-code-size code
))))
648 ;;; Record the addresses of dynamic-space code objects in
649 ;;; *DYNAMIC-SPACE-CODE-INFO*. Call this with GC disabled.
650 (defun record-dyninfo ()
651 (flet ((record-address (code size
)
652 (declare (ignore size
))
653 (multiple-value-bind (start end
)
655 (push (make-dyninfo code start end
)
656 *dynamic-space-code-info
*))))
657 (map-dynamic-space-code #'record-address
)))
659 (defun adjust-samples (offset)
660 (sort-samples offset
)
662 (declare (type sb-impl
::index sidx
))
663 (dolist (info *dynamic-space-code-info
*)
664 (unless (= (dyninfo-new-start info
) (dyninfo-start info
))
665 (let ((pos (do ((i sidx
(+ i
+sample-size
+)))
666 ((= i
*samples-index
*) nil
)
667 (declare (type sb-impl
::index i
))
668 (when (<= (dyninfo-start info
)
669 (aref *samples
* (+ i offset
))
674 (loop with delta
= (- (dyninfo-new-start info
)
675 (dyninfo-start info
))
676 for j from sidx below
*samples-index
* by
+sample-size
+
677 as pc
= (aref *samples
* (+ j offset
))
678 while
(<= (dyninfo-start info
) pc
(dyninfo-end info
)) do
679 (incf (aref *samples
* (+ j offset
)) delta
)
680 (incf sidx
+sample-size
+))))))))
682 ;;; This runs from *AFTER-GC-HOOKS*. Adjust *SAMPLES* for address
683 ;;; changes of dynamic-space code objects.
684 (defun adjust-samples-for-address-changes ()
685 (sb-sys:without-gcing
687 (setq *dynamic-space-code-info
*
688 (sort *dynamic-space-code-info
* #'> :key
#'dyninfo-start
))
689 (dolist (info *dynamic-space-code-info
*)
690 (setf (dyninfo-new-start info
)
691 (code-start (dyninfo-code info
))))
693 (dotimes (i +sample-size
+)
695 (dolist (info *dynamic-space-code-info
*)
696 (let ((size (- (dyninfo-end info
) (dyninfo-start info
))))
697 (setf (dyninfo-start info
) (dyninfo-new-start info
))
698 (setf (dyninfo-end info
) (+ (dyninfo-new-start info
) size
))))
701 (defmacro with-profiling
((&key
(sample-interval '*sample-interval
*)
702 (max-samples '*max-samples
*)
705 (report nil report-p
))
707 "Repeatedly evaluate Body with statistical profiling turned on.
708 The following keyword args are recognized:
710 :Sample-Interval <seconds>
711 Take a sample every <seconds> seconds. Default is
715 Repeat evaluating body until <max> samples are taken.
716 Default is *Max-Samples*.
719 If specified, call Report with :Type <type> at the end.
722 It true, call Reset at the beginning."
723 (declare (type report-type report
))
724 `(let ((*sample-interval
* ,sample-interval
)
725 (*max-samples
* ,max-samples
))
726 ,@(when reset
'((reset)))
729 (when (>= *samples-index
* (length *samples
*))
731 ,@(when show-progress
732 `((format t
"~&===> ~d of ~d samples taken.~%"
733 (/ *samples-index
* +sample-size
+)
735 (let ((.last-index.
*samples-index
*))
737 (when (= .last-index.
*samples-index
*)
738 (warn "No sampling progress; possibly a profiler bug.")
741 ,@(when report-p
`((report :type
,report
)))))
743 (defun start-profiling (&key
(max-samples *max-samples
*)
744 (sample-interval *sample-interval
*)
746 "Start profiling statistically if not already profiling.
747 The following keyword args are recognized:
749 :Sample-Interval <seconds>
750 Take a sample every <seconds> seconds. Default is
754 Maximum number of samples. Default is *Max-Samples*.
757 If true, the default, start sampling right away.
758 If false, Start-Sampling can be used to turn sampling on."
760 (multiple-value-bind (secs usecs
)
761 (multiple-value-bind (secs rest
)
762 (truncate sample-interval
)
763 (values secs
(truncate (* rest
1000000))))
764 (setq *samples
* (make-array (* max-samples
+sample-size
+)
765 :element-type
'address
))
766 (setq *samples-index
* 0)
767 (setq *sampling
* sampling
)
768 ;; Disabled for now, since this was causing some problems with the
769 ;; sampling getting turned off completely. --JES, 2004-06-19
771 ;; BEFORE-GC-HOOKS have exceedingly bad interactions with
772 ;; threads. -- CSR, 2004-06-21
774 ;; (pushnew 'turn-off-sampling *before-gc-hooks*)
775 (pushnew 'adjust-samples-for-address-changes
*after-gc-hooks
*)
777 (sb-sys:enable-interrupt sb-unix
::sigprof
#'sigprof-handler
)
778 (unix-setitimer :profile secs usecs secs usecs
)
779 (setq *profiling
* t
)))
782 (defun stop-profiling ()
783 "Stop profiling if profiling."
785 (setq *after-gc-hooks
*
786 (delete 'adjust-samples-for-address-changes
*after-gc-hooks
*))
787 (unix-setitimer :profile
0 0 0 0)
788 (sb-sys:enable-interrupt sb-unix
::sigprof
:default
)
789 (setq *sampling
* nil
)
790 (setq *profiling
* nil
))
794 "Reset the profiler."
796 (setq *sampling
* nil
)
797 (setq *dynamic-space-code-info
* ())
799 (setq *samples-index
* 0)
802 ;;; Make a NODE for debug-info INFO.
803 (defun make-node (info)
805 (sb-kernel::code-component
806 (multiple-value-bind (start end
)
808 (%make-node
:name
(or (sb-disassem::find-assembler-routine start
)
809 (format nil
"~a" info
))
810 :start-pc start
:end-pc end
)))
811 (sb-di::compiled-debug-fun
812 (let* ((name (sb-di::debug-fun-name info
))
813 (cdf (sb-di::compiled-debug-fun-compiler-debug-fun info
))
814 (start-offset (sb-c::compiled-debug-fun-start-pc cdf
))
815 (end-offset (sb-c::compiled-debug-fun-elsewhere-pc cdf
))
816 (component (sb-di::compiled-debug-fun-component info
))
817 (start-pc (code-start component
)))
818 (%make-node
:name name
819 :start-pc
(+ start-pc start-offset
)
820 :end-pc
(+ start-pc end-offset
))))
822 (%make-node
:name
(sb-di::debug-fun-name info
)))
824 (%make-node
:name
(coerce info
'string
)))))
826 ;;; Return something serving as debug info for address PC. If we can
827 ;;; get something from SB-DI:DEBUG-FUNCTION-FROM-PC, return that.
828 ;;; Otherwise, if we can determine a code component, return that.
829 ;;; Otherwise return nil.
830 (defun debug-info (pc)
831 (declare (type address pc
))
832 (let ((ptr (sb-di::component-ptr-from-pc
(int-sap pc
))))
833 (cond ((sap= ptr
(int-sap 0))
834 (let ((name (foreign-symbol-in-address (int-sap pc
))))
836 (format nil
"foreign function ~a" name
))))
838 (let* ((code (sb-di::component-from-component-ptr ptr
))
839 (code-header-len (* (sb-kernel:get-header-data code
)
842 (- (sb-kernel:get-lisp-obj-address code
)
843 sb-vm
:other-pointer-lowtag
)
845 (df (ignore-errors (sb-di::debug-fun-from-pc code
851 ;;; One function can have more than one COMPILED-DEBUG-FUNCTION with
852 ;;; the same name. Reduce the number of calls to Debug-Info by first
853 ;;; looking for a given PC in a red-black tree. If not found in the
854 ;;; tree, get debug info, and look for a node in a hash-table by
855 ;;; function name. If not found in the hash-table, make a new node.
858 (defvar *name-
>node
*)
860 (defmacro with-lookup-tables
(() &body body
)
861 `(let ((*node-tree
* (make-aa-tree))
862 (*name-
>node
* (make-hash-table :test
'equal
)))
865 (defun tree-find (item)
866 (flet ((pc/node-
= (pc node
)
867 (<= (node-start-pc node
) pc
(node-end-pc node
)))
869 (< pc
(node-start-pc node
))))
870 (aa-find item
*node-tree
* :test-
= #'pc
/node-
= :test-
< #'pc
/node-
<)))
872 (defun tree-insert (item)
873 (flet ((node/node-
= (x y
)
874 (<= (node-start-pc y
) (node-start-pc x
) (node-end-pc y
)))
876 (< (node-start-pc x
) (node-start-pc y
))))
877 (aa-insert item
*node-tree
* :test-
= #'node
/node-
= :test-
< #'node
/node-
<)))
879 ;;; Find or make a new node for address PC. Value is the NODE found
880 ;;; or made; NIL if not enough information exists to make a NODE for
882 (defun lookup-node (pc)
883 (declare (type address pc
))
885 (let ((info (debug-info pc
)))
887 (let* ((new (make-node info
))
888 (found (gethash (node-name new
) *name-
>node
*)))
890 (setf (node-start-pc found
)
891 (min (node-start-pc found
) (node-start-pc new
)))
892 (setf (node-end-pc found
)
893 (max (node-end-pc found
) (node-end-pc new
)))
896 (setf (gethash (node-name new
) *name-
>node
*) new
)
900 ;;; Return a list of all nodes created by LOOKUP-NODE.
901 (defun collect-nodes ()
902 (loop for node being the hash-values of
*name-
>node
*
905 ;;; Value is a CALL-GRAPH for the current contents of *SAMPLES*.
906 (defun make-call-graph-1 (depth)
907 (let ((elsewhere-count 0)
909 (with-lookup-tables ()
910 (loop for i below
(1- *samples-index
*) ;; by +sample-size+
911 as pc
= (aref *samples
* i
)
912 as return-pc
= (aref *samples
* (1+ i
))
913 as callee
= (lookup-node pc
)
915 (when (and callee
(/= return-pc
+unknown-address
+))
916 (let ((caller (lookup-node return-pc
)))
920 (when (and *show-progress
* (plusp i
))
921 (cond ((zerop (mod i
1000))
922 (show-progress "~d" i
))
924 (show-progress "."))))
925 (when (< (mod i
+sample-size
+) depth
)
926 (when (= (mod i
+sample-size
+) 0)
927 (setf visited-nodes nil
)
929 (incf (node-accrued-count callee
))
930 (incf (node-count callee
)))
932 (incf elsewhere-count
))))
934 (push callee visited-nodes
))
936 (unless (member caller visited-nodes
)
937 (incf (node-accrued-count caller
)))
939 (let ((call (find callee
(node-edges caller
)
940 :key
#'call-vertex
)))
941 (pushnew caller
(node-callers callee
))
943 (unless (member caller visited-nodes
)
944 (incf (call-count call
)))
945 (push (make-call callee
) (node-edges caller
))))))))
946 (let ((sorted-nodes (sort (collect-nodes) #'> :key
#'node-count
)))
947 (loop for node in sorted-nodes and i from
1 do
948 (setf (node-index node
) i
))
949 (%make-call-graph
:nsamples
(/ *samples-index
* +sample-size
+)
950 :sample-interval
*sample-interval
*
951 :elsewhere-count elsewhere-count
952 :vertices sorted-nodes
)))))
954 ;;; Reduce CALL-GRAPH to a dag, creating CYCLE structures for call
956 (defun reduce-call-graph (call-graph)
958 (flet ((make-one-cycle (vertices edges
)
959 (let* ((name (format nil
"<Cycle ~d>" (incf cycle-no
)))
960 (count (loop for v in vertices sum
(node-count v
))))
961 (make-cycle :name name
964 :scc-vertices vertices
966 (reduce-graph call-graph
#'make-one-cycle
))))
968 ;;; For all nodes in CALL-GRAPH, compute times including the time
969 ;;; spent in functions called from them. Note that the call-graph
970 ;;; vertices are in reverse topological order, children first, so we
971 ;;; will have computed accrued counts of called functions before they
972 ;;; are used to compute accrued counts for callers.
973 (defun compute-accrued-counts (call-graph)
974 (do-vertices (from call-graph
)
975 (setf (node-accrued-count from
) (node-count from
))
976 (do-edges (call to from
)
977 (incf (node-accrued-count from
)
978 (round (* (/ (call-count call
) (node-count to
))
979 (node-accrued-count to
)))))))
981 ;;; Return a CALL-GRAPH structure for the current contents of
982 ;;; *SAMPLES*. The result contain a list of nodes sorted by self-time
983 ;;; in the FLAT-NODES slot, and a dag in VERTICES, with call cycles
984 ;;; reduced to CYCLE structures.
985 (defun make-call-graph (depth)
987 (show-progress "~&Computing call graph ")
988 (let ((call-graph (without-gcing (make-call-graph-1 depth
))))
989 (setf (call-graph-flat-nodes call-graph
)
990 (copy-list (graph-vertices call-graph
)))
991 (show-progress "~&Finding cycles")
992 (reduce-call-graph call-graph
)
993 (show-progress "~&Propagating counts")
994 #+nil
(compute-accrued-counts call-graph
)
1000 (defun print-separator (&key
(length 72) (char #\-
))
1001 (format t
"~&~V,,,V<~>~%" length char
))
1003 (defun samples-percent (call-graph count
)
1004 (* 100.0 (/ count
(call-graph-nsamples call-graph
))))
1006 (defun print-call-graph-header (call-graph)
1007 (let ((nsamples (call-graph-nsamples call-graph
))
1008 (interval (call-graph-sample-interval call-graph
))
1009 (ncycles (loop for v in
(graph-vertices call-graph
)
1011 (format t
"~2&Number of samples: ~d~%~
1012 Sample interval: ~f seconds~%~
1013 Total sampling time: ~f seconds~%~
1014 Number of cycles: ~d~2%"
1017 (* nsamples interval
)
1020 (defun print-flat (call-graph &key
(stream *standard-output
*) max
1021 min-percent
(print-header t
))
1022 (let ((*standard-output
* stream
)
1023 (*print-pretty
* nil
)
1026 (min-count (if min-percent
1027 (round (* (/ min-percent
100.0)
1028 (call-graph-nsamples call-graph
)))
1031 (print-call-graph-header call-graph
))
1032 (format t
"~& Self Cumul Total~%")
1033 (format t
"~& Nr Count % Count % Count % Function~%")
1035 (let ((elsewhere-count (call-graph-elsewhere-count call-graph
))
1037 (dolist (node (call-graph-flat-nodes call-graph
))
1038 (when (or (and max
(> (incf i
) max
))
1039 (< (node-count node
) min-count
))
1041 (let* ((count (node-count node
))
1042 (percent (samples-percent call-graph count
))
1043 (accrued-count (node-accrued-count node
))
1044 (accrued-percent (samples-percent call-graph accrued-count
)))
1045 (incf total-count count
)
1046 (incf total-percent percent
)
1047 (format t
"~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~s~%"
1058 (format t
"~& ~6d ~5,1f elsewhere~%"
1060 (samples-percent call-graph elsewhere-count
)))))
1062 (defun print-cycles (call-graph)
1063 (when (some #'cycle-p
(graph-vertices call-graph
))
1064 (format t
"~& Cycle~%")
1065 (format t
"~& Count % Parts~%")
1066 (do-vertices (node call-graph
)
1067 (when (cycle-p node
)
1068 (flet ((print-info (indent index count percent name
)
1069 (format t
"~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
1070 count percent indent name index
)))
1072 (format t
"~&~6d ~5,1f ~a...~%"
1074 (samples-percent call-graph
(cycle-count node
))
1076 (dolist (v (vertex-scc-vertices node
))
1077 (print-info 4 (node-index v
) (node-count v
)
1078 (samples-percent call-graph
(node-count v
))
1083 (defun print-graph (call-graph &key
(stream *standard-output
*)
1085 (let ((*standard-output
* stream
)
1086 (*print-pretty
* nil
))
1087 (print-call-graph-header call-graph
)
1088 (print-cycles call-graph
)
1089 (flet ((find-call (from to
)
1090 (find to
(node-edges from
) :key
#'call-vertex
))
1091 (print-info (indent index count percent name
)
1092 (format t
"~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
1093 count percent indent name index
)))
1094 (format t
"~& Callers~%")
1095 (format t
"~& Cumul. Function~%")
1096 (format t
"~& Count % Count % Callees~%")
1097 (do-vertices (node call-graph
)
1100 ;; Print caller information.
1101 (dolist (caller (node-callers node
))
1102 (let ((call (find-call caller node
)))
1103 (print-info 4 (node-index caller
)
1105 (samples-percent call-graph
(call-count call
))
1106 (node-name caller
))))
1107 ;; Print the node itself.
1108 (format t
"~&~6d ~5,1f ~6d ~5,1f ~s [~d]~%"
1110 (samples-percent call-graph
(node-count node
))
1111 (node-accrued-count node
)
1112 (samples-percent call-graph
(node-accrued-count node
))
1116 (do-edges (call called node
)
1117 (print-info 4 (node-index called
)
1119 (samples-percent call-graph
(call-count call
))
1120 (node-name called
))))
1123 (print-flat call-graph
:stream stream
:max max
1124 :min-percent min-percent
:print-header nil
))))
1126 (defun report (&key
(type :graph
) max min-percent call-graph
1127 (stream *standard-output
*) ((:show-progress
*show-progress
*)))
1128 "Report statistical profiling results. The following keyword
1129 args are recognized:
1132 Specifies the type of report to generate. If :FLAT, show
1133 flat report, if :GRAPH show a call graph and a flat report.
1134 If nil, don't print out a report.
1137 Specify a stream to print the report on. Default is
1141 Don't show more than <max> entries in the flat report.
1143 :Min-Percent <min-percent>
1144 Don't show functions taking less than <min-percent> of the
1145 total time in the flat report.
1147 :Show-Progress <bool>
1148 If true, print progress messages while generating the call graph.
1151 Print a report from <graph> instead of the latest profiling
1154 Value of this function is a Call-Graph object representing the
1155 resulting call-graph."
1156 (let ((graph (or call-graph
(make-call-graph (1- +sample-size
+)))))
1159 (print-flat graph
:stream stream
:max max
:min-percent min-percent
))
1161 (print-graph graph
:stream stream
:max max
:min-percent min-percent
))
1165 ;;; Interface to DISASSEMBLE
1167 (defun add-disassembly-profile-note (chunk stream dstate
)
1168 (declare (ignore chunk stream
))
1169 (unless (zerop *samples-index
*)
1171 (+ (sb-disassem::seg-virtual-location
1172 (sb-disassem:dstate-segment dstate
))
1173 (sb-disassem::dstate-cur-offs dstate
)))
1174 (samples (loop for x from
0 below
*samples-index
* by
+sample-size
+
1175 summing
(if (= (aref *samples
* x
) location
)
1178 (unless (zerop samples
)
1179 (sb-disassem::note
(format nil
"~A/~A samples"
1180 samples
(/ *samples-index
* +sample-size
+))
1183 (pushnew 'add-disassembly-profile-note sb-disassem
::*default-dstate-hooks
*)
1187 (defun test-0 (n &optional
(depth 0))
1188 (declare (optimize (debug 3)))
1191 (test-0 n
(1+ depth
))
1192 (test-0 n
(1+ depth
)))))
1195 (with-profiling (:reset t
:max-samples
1000 :report
:graph
)