1 ;;;; Copyright 2009 Vitaly Mayatskikh <v.mayatskih@gmail.com>
3 ;;;; This file is a part of CL-Perfcounters
5 ;;;; Performance counters are special hardware registers available on most modern
6 ;;;; CPUs. These registers count the number of certain types of hw events: such
7 ;;;; as instructions executed, cachemisses suffered, or branches mis-predicted -
8 ;;;; without slowing down the kernel or applications. These registers can also
9 ;;;; trigger interrupts when a threshold number of events have passed - and can
10 ;;;; thus be used to profile the code that runs on that CPU.
12 ;;;; The Linux Performance Counter subsystem provides an abstraction of these
13 ;;;; hardware capabilities. It provides per task and per CPU counters, counter
14 ;;;; groups, and it provides event capabilities on top of those. It
15 ;;;; provides "virtual" 64-bit counters, regardless of the width of the
16 ;;;; underlying hardware counters.
18 ;;;; CL-Perfcounters is free software: you can redistribute it and/or modify
19 ;;;; it under the terms of the GNU General Public License as published by
20 ;;;; the Free Software Foundation, either version 3 of the License, or
21 ;;;; (at your option) any later version.
23 ;;;; CL-Perfcounters is distributed in the hope that it will be useful,
24 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;;;; GNU General Public License for more details.
28 ;;;; You should have received a copy of the GNU General Public License
29 ;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Package definition: declares :perfcounters, exports the two public macros
;;; defined later in this file (WITH-PERFORMANCE-COUNTERS and TIME+), and then
;;; makes :perfcounters the current package.
;;; NOTE(review): original source lines 32 and 34 are absent from this listing;
;;; a (:use ...) clause is presumably among them -- confirm against upstream.
31 (defpackage :perfcounters
33 (:export
:with-performance-counters
:time
+))
35 (in-package :perfcounters
)
(defparameter +syscall+ 298
  "Syscall number that PERF-COUNTER-OPEN hands to %PERF-COUNTER-OPEN as its
first argument.")

(defparameter +pr-task-perf-counters-disable+ 31
  "prctl option issued by PERF-COUNTER-STOP.")

(defparameter +pr-task-perf-counters-enable+ 32
  "prctl option issued by PERF-COUNTER-START.")
;;; Foreign structure describing one counter.  PERF-COUNTER-OPEN allocates an
;;; instance, accesses its TYPE, SIZE, CONFIG and FLAGS slots, and passes the
;;; pointer to %PERF-COUNTER-OPEN.
;;; NOTE(review): the TYPE and SIZE slots that PERF-COUNTER-OPEN uses are not
;;; among the slots visible here (original lines 47-48 are absent from this
;;; listing) -- confirm against upstream.
;;; NOTE(review): READ_FORMAT is declared :unsigned-int while its neighbours
;;; are :unsigned-long-long; verify the field width against the kernel header.
46 (defcstruct perf-counter-attr
49 (config :unsigned-long-long
)
50 (sample-period/freq
:unsigned-long-long
)
51 (sample-type :unsigned-long-long
)
52 (read_format :unsigned-int
)
53 (flags :unsigned-long-long
)
54 (wakeup-events :unsigned-int
)
55 (reserved-2 :unsigned-int
)
56 (reserved-3 :unsigned-long-long
))
;; Event-type codes.  WITH-PERFORMANCE-COUNTERS pairs every counter number
;; with one of the first two of these.
(defparameter +perf-type-hardware+ 0
  "Event type chosen by WITH-PERFORMANCE-COUNTERS for counter numbers below 32.")

(defparameter +perf-type-software+ 1
  "Event type chosen by WITH-PERFORMANCE-COUNTERS for counter numbers 32 and up.")

(defparameter +perf-type-tracepoint+ 2
  "Event type code for tracepoint events.")

(defparameter +perf-type-hw-cache+ 3
  "Event type code for the generalized hardware cache events.")
;; Common hardware events, generalized by the kernel.
;; The values double as indices into the first sub-list of
;; +perf-count-format-string+, so keep the two in the same order.
(defparameter +perf-count-hw-cpu-cycles+ 0
  "Hardware event: CPU cycles.")

(defparameter +perf-count-hw-instructions+ 1
  "Hardware event: instructions.")

(defparameter +perf-count-hw-cache-references+ 2
  "Hardware event: cache references.")

(defparameter +perf-count-hw-cache-misses+ 3
  "Hardware event: cache misses.")

(defparameter +perf-count-hw-branch-instructions+ 4
  "Hardware event: branch instructions.")

(defparameter +perf-count-hw-branch-misses+ 5
  "Hardware event: branch misses.")

(defparameter +perf-count-hw-bus-cycles+ 6
  "Hardware event: bus cycles.")
;; Special "software" counters provided by the kernel, even if the hardware
;; does not support performance counters. These counters measure various
;; physical and sw events of the kernel (and allow the profiling of them as
;; well).
;;
;; In this file the software events are numbered from 32 upwards:
;; WITH-PERFORMANCE-COUNTERS treats any number >= 32 as a software event and
;; subtracts 32 to obtain the counter index.
(defparameter +perf-count-sw-cpu-clock+ 32
  "Software event: CPU clock.")

(defparameter +perf-count-sw-task-clock+ 33
  "Software event: task clock.")

(defparameter +perf-count-sw-page-faults+ 34
  "Software event: page faults.")

(defparameter +perf-count-sw-context-switches+ 35
  "Software event: context switches.")

(defparameter +perf-count-sw-cpu-migrations+ 36
  "Software event: CPU migrations.")

(defparameter +perf-count-sw-page-faults-min+ 37
  "Software event: minor page faults.")

(defparameter +perf-count-sw-page-faults-maj+ 38
  "Software event: major page faults.")
;; Ready-made counter lists.  They hold the *names* of the counter variables,
;; not their values; WITH-PERFORMANCE-COUNTERS resolves each symbol with
;; SYMBOL-VALUE when the expansion runs.
(defparameter +perf-count-hw-all+
  '(+perf-count-hw-cpu-cycles+
    +perf-count-hw-instructions+
    +perf-count-hw-cache-references+
    +perf-count-hw-cache-misses+
    +perf-count-hw-branch-instructions+
    +perf-count-hw-branch-misses+
    +perf-count-hw-bus-cycles+)
  "Names of all hardware counters; selected by the :perf-count-hw-all shorthand.")

(defparameter +perf-count-sw-all+
  '(+perf-count-sw-cpu-clock+
    +perf-count-sw-task-clock+
    +perf-count-sw-page-faults+
    +perf-count-sw-context-switches+
    +perf-count-sw-cpu-migrations+
    +perf-count-sw-page-faults-min+
    +perf-count-sw-page-faults-maj+)
  "Names of all software counters; selected by the :perf-count-sw-all shorthand.")

(defparameter +perf-count-all+
  (append +perf-count-hw-all+ +perf-count-sw-all+)
  "Hardware counter names followed by software counter names; selected by the
:perf-count-all shorthand.")
;; Generalized hardware cache counters:
;;
;;       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
;;       { read, write, prefetch } x
;;       { accesses, misses }
;;
;; Only the cache identifiers (the first axis) are defined below.
(defparameter +perf-count-hw-cache-l1d+ 0
  "Cache id: L1-D.")

(defparameter +perf-count-hw-cache-l1i+ 1
  "Cache id: L1-I.")

(defparameter +perf-count-hw-cache-ll+ 2
  "Cache id: LLC.")

(defparameter +perf-count-hw-cache-dtlb+ 3
  "Cache id: DTLB.")

(defparameter +perf-count-hw-cache-itlb+ 4
  "Cache id: ITLB.")

(defparameter +perf-count-hw-cache-bpu+ 5
  "Cache id: BPU.")
;;; Table of FORMAT control strings used to report counter values.
;;; WITH-PERFORMANCE-COUNTERS indexes it as
;;;   (nth counter (nth type +perf-count-format-string+))
;;; so the outer position is the event type (0 = hardware, 1 = software) and
;;; the inner position is the counter number within that type.  The order of
;;; the strings must therefore match the +perf-count-hw-*+ values and the
;;; +perf-count-sw-*+ values minus 32.
;;; Every string is wrapped in ~@[ ... ~], so a NIL argument prints nothing.
;;; NOTE(review): "branch misses~:P" appends an "s" for counts other than 1
;;; ("missess"), and "context switch~:P" yields "switchs"; "cache misses" and
;;; "minor fault" carry no ~:P at all.  These are runtime strings, so they are
;;; left untouched here.
;;; NOTE(review): position 2 of the hardware list is labelled "cache hit" but
;;; corresponds to +perf-count-hw-cache-references+ -- confirm the wording.
;;; NOTE(review): the end of this form (original lines 127-130) is absent from
;;; this listing.
112 (defparameter +perf-count-format-string
+
113 '(("~@[~:D CPU cycle~:P consumed~%~]"
114 "~@[~:D instruction~:P executed~%~]"
115 "~@[~:D cache hit~:P~%~]"
116 "~@[~:D cache misses~%~]"
117 "~@[~:D branch instruction~:P~%~]"
118 "~@[~:D branch misses~:P~%~]"
119 "~@[~:D bus cycle~:P~%~]")
120 ("~@[~:D cpu clock~:P~%~]"
121 "~@[~:D task clock~:P~%~]"
122 "~@[~:D page fault~:P~%~]"
123 "~@[~:D context switch~:P~%~]"
124 "~@[~:D cpu migration~:P~%~]"
125 "~@[~:D minor fault~%~]"
126 "~@[~:D major fault~:P~%~]")
;; Bits that can be set in hw_event.read_format to request that
;; reads on the counter should return the indicated quantities,
;; in increasing order of bit value, after the counter value.
;;
;; The values are spelled out because these are bit flags: left implicit,
;; DEFCENUM numbers the entries 0 and 1, and 0 is not a usable bit.  This also
;; matches the explicit powers of two in PERF-COUNTER-RECORD-FORMAT.
(defcenum perf-counter-read-format
  (:perf-format-total-time-enabled 1)
  (:perf-format-total-time-running 2))
138 ;; Bits that can be set in hw_event.record_type to request information
139 ;; in the overflow packets.
;;; Foreign enum of record-format flags; the visible entries carry the
;;; explicit values 4, 8, 16 and 32.
;;; NOTE(review): original lines 141-142 are absent from this listing; the
;;; entries for the values 1 and 2 are presumably defined there -- confirm.
140 (defcenum perf-counter-record-format
143 (:perf-record-time
4)
144 (:perf-record-addr
8)
145 (:perf-record-group
16)
146 (:perf-record-callchain
32))
;;; CFFI binding of the foreign function "syscall" under the Lisp name
;;; %PERF-COUNTER-OPEN, returning :int.  PERF-COUNTER-OPEN calls it with
;;; +syscall+ as the first argument.
;;; NOTE(review): only the last parameter (FLAGS, :unsigned-long) is visible;
;;; the other parameter declarations (original lines 149-153) are absent from
;;; this listing.
148 (defcfun ("syscall" %perf-counter-open
) :int
154 (flags :unsigned-long
))
;; Foreign variable "errno", exposed to Lisp as *ERRNO*; the error-reporting
;; helper reads it to obtain the strerror text.
;; NOTE(review): on C libraries where errno is a per-thread macro rather than
;; a plain global symbol this binding may not resolve -- confirm on the
;; target platform.
(defcvar ("errno" *errno*) :int)
;;; Body of the error-reporting helper.  Signals an ERROR whose message is
;;; "<FUNC> failed: <text>", where <text> is what the foreign function
;;; "strerror" returns for the current value of *ERRNO*.
;;; NOTE(review): the DEFUN line itself (original lines 157-158) is absent
;;; from this listing; callers elsewhere in the file invoke this helper as
;;; (fail "description"), so it is presumably (defun fail (func) ...).
159 (error "~A failed: ~A" func
160 (foreign-funcall "strerror" :int
*errno
* :string
)))
;;; Opens one performance counter described by EVENT-TYPE and EVENT.
;;; Visible steps:
;;;   1. allocate a perf-counter-attr with FOREIGN-ALLOC;
;;;   2. store EVENT-TYPE in its TYPE slot and the structure's foreign size in
;;;      its SIZE slot (CONFIG and FLAGS are bound too; their assignments are
;;;      not visible);
;;;   3. call %PERF-COUNTER-OPEN with +syscall+, the attr pointer and four
;;;      further integer arguments (apparently 0 -1 -1 0);
;;;   4. call FAIL with "Syscall perf_counter_open" (the guarding condition is
;;;      not visible).
;;; NOTE(review): original lines 167-168, 171-172 and 174-175 are absent from
;;; this listing, so the return value cannot be determined from here.
;;; NOTE(review): no FOREIGN-FREE of ATTR is visible, and FOREIGN-ALLOC is not
;;; asked to initialise the memory -- check both against the full source.
162 (defun perf-counter-open (event-type event
)
163 (let ((attr (foreign-alloc 'perf-counter-attr
)))
164 (with-foreign-slots ((type size config flags
) attr perf-counter-attr
)
165 (setf type event-type
166 size
(foreign-type-size 'perf-counter-attr
)
169 (multiple-value-bind (ret)
170 (%perf-counter-open
+syscall
+ attr
0 -
1 -
1 0)
173 (fail "Syscall perf_counter_open"))
;;; CFFI binding of the foreign function "prctl" under the Lisp name
;;; %PERF-COUNTER-PRCTL, returning :int.  PERF-COUNTER-PRCTL calls it with a
;;; single option argument.
;;; NOTE(review): the parameter declarations (original lines 177-178) are
;;; absent from this listing.
176 (defcfun ("prctl" %perf-counter-prctl
) :int
;;; Issues the prctl request OPT through %PERF-COUNTER-PRCTL and binds the
;;; result to RET.  Used by PERF-COUNTER-START and PERF-COUNTER-STOP.
;;; NOTE(review): the rest of the body (original lines 182-184) is absent from
;;; this listing, so the error handling and return value cannot be seen here.
179 (defun perf-counter-prctl (opt)
180 (multiple-value-bind (ret)
181 (%perf-counter-prctl opt
)
(defun perf-counter-start ()
  "Issue the +pr-task-perf-counters-enable+ request through PERF-COUNTER-PRCTL
and return whatever that call returns."
  (perf-counter-prctl +pr-task-perf-counters-enable+))
(defun perf-counter-stop ()
  "Issue the +pr-task-perf-counters-disable+ request through PERF-COUNTER-PRCTL
and return whatever that call returns."
  (perf-counter-prctl +pr-task-perf-counters-disable+))
;;; CFFI binding of the foreign function "read" under the Lisp name %READ.
;;; PERF-COUNTER-READ calls it as (%read fd buffer size).
;;; NOTE(review): the return type is declared :unsigned-long, so a negative
;;; error return would be seen as a large positive number by callers.
;;; NOTE(review): only the last parameter (COUNT, :unsigned-long) is visible;
;;; original lines 193-194 are absent from this listing.
192 (defcfun ("read" %read
) :unsigned-long
195 (count :unsigned-long
))
(defun perf-counter-read (fd)
  "Read the current value of the performance counter open on descriptor FD.

Reads one :unsigned-long from FD with %READ and returns it as an integer.
Calls FAIL (which signals an error) unless the read delivered exactly that
many bytes."
  (let ((size (foreign-type-size :unsigned-long)))
    (with-foreign-object (counter :unsigned-long)
      (let ((ret (%read fd counter size)))
        ;; %READ is declared to return :unsigned-long, so the -1 error return
        ;; arrives as a huge positive number and a `(< ret size)' test lets it
        ;; through.  Requiring an exact match catches both the error return
        ;; and a short read.
        (unless (= ret size)
          (fail "perf-counter-read")))
      (mem-ref counter :unsigned-long))))
;;; CFFI binding of the foreign function "close" under the Lisp name
;;; %PERF-COUNTER-CLOSE, returning :int.  PERF-COUNTER-CLOSE calls it with a
;;; file descriptor.
;;; NOTE(review): the parameter declaration (original line 206) is absent from
;;; this listing.
205 (defcfun ("close" %perf-counter-close
) :int
;;; Closes the counter descriptor FD through %PERF-COUNTER-CLOSE and calls
;;; FAIL when the close is deemed unsuccessful.
;;; NOTE(review): the condition guarding the FAIL call (original line 211) and
;;; whatever follows it (line 213) are absent from this listing.
;;; NOTE(review): the message passed to FAIL reads "perf-cunter-close", most
;;; likely a typo for "perf-counter-close"; it is a runtime string, so it is
;;; left untouched here.
208 (defun perf-counter-close (fd)
209 (multiple-value-bind (ret)
210 (%perf-counter-close fd
)
212 (fail "perf-cunter-close"))
;;; (with-performance-counters CNTRS &body BODY)
;;;
;;; CNTRS is a list of counter designators.  At macroexpansion time the
;;; shorthand keywords :perf-count-hw-all, :perf-count-sw-all and
;;; :perf-count-all cause CNTRS to be replaced by +perf-count-hw-all+,
;;; +perf-count-sw-all+ or +perf-count-all+ respectively.
;;;
;;; The expansion quotes the list and walks it when it runs:
;;;   * each designator I becomes a number J -- a symbol is replaced by its
;;;     SYMBOL-VALUE, anything else is used as is;
;;;   * J < 32 gives (+perf-type-hardware+ . J), otherwise
;;;     (+perf-type-software+ . (- J 32));
;;;   * the report string for the counter is looked up with
;;;     (nth counter (nth type +perf-count-format-string+)).
;;; Finally every descriptor is read with PERF-COUNTER-READ and closed with
;;; PERF-COUNTER-CLOSE, and a "Performance monitor:" report is written to
;;; *TRACE-OUTPUT*.
;;;
;;; NOTE(review): original lines 217-219, 229-234 and 236-241 are absent from
;;; this listing.  Where the counters are opened, where BODY runs and what the
;;; form returns cannot be seen here.
215 (defmacro with-performance-counters
(cntrs &body body
)
216 (let ((counters (gensym))
220 (:perf-count-hw-all
(setq cntrs
+perf-count-hw-all
+))
221 (:perf-count-sw-all
(setq cntrs
+perf-count-sw-all
+))
222 (:perf-count-all
(setq cntrs
+perf-count-all
+))))
223 `(let ((,counters
',cntrs
))
224 (loop for i in
,counters
225 for j
= (or (and (symbolp i
) (symbol-value i
)) i
)
226 for
(type . counter
) = (if (< j
32)
227 (cons +perf-type-hardware
+ j
)
228 (cons +perf-type-software
+ (- j
32)))
235 (nth counter
(nth type
+perf-count-format-string
+))
242 (apply #'format
*trace-output
*
243 (format nil
"~A~{ ~A~}~A"
244 "~&Performance monitor:~%~@<~@;" formats
"~:>")
245 (loop for counter in descriptors
246 collect
(perf-counter-read counter
)
247 do
(perf-counter-close counter
))))))))
;;; (time+ &body BODY)
;;; Convenience macro exported from the package: expands into a
;;; WITH-PERFORMANCE-COUNTERS form whose counter list is (:perf-count-hw-all).
;;; NOTE(review): the remainder of the expansion (original lines 251-253) is
;;; absent from this listing.
249 (defmacro time
+ (&body body
)
250 `(with-performance-counters (:perf-count-hw-all
)
255 ;; (with-performance-counters (+perf-count-hw-cpu-cycles+
256 ;; +perf-count-hw-cache-references+
257 ;; +perf-count-sw-context-switches+)