1 ;;;; Copyright 2009 Vitaly Mayatskikh <v.mayatskih@gmail.com>
3 ;;;; This file is a part of CL-Perfcounters
5 ;;;; Performance counters are special hardware registers available on most modern
6 ;;;; CPUs. These registers count the number of certain types of hw events: such
7 ;;;; as instructions executed, cachemisses suffered, or branches mis-predicted -
8 ;;;; without slowing down the kernel or applications. These registers can also
9 ;;;; trigger interrupts when a threshold number of events have passed - and can
10 ;;;; thus be used to profile the code that runs on that CPU.
12 ;;;; The Linux Performance Counter subsystem provides an abstraction of these
13 ;;;; hardware capabilities. It provides per task and per CPU counters, counter
14 ;;;; groups, and it provides event capabilities on top of those. It
15 ;;;; provides "virtual" 64-bit counters, regardless of the width of the
16 ;;;; underlying hardware counters.
18 ;;;; CL-Perfcounters is free software: you can redistribute it and/or modify
19 ;;;; it under the terms of the GNU General Public License as published by
20 ;;;; the Free Software Foundation, either version 3 of the License, or
21 ;;;; (at your option) any later version.
23 ;;;; CL-Perfcounters is distributed in the hope that it will be useful,
24 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;;;; GNU General Public License for more details.
28 ;;;; You should have received a copy of the GNU General Public License
29 ;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
31 (defpackage :perfcounters
33 (:export
:with-performance-counters
:time
+))
35 (in-package :perfcounters
)
42 (defconstant +syscall
+ 298)
43 (defconstant +pr-task-perf-counters-disable
+ 31)
44 (defconstant +pr-task-perf-counters-enable
+ 32))
46 (defcstruct perf-counter-attr
49 (config :unsigned-long-long
)
50 (sample-period/freq
:unsigned-long-long
)
51 (sample-type :unsigned-long-long
)
52 (read_format :unsigned-int
)
53 (flags :unsigned-long-long
)
54 (wakeup-events :unsigned-int
)
55 (reserved-2 :unsigned-int
)
56 (reserved-3 :unsigned-long-long
))
59 (defconstant +perf-type-hardware
+ 0)
60 (defconstant +perf-type-software
+ 1)
61 (defconstant +perf-type-tracepoint
+ 2)
62 (defconstant +perf-type-hw-cache
+ 3)
64 ;; Common hardware events, generalized by the kernel:
65 (defconstant +perf-count-hw-cpu-cycles
+ 0)
66 (defconstant +perf-count-hw-instructions
+ 1)
67 (defconstant +perf-count-hw-cache-references
+ 2)
68 (defconstant +perf-count-hw-cache-misses
+ 3)
69 (defconstant +perf-count-hw-branch-instructions
+ 4)
70 (defconstant +perf-count-hw-branch-misses
+ 5)
71 (defconstant +perf-count-hw-bus-cycles
+ 6)
73 ;; Special "software" counters provided by the kernel, even if the hardware
74 ;; does not support performance counters. These counters measure various
75 ;; physical and sw events of the kernel (and allow the profiling of them as
77 (defconstant +perf-count-sw-cpu-clock
+ 32)
78 (defconstant +perf-count-sw-task-clock
+ 33)
79 (defconstant +perf-count-sw-page-faults
+ 34)
80 (defconstant +perf-count-sw-context-switches
+ 35)
81 (defconstant +perf-count-sw-cpu-migrations
+ 36)
82 (defconstant +perf-count-sw-page-faults-min
+ 37)
83 (defconstant +perf-count-sw-page-faults-maj
+ 38)
85 (defconstant +perf-count-hw-all
+
86 '(+perf-count-hw-cpu-cycles
+ +perf-count-hw-instructions
+
87 +perf-count-hw-cache-references
+ +perf-count-hw-cache-misses
+
88 +perf-count-hw-branch-instructions
+ +perf-count-hw-branch-misses
+
89 +perf-count-hw-bus-cycles
+))
91 (defconstant +perf-count-sw-all
+
92 '(+perf-count-sw-cpu-clock
+ +perf-count-sw-task-clock
+
93 +perf-count-sw-page-faults
+ +perf-count-sw-context-switches
+
94 +perf-count-sw-cpu-migrations
+ +perf-count-sw-page-faults-min
+
95 +perf-count-sw-page-faults-maj
+))
97 (defconstant +perf-count-all
+
98 (append +perf-count-hw-all
+ +perf-count-sw-all
+))
100 ;; Generalized hardware cache counters:
102 ;; { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
103 ;; { read, write, prefetch } x
104 ;; { accesses, misses }
105 (defconstant +perf-count-hw-cache-l1d
+ 0)
106 (defconstant +perf-count-hw-cache-l1i
+ 1)
107 (defconstant +perf-count-hw-cache-ll
+ 2)
108 (defconstant +perf-count-hw-cache-dtlb
+ 3)
109 (defconstant +perf-count-hw-cache-itlb
+ 4)
110 (defconstant +perf-count-hw-cache-bpu
+ 5)
112 (defconstant +perf-count-format-string
+
113 '(("~@[~:D CPU cycle~:P consumed~%~]"
114 "~@[~:D instruction~:P executed~%~]"
115 "~@[~:D cache hit~:P~%~]"
116 "~@[~:D cache misses~%~]"
117 "~@[~:D branch instruction~:P~%~]"
118 "~@[~:D branch misses~:P~%~]"
119 "~@[~:D bus cycle~:P~%~]")
120 ("~@[~:D cpu clock~:P~%~]"
121 "~@[~:D task clock~:P~%~]"
122 "~@[~:D page fault~:P~%~]"
123 "~@[~:D context switch~:P~%~]"
124 "~@[~:D cpu migration~:P~%~]"
125 "~@[~:D minor fault~%~]"
126 "~@[~:D major fault~:P~%~]")
131 ;; Bits that can be set in hw_event.read_format to request that
132 ;; reads on the counter should return the indicated quantities,
133 ;; in increasing order of bit value, after the counter value.
134 (defcenum perf-counter-read-format
135 (:perf-format-total-time-enabled
)
136 (:perf-format-total-time-running
))
138 ;; Bits that can be set in hw_event.record_type to request information
139 ;; in the overflow packets.
140 (defcenum perf-counter-record-format
143 (:perf-record-time
4)
144 (:perf-record-addr
8)
145 (:perf-record-group
16)
146 (:perf-record-callchain
32))
148 (defcvar "errno" :int
)
151 (error "~A failed: ~A" func
152 (foreign-funcall "strerror" :int
*errno
* :string
)))
154 (defun perf-counter-open (event-type event
)
155 (with-foreign-object (attr 'perf-counter-attr
)
156 (foreign-funcall "memset" :pointer attr
:int
0
157 :unsigned-long
(foreign-type-size 'perf-counter-attr
) :int
)
158 (with-foreign-slots ((type size config flags
) attr perf-counter-attr
)
159 (setf type event-type
160 size
(foreign-type-size 'perf-counter-attr
)
163 (multiple-value-bind (ret)
164 (foreign-funcall "syscall" :int
+syscall
+ :pointer attr
:unsigned-long
0
165 :int -
1 :int -
1 :unsigned-long
0 :int
)
167 (fail "Syscall perf_counter_open"))
170 (defun perf-counter-prctl (opt)
171 (multiple-value-bind (ret)
172 (foreign-funcall "prctl" :int opt
:int
)
177 (defun perf-counters-start ()
178 (perf-counter-prctl +pr-task-perf-counters-enable
+))
180 (defun perf-counters-stop ()
181 (perf-counter-prctl +pr-task-perf-counters-disable
+))
183 (defun perf-counter-read (fd)
184 (with-foreign-object (counter :unsigned-long
)
185 (multiple-value-bind (ret)
186 (foreign-funcall "read" :int fd
:pointer counter
187 :unsigned-long
(foreign-type-size :unsigned-long
)
189 (when (< ret
(foreign-type-size :unsigned-long
))
190 (fail "perf-counter-read")))
191 (mem-ref counter
:unsigned-long
)))
193 (defun perf-counter-close (fd)
194 (multiple-value-bind (ret)
195 (foreign-funcall "close" :int fd
:int
)
197 (fail "perf-cunter-close"))
200 (defun perf-counters-open (counters)
201 (let ((sym (car counters
)))
204 (:perf-count-hw-all
(setq counters
+perf-count-hw-all
+))
205 (:perf-count-sw-all
(setq counters
+perf-count-sw-all
+))
206 (:perf-count-all
(setq counters
+perf-count-all
+))))
207 (loop for i in counters
208 for j
= (or (and (symbolp i
) (symbol-value i
)) i
)
209 for
(type . counter
) = (if (< j
32)
210 (cons +perf-type-hardware
+ j
)
211 (cons +perf-type-software
+ (- j
32)))
212 collect
(perf-counter-open type counter
) into descriptors
213 collect
(nth counter
(nth type
+perf-count-format-string
+)) into formats
214 finally
(return (list descriptors formats
)))))
216 (defun perf-counters-read-and-close (descriptors)
217 (loop for counter in descriptors
218 collect
(perf-counter-read counter
)
219 do
(perf-counter-close counter
)))
221 (defmacro with-performance-counters
(cntrs &body body
)
222 (let ((counters (gensym)))
223 `(let* ((,counters
',cntrs
)
224 (x (perf-counters-open ,counters
)))
225 (perf-counters-start)
228 (values (perf-counters-read-and-close (car x
)) (cadr x
)))))
230 (defmacro time
+ (&body body
)
231 `(multiple-value-bind (vals formats
)
232 (with-performance-counters (:perf-count-all
) ,@body
)
233 (apply #'format
*trace-output
*
234 (format nil
"~A~{ ~A~}~A"
235 "~&Performance monitor:~%~@<~@;" formats
"~:>~%")