More changes
[perfcounters.git] / perfcounters.lisp
blobdc0c462573a09309d6109868bcdd0e21a96ca545
1 ;;;; Copyright 2009 Vitaly Mayatskikh <v.mayatskih@gmail.com>
2 ;;;;
3 ;;;; This file is a part of CL-Perfcounters
4 ;;;;
5 ;;;; Performance counters are special hardware registers available on most modern
6 ;;;; CPUs. These registers count the number of certain types of hw events: such
7 ;;;; as instructions executed, cachemisses suffered, or branches mis-predicted -
8 ;;;; without slowing down the kernel or applications. These registers can also
9 ;;;; trigger interrupts when a threshold number of events have passed - and can
10 ;;;; thus be used to profile the code that runs on that CPU.
11 ;;;;
12 ;;;; The Linux Performance Counter subsystem provides an abstraction of these
13 ;;;; hardware capabilities. It provides per task and per CPU counters, counter
14 ;;;; groups, and it provides event capabilities on top of those. It
15 ;;;; provides "virtual" 64-bit counters, regardless of the width of the
16 ;;;; underlying hardware counters.
17 ;;;;
18 ;;;; CL-Perfcounters is free software: you can redistribute it and/or modify
19 ;;;; it under the terms of the GNU General Public License as published by
20 ;;;; the Free Software Foundation, either version 3 of the License, or
21 ;;;; (at your option) any later version.
22 ;;;;
23 ;;;; CL-Perfcounters is distributed in the hope that it will be useful,
24 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;;;; GNU General Public License for more details.
27 ;;;;
28 ;;;; You should have received a copy of the GNU General Public License
29 ;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
31 (defpackage :perfcounters
32 (:use :cl :cffi)
33 (:export :with-performance-counters :time+))
35 (in-package :perfcounters)
37 ;; x86-64: 298
38 ;; x86: 336
39 ;; ppc: 319
40 ;; s390: 331
41 (progn
42 (defconstant +syscall+ 298)
43 (defconstant +pr-task-perf-counters-disable+ 31)
44 (defconstant +pr-task-perf-counters-enable+ 32))
46 (defcstruct perf-counter-attr
47 (type :unsigned-int)
48 (size :unsigned-int)
49 (config :unsigned-long-long)
50 (sample-period/freq :unsigned-long-long)
51 (sample-type :unsigned-long-long)
52 (read_format :unsigned-int)
53 (flags :unsigned-long-long)
54 (wakeup-events :unsigned-int)
55 (reserved-2 :unsigned-int)
56 (reserved-3 :unsigned-long-long))
58 (progn
59 (defconstant +perf-type-hardware+ 0)
60 (defconstant +perf-type-software+ 1)
61 (defconstant +perf-type-tracepoint+ 2)
62 (defconstant +perf-type-hw-cache+ 3)
64 ;; Common hardware events, generalized by the kernel:
65 (defconstant +perf-count-hw-cpu-cycles+ 0)
66 (defconstant +perf-count-hw-instructions+ 1)
67 (defconstant +perf-count-hw-cache-references+ 2)
68 (defconstant +perf-count-hw-cache-misses+ 3)
69 (defconstant +perf-count-hw-branch-instructions+ 4)
70 (defconstant +perf-count-hw-branch-misses+ 5)
71 (defconstant +perf-count-hw-bus-cycles+ 6)
73 ;; Special "software" counters provided by the kernel, even if the hardware
74 ;; does not support performance counters. These counters measure various
75 ;; physical and sw events of the kernel (and allow the profiling of them as
76 ;; well):
77 (defconstant +perf-count-sw-cpu-clock+ 32)
78 (defconstant +perf-count-sw-task-clock+ 33)
79 (defconstant +perf-count-sw-page-faults+ 34)
80 (defconstant +perf-count-sw-context-switches+ 35)
81 (defconstant +perf-count-sw-cpu-migrations+ 36)
82 (defconstant +perf-count-sw-page-faults-min+ 37)
83 (defconstant +perf-count-sw-page-faults-maj+ 38)
85 (defconstant +perf-count-hw-all+
86 '(+perf-count-hw-cpu-cycles+ +perf-count-hw-instructions+
87 +perf-count-hw-cache-references+ +perf-count-hw-cache-misses+
88 +perf-count-hw-branch-instructions+ +perf-count-hw-branch-misses+
89 +perf-count-hw-bus-cycles+))
91 (defconstant +perf-count-sw-all+
92 '(+perf-count-sw-cpu-clock+ +perf-count-sw-task-clock+
93 +perf-count-sw-page-faults+ +perf-count-sw-context-switches+
94 +perf-count-sw-cpu-migrations+ +perf-count-sw-page-faults-min+
95 +perf-count-sw-page-faults-maj+))
97 (defconstant +perf-count-all+
98 (append +perf-count-hw-all+ +perf-count-sw-all+))
100 ;; Generalized hardware cache counters:
102 ;; { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
103 ;; { read, write, prefetch } x
104 ;; { accesses, misses }
105 (defconstant +perf-count-hw-cache-l1d+ 0)
106 (defconstant +perf-count-hw-cache-l1i+ 1)
107 (defconstant +perf-count-hw-cache-ll+ 2)
108 (defconstant +perf-count-hw-cache-dtlb+ 3)
109 (defconstant +perf-count-hw-cache-itlb+ 4)
110 (defconstant +perf-count-hw-cache-bpu+ 5)
112 (defconstant +perf-count-format-string+
113 '(("~@[~:D CPU cycle~:P consumed~%~]"
114 "~@[~:D instruction~:P executed~%~]"
115 "~@[~:D cache hit~:P~%~]"
116 "~@[~:D cache misses~%~]"
117 "~@[~:D branch instruction~:P~%~]"
118 "~@[~:D branch misses~:P~%~]"
119 "~@[~:D bus cycle~:P~%~]")
120 ("~@[~:D cpu clock~:P~%~]"
121 "~@[~:D task clock~:P~%~]"
122 "~@[~:D page fault~:P~%~]"
123 "~@[~:D context switch~:P~%~]"
124 "~@[~:D cpu migration~:P~%~]"
125 "~@[~:D minor fault~%~]"
126 "~@[~:D major fault~:P~%~]")
131 ;; Bits that can be set in hw_event.read_format to request that
132 ;; reads on the counter should return the indicated quantities,
133 ;; in increasing order of bit value, after the counter value.
134 (defcenum perf-counter-read-format
135 (:perf-format-total-time-enabled)
136 (:perf-format-total-time-running))
138 ;; Bits that can be set in hw_event.record_type to request information
139 ;; in the overflow packets.
140 (defcenum perf-counter-record-format
141 (:perf-record-ip 1)
142 (:perf-record-tid 2)
143 (:perf-record-time 4)
144 (:perf-record-addr 8)
145 (:perf-record-group 16)
146 (:perf-record-callchain 32))
148 (defcvar "errno" :int)
150 (defun fail (func)
151 (error "~A failed: ~A" func
152 (foreign-funcall "strerror" :int *errno* :string)))
154 (defun perf-counter-open (event-type event)
155 (with-foreign-object (attr 'perf-counter-attr)
156 (foreign-funcall "memset" :pointer attr :int 0
157 :unsigned-long (foreign-type-size 'perf-counter-attr) :int)
158 (with-foreign-slots ((type size config flags) attr perf-counter-attr)
159 (setf type event-type
160 size (foreign-type-size 'perf-counter-attr)
161 config event
162 flags 3))
163 (multiple-value-bind (ret)
164 (foreign-funcall "syscall" :int +syscall+ :pointer attr :unsigned-long 0
165 :int -1 :int -1 :unsigned-long 0 :int)
166 (when (< ret 0)
167 (fail "Syscall perf_counter_open"))
168 ret)))
170 (defun perf-counter-prctl (opt)
171 (multiple-value-bind (ret)
172 (foreign-funcall "prctl" :int opt :int)
173 (when (< ret 0)
174 (fail "prctl"))
175 ret))
177 (defun perf-counters-start ()
178 (perf-counter-prctl +pr-task-perf-counters-enable+))
180 (defun perf-counters-stop ()
181 (perf-counter-prctl +pr-task-perf-counters-disable+))
183 (defun perf-counter-read (fd)
184 (with-foreign-object (counter :unsigned-long)
185 (multiple-value-bind (ret)
186 (foreign-funcall "read" :int fd :pointer counter
187 :unsigned-long (foreign-type-size :unsigned-long)
188 :unsigned-long)
189 (when (< ret (foreign-type-size :unsigned-long))
190 (fail "perf-counter-read")))
191 (mem-ref counter :unsigned-long)))
193 (defun perf-counter-close (fd)
194 (multiple-value-bind (ret)
195 (foreign-funcall "close" :int fd :int)
196 (when (< ret 0)
197 (fail "perf-cunter-close"))
198 ret))
200 (defun perf-counters-open (counters)
201 (let ((sym (car counters)))
202 (when (symbolp sym)
203 (case sym
204 (:perf-count-hw-all (setq counters +perf-count-hw-all+))
205 (:perf-count-sw-all (setq counters +perf-count-sw-all+))
206 (:perf-count-all (setq counters +perf-count-all+))))
207 (loop for i in counters
208 for j = (or (and (symbolp i) (symbol-value i)) i)
209 for (type . counter) = (if (< j 32)
210 (cons +perf-type-hardware+ j)
211 (cons +perf-type-software+ (- j 32)))
212 collect (perf-counter-open type counter) into descriptors
213 collect (nth counter (nth type +perf-count-format-string+)) into formats
214 finally (return (list descriptors formats)))))
216 (defun perf-counters-read-and-close (descriptors)
217 (loop for counter in descriptors
218 collect (perf-counter-read counter)
219 do (perf-counter-close counter)))
221 (defmacro with-performance-counters (cntrs &body body)
222 (let ((counters (gensym)))
223 `(let* ((,counters ',cntrs)
224 (x (perf-counters-open ,counters)))
225 (perf-counters-start)
226 ,@body
227 (perf-counters-stop)
228 (values (perf-counters-read-and-close (car x)) (cadr x)))))
230 (defmacro time+ (&body body)
231 `(multiple-value-bind (vals formats)
232 (with-performance-counters (:perf-count-all) ,@body)
233 (apply #'format *trace-output*
234 (format nil "~A~{ ~A~}~A"
235 "~&Performance monitor:~%~@<~@;" formats "~:>~%")
236 vals)
237 vals))