Syscall number as parameter
[perfcounters.git] / perfcounters.lisp
blob1ae9679f0d76d5ec713579e3d6eee4586c709d3a
1 ;;;; Copyright 2009 Vitaly Mayatskikh <v.mayatskih@gmail.com>
2 ;;;;
3 ;;;; This file is a part of CL-Perfcounters
4 ;;;;
5 ;;;; Performance counters are special hardware registers available on most modern
6 ;;;; CPUs. These registers count the number of certain types of hw events: such
7 ;;;; as instructions executed, cache misses suffered, or branches mis-predicted -
8 ;;;; without slowing down the kernel or applications. These registers can also
9 ;;;; trigger interrupts when a threshold number of events have passed - and can
10 ;;;; thus be used to profile the code that runs on that CPU.
11 ;;;;
12 ;;;; The Linux Performance Counter subsystem provides an abstraction of these
13 ;;;; hardware capabilities. It provides per task and per CPU counters, counter
14 ;;;; groups, and it provides event capabilities on top of those. It
15 ;;;; provides "virtual" 64-bit counters, regardless of the width of the
16 ;;;; underlying hardware counters.
17 ;;;;
18 ;;;; CL-Perfcounters is free software: you can redistribute it and/or modify
19 ;;;; it under the terms of the GNU General Public License as published by
20 ;;;; the Free Software Foundation, either version 3 of the License, or
21 ;;;; (at your option) any later version.
22 ;;;;
23 ;;;; CL-Perfcounters is distributed in the hope that it will be useful,
24 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;;;; GNU General Public License for more details.
27 ;;;;
28 ;;;; You should have received a copy of the GNU General Public License
29 ;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;; Package definition: pulls in Common Lisp and CFFI and exports the two
;; user-facing macros.
(defpackage #:perfcounters
  (:use #:cl #:cffi)
  (:export #:with-performance-counters
           #:time+))

(in-package #:perfcounters)
;; Number of the perf counter "open" syscall.  It differs per architecture:
;;   x86-64: 298
;;   x86:    336
;;   ppc:    319
;;   s390:   331
;; The value is picked from the reader features of the running Lisp.  When
;; none of the known features is present the historical default (298, the
;; x86-64 number) is kept, so existing setups behave exactly as before.
;; Being a DEFPARAMETER, it can still be overridden by hand.
(defparameter +syscall+
  (or #+x86-64 298
      #+(and x86 (not x86-64)) 336
      #+(or ppc ppc32 ppc64 powerpc) 319
      #+(or s390 s390x) 331
      298)
  "Syscall number passed as first argument to syscall(2) by PERF-COUNTER-OPEN.")

;; prctl(2) option codes used by PERF-COUNTER-STOP and PERF-COUNTER-START.
(defparameter +pr-task-perf-counters-disable+ 31
  "prctl option passed by PERF-COUNTER-STOP.")

(defparameter +pr-task-perf-counters-enable+ 32
  "prctl option passed by PERF-COUNTER-START.")
;; Foreign layout of the attribute block whose address is passed to the
;; perf counter syscall by PERF-COUNTER-OPEN.  Only TYPE, SIZE, CONFIG and
;; FLAGS are filled in by this file; the remaining slots are left at zero.
;;
;; READ_FORMAT is a 64-bit field in the kernel structure.  Declaring it as a
;; 32-bit integer only happens to give the right layout where 64-bit members
;; are 8-byte aligned and the machine is little-endian; on big-endian targets
;; or with 4-byte alignment of 64-bit members (i386) the following slots
;; would be misplaced, so it is declared with its real width here.
(defcstruct perf-counter-attr
  (type :unsigned-int)
  (size :unsigned-int)
  (config :unsigned-long-long)
  (sample-period/freq :unsigned-long-long)
  (sample-type :unsigned-long-long)
  (read_format :unsigned-long-long)
  (flags :unsigned-long-long)
  (wakeup-events :unsigned-int)
  (reserved-2 :unsigned-int)
  (reserved-3 :unsigned-long-long))
;; Values for the TYPE slot of PERF-COUNTER-ATTR.  WITH-PERFORMANCE-COUNTERS
;; also uses the hardware/software values as an index into
;; +PERF-COUNT-FORMAT-STRING+.
(defparameter +perf-type-hardware+ 0
  "Counter type used for event codes below 32.")

(defparameter +perf-type-software+ 1
  "Counter type used for event codes of 32 and above.")

(defparameter +perf-type-tracepoint+ 2
  "Tracepoint counter type (not used elsewhere in this file).")

(defparameter +perf-type-hw-cache+ 3
  "Hardware cache counter type (not used elsewhere in this file).")
;; Common hardware events, generalized by the kernel.  Each value doubles as
;; the position of the event's report line in the first sublist of
;; +PERF-COUNT-FORMAT-STRING+.
(defparameter +perf-count-hw-cpu-cycles+ 0
  "Hardware event code: CPU cycles.")

(defparameter +perf-count-hw-instructions+ 1
  "Hardware event code: instructions.")

(defparameter +perf-count-hw-cache-references+ 2
  "Hardware event code: cache references.")

(defparameter +perf-count-hw-cache-misses+ 3
  "Hardware event code: cache misses.")

(defparameter +perf-count-hw-branch-instructions+ 4
  "Hardware event code: branch instructions.")

(defparameter +perf-count-hw-branch-misses+ 5
  "Hardware event code: branch misses.")

(defparameter +perf-count-hw-bus-cycles+ 6
  "Hardware event code: bus cycles.")
;; Special "software" counters provided by the kernel, even if the hardware
;; does not support performance counters.  These counters measure various
;; physical and sw events of the kernel (and allow the profiling of them as
;; well).
;;
;; The codes are offset by 32 so that a single number identifies both the
;; counter type and the event: WITH-PERFORMANCE-COUNTERS treats any code of
;; 32 or more as a software event and subtracts 32 before opening it.
(defparameter +perf-count-sw-cpu-clock+ 32
  "Software event code: CPU clock.")

(defparameter +perf-count-sw-task-clock+ 33
  "Software event code: task clock.")

(defparameter +perf-count-sw-page-faults+ 34
  "Software event code: page faults.")

(defparameter +perf-count-sw-context-switches+ 35
  "Software event code: context switches.")

(defparameter +perf-count-sw-cpu-migrations+ 36
  "Software event code: CPU migrations.")

(defparameter +perf-count-sw-page-faults-min+ 37
  "Software event code: minor page faults.")

(defparameter +perf-count-sw-page-faults-maj+ 38
  "Software event code: major page faults.")
;; Predefined counter sets.  The lists hold the NAMES of the event
;; parameters, not their values; WITH-PERFORMANCE-COUNTERS resolves each
;; symbol with SYMBOL-VALUE when the expansion runs.
(defparameter +perf-count-hw-all+
  '(+perf-count-hw-cpu-cycles+
    +perf-count-hw-instructions+
    +perf-count-hw-cache-references+
    +perf-count-hw-cache-misses+
    +perf-count-hw-branch-instructions+
    +perf-count-hw-branch-misses+
    +perf-count-hw-bus-cycles+)
  "Names of all hardware event parameters.")

(defparameter +perf-count-sw-all+
  '(+perf-count-sw-cpu-clock+
    +perf-count-sw-task-clock+
    +perf-count-sw-page-faults+
    +perf-count-sw-context-switches+
    +perf-count-sw-cpu-migrations+
    +perf-count-sw-page-faults-min+
    +perf-count-sw-page-faults-maj+)
  "Names of all software event parameters.")

(defparameter +perf-count-all+
  (append +perf-count-hw-all+ +perf-count-sw-all+)
  "Hardware event names followed by software event names.")
;; Generalized hardware cache counters:
;;
;;   { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
;;   { read, write, prefetch } x
;;   { accesses, misses }
;;
;; Only the cache identifiers are defined here; nothing else in this file
;; refers to them.
(defparameter +perf-count-hw-cache-l1d+ 0
  "Cache identifier: L1-D.")

(defparameter +perf-count-hw-cache-l1i+ 1
  "Cache identifier: L1-I.")

(defparameter +perf-count-hw-cache-ll+ 2
  "Cache identifier: LLC.")

(defparameter +perf-count-hw-cache-dtlb+ 3
  "Cache identifier: DTLB.")

(defparameter +perf-count-hw-cache-itlb+ 4
  "Cache identifier: ITLB.")

(defparameter +perf-count-hw-cache-bpu+ 5
  "Cache identifier: BPU.")
;; Report line templates, indexed first by counter type (0 = hardware,
;; 1 = software) and then by event index within that type.
;;
;; Every template is wrapped in ~@[ ... ~] and consumes exactly one FORMAT
;; argument (the counter value) through ~:D.  ~:P re-reads that argument to
;; append a plural "s"; nouns whose plural is not formed with a bare "s"
;; ("miss", "switch") are spelled out in the plural instead, because ~:P
;; would print "missess" / "switchs".
(defparameter +perf-count-format-string+
  '(("~@[~:D CPU cycle~:P consumed~%~]"
     "~@[~:D instruction~:P executed~%~]"
     ;; This counter is cache *references*, which include misses.
     "~@[~:D cache reference~:P~%~]"
     "~@[~:D cache misses~%~]"
     "~@[~:D branch instruction~:P~%~]"
     "~@[~:D branch misses~%~]"
     "~@[~:D bus cycle~:P~%~]")
    ("~@[~:D cpu clock~:P~%~]"
     "~@[~:D task clock~:P~%~]"
     "~@[~:D page fault~:P~%~]"
     "~@[~:D context switches~%~]"
     "~@[~:D cpu migration~:P~%~]"
     "~@[~:D minor fault~:P~%~]"
     "~@[~:D major fault~:P~%~]"))
  "Per-type lists of FORMAT control strings used to print counter values.")
;; Bits that can be set in hw_event.read_format to request that
;; reads on the counter should return the indicated quantities,
;; in increasing order of bit value, after the counter value.
;;
;; These are bit flags, so the values must be explicit powers of two.
;; Without explicit values DEFCENUM numbers the members 0 and 1, which makes
;; the first flag a no-op and gives the second the first flag's bit.
(defcenum perf-counter-read-format
  (:perf-format-total-time-enabled 1)
  (:perf-format-total-time-running 2))
;; Bits that can be set in hw_event.record_type to request information
;; in the overflow packets.  Each member is a distinct power of two so that
;; the values can be OR-ed together.
(defcenum perf-counter-record-format
  (:perf-record-ip         1)
  (:perf-record-tid        2)
  (:perf-record-time       4)
  (:perf-record-addr       8)
  (:perf-record-group     16)
  (:perf-record-callchain 32))
;; Raw binding to the C library's syscall(2) entry point, specialised to the
;; argument list of the perf counter "open" call.  Use PERF-COUNTER-OPEN,
;; which builds the attribute block and checks the result.
(defcfun ("syscall" %perf-counter-open) :int
  "Invoke syscall number SYSCALL-NR with the five perf counter arguments."
  (syscall-nr :int)
  (ptr        :pointer)
  (pid        :unsigned-long)
  (cpu        :int)
  (group-fd   :int)
  (flags      :unsigned-long))
;; *ERRNO* reads (and, via SETF, writes) the calling thread's C errno.
;;
;; On Linux C libraries errno is not an ordinary exported data symbol: it is
;; thread-local and reached through __errno_location(), so binding it as a
;; plain foreign variable named "errno" is not reliable.  The symbol macro
;; keeps the *ERRNO* name used elsewhere in this file.
;; NOTE(review): the Lisp runtime itself may make C calls between a failing
;; foreign call and the read of *ERRNO*; read it as early as possible.
(define-symbol-macro *errno*
    (mem-ref (foreign-funcall "__errno_location" :pointer) :int))
(defun fail (func)
  "Signal an ERROR naming FUNC, with the strerror(3) text for *ERRNO*.
FUNC is printed with ~A, so any printable designator works."
  (let ((reason (foreign-funcall "strerror" :int *errno* :string)))
    (error "~A failed: ~A" func reason)))
(defun perf-counter-open (event-type event)
  "Open one performance counter and return its file descriptor.

EVENT-TYPE is stored in the TYPE slot of the attribute block (one of the
+PERF-TYPE-...+ values) and EVENT in its CONFIG slot.  The syscall is
issued with pid 0, cpu -1, group-fd -1 and flags 0.  Signals an ERROR
through FAIL when the syscall returns a negative value."
  (let ((attr-size (foreign-type-size 'perf-counter-attr)))
    ;; Dynamic-extent allocation: nothing to free by hand, and nothing runs
    ;; between the syscall and FAIL reading errno.
    (with-foreign-object (attr 'perf-counter-attr)
      ;; Foreign memory is not zero-initialised.  Clear the whole block so
      ;; the slots not set below (sample period, sample type, read format,
      ;; wakeup events, reserved words) reach the kernel as zero rather
      ;; than as leftover garbage.
      (dotimes (offset attr-size)
        (setf (mem-aref attr :unsigned-char offset) 0))
      (with-foreign-slots ((type size config flags) attr perf-counter-attr)
        (setf type   event-type
              size   attr-size
              config event
              ;; Two lowest flag bits set.  NOTE(review): presumably the
              ;; kernel's "disabled" and "inherit" bits -- confirm against
              ;; the kernel header in use.
              flags  3))
      (let ((fd (%perf-counter-open +syscall+ attr 0 -1 -1 0)))
        (when (< fd 0)
          (fail "Syscall perf_counter_open"))
        fd))))
;; Raw binding to prctl(2), declared with the option argument only.
;; Use PERF-COUNTER-PRCTL, which checks the result.
(defcfun ("prctl" %perf-counter-prctl) :int
  "Call prctl with OPTION as its single argument."
  (option :int))
(defun perf-counter-prctl (opt)
  "Call prctl with option OPT and return its result.
Signals an ERROR through FAIL when the result is negative."
  (let ((status (%perf-counter-prctl opt)))
    (if (minusp status)
        (fail "prctl")
        status)))
(defun perf-counter-start ()
  "Issue the prctl option +PR-TASK-PERF-COUNTERS-ENABLE+.
Returns the prctl result; errors are signalled by PERF-COUNTER-PRCTL."
  (perf-counter-prctl +pr-task-perf-counters-enable+))
(defun perf-counter-stop ()
  "Issue the prctl option +PR-TASK-PERF-COUNTERS-DISABLE+.
Returns the prctl result; errors are signalled by PERF-COUNTER-PRCTL."
  (perf-counter-prctl +pr-task-perf-counters-disable+))
;; Raw binding to read(2).
;;
;; The return type is signed: read(2) returns -1 on failure.  Declared as an
;; unsigned type that -1 turns into a huge positive number, and a caller's
;; "fewer bytes than requested" check can never detect the error.
(defcfun ("read" %read) :long
  "Read up to COUNT bytes from FD into BUF; returns the byte count or -1."
  (fd :int)
  (buf :pointer)
  (count :unsigned-long))
(defun perf-counter-read (fd)
  "Read the current value of the counter open on FD and return it.

The value is read as a 64-bit unsigned integer: the counters are 64 bits
wide regardless of the platform's `unsigned long', which is only 32 bits on
ILP32 targets.  Signals an ERROR through FAIL unless exactly that many
bytes were read."
  (let ((width (foreign-type-size :unsigned-long-long)))
    (with-foreign-object (counter :unsigned-long-long)
      ;; Compare with /= rather than <: read never returns more than WIDTH
      ;; on success, and this also catches a -1 that has been converted to
      ;; a large unsigned number by the foreign return type.
      (unless (= (%read fd counter width) width)
        (fail "perf-counter-read"))
      (mem-ref counter :unsigned-long-long))))
;; Raw binding to close(2).  Use PERF-COUNTER-CLOSE, which checks the result.
(defcfun ("close" %perf-counter-close) :int
  "Close file descriptor FD."
  (fd :int))
(defun perf-counter-close (fd)
  "Close the counter file descriptor FD and return the result of close.
Signals an ERROR through FAIL when close returns a negative value."
  (let ((status (%perf-counter-close fd)))
    (when (< status 0)
      ;; Message previously read \"perf-cunter-close\".
      (fail "perf-counter-close"))
    status))
(defmacro with-performance-counters (cntrs &body body)
  "Run BODY with the performance counters named in CNTRS and print a report.

CNTRS is an unevaluated list.  Each element is either an event code (an
integer) or a symbol whose value is one; symbols are resolved with
SYMBOL-VALUE when the expansion runs.  If the first element is one of the
keywords :PERF-COUNT-HW-ALL, :PERF-COUNT-SW-ALL or :PERF-COUNT-ALL, the
whole list is replaced by the corresponding predefined set.  Codes below 32
are opened as hardware events, the others as software events with 32
subtracted.

All counters are opened first, then enabled, BODY is run, the counters are
disabled, and one line per counter is written to *TRACE-OUTPUT*.  The form
returns NIL.

The counters are disabled and their descriptors closed even when BODY or
any later step exits non-locally.  BODY is expanded outside the internal
iteration and all internal variables are gensyms, so BODY cannot capture
or clobber them."
  (let ((descriptors (gensym "DESCRIPTORS"))
        (formats     (gensym "FORMATS"))
        (spec        (gensym "SPEC"))
        (code        (gensym "CODE"))
        (type        (gensym "TYPE"))
        (index       (gensym "INDEX")))
    ;; Expand the "all of ..." shorthands at macroexpansion time.
    (when (symbolp (car cntrs))
      (case (car cntrs)
        (:perf-count-hw-all (setq cntrs +perf-count-hw-all+))
        (:perf-count-sw-all (setq cntrs +perf-count-sw-all+))
        (:perf-count-all    (setq cntrs +perf-count-all+))))
    `(let ((,descriptors '())
           (,formats '()))
       (unwind-protect
            (progn
              ;; Open every requested counter and pick its report template.
              (dolist (,spec ',cntrs)
                (let* ((,code  (if (symbolp ,spec) (symbol-value ,spec) ,spec))
                       (,type  (if (< ,code 32)
                                   +perf-type-hardware+
                                   +perf-type-software+))
                       (,index (if (< ,code 32) ,code (- ,code 32))))
                  (push (perf-counter-open ,type ,index) ,descriptors)
                  (push (nth ,index (nth ,type +perf-count-format-string+))
                        ,formats)))
              ;; PUSH built both lists backwards; restore request order.
              (setf ,descriptors (nreverse ,descriptors)
                    ,formats     (nreverse ,formats))
              (perf-counter-start)
              (unwind-protect
                   (progn ,@body)
                (perf-counter-stop))
              ;; The control string is assembled from the per-counter
              ;; templates; the counter values are its arguments.
              (apply #'format *trace-output*
                     (format nil "~A~{ ~A~}~A"
                             "~&Performance monitor:~%~@<~@;" ,formats "~:>")
                     (mapcar #'perf-counter-read ,descriptors))
              nil)
         ;; Runs on normal and abnormal exit alike, and only sees the
         ;; descriptors that were actually opened.
         (mapc #'perf-counter-close ,descriptors)))))
(defmacro time+ (&body body)
  "Run BODY under WITH-PERFORMANCE-COUNTERS with the :PERF-COUNT-HW-ALL set."
  (list* 'with-performance-counters '(:perf-count-hw-all) body))
253 ;; (time+ t)
255 ;; (with-performance-counters (+perf-count-hw-cpu-cycles+
256 ;; +perf-count-hw-cache-references+
257 ;; +perf-count-sw-context-switches+)
258 ;; (sleep 1))