/*	$NetBSD: lockstat.c,v 1.14 2008/04/28 15:36:01 ad Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lock statistics driver, providing kernel support for the lockstat(8)
 * command.
 *
 * We use a global lock word (lockstat_lock) to track device opens.
 * Only one thread can hold the device at a time, providing a global lock.
 *
 * XXX Timings for contention on sleep locks are currently incorrect.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.14 2008/04/28 15:36:01 ad Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/syslog.h>
#include <sys/atomic.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

#ifndef __HAVE_CPU_COUNTER
#error CPU counters not available
#endif

#if LONG_BIT == 64
#define LOCKSTAT_HASH_SHIFT     3
#elif LONG_BIT == 32
#define LOCKSTAT_HASH_SHIFT     2
#endif

#define LOCKSTAT_MINBUFS        1000
#define LOCKSTAT_DEFBUFS        10000
#define LOCKSTAT_MAXBUFS        50000

#define LOCKSTAT_HASH_SIZE      128
#define LOCKSTAT_HASH_MASK      (LOCKSTAT_HASH_SIZE - 1)
#define LOCKSTAT_HASH(key)      \
        ((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)

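/*
 * Buffers are hashed on the key (lock ^ callsite).  Lock structures are
 * at least word aligned, so the low LOCKSTAT_HASH_SHIFT bits of the
 * address carry no information and are shifted out before the key is
 * masked: for example, on a 64-bit machine a key of 0x1940 hashes to
 * (0x1940 >> 3) & 127 = 40.
 */
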
typedef struct lscpu {
        SLIST_HEAD(, lsbuf)     lc_free;
        u_int                   lc_overflow;
        LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
} lscpu_t;

typedef struct lslist lslist_t;

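/*
 * Each CPU gets a private lscpu_t: a free list of buffers plus a small
 * hash table of buffers in use.  lockstat_event() only ever touches the
 * table of the CPU it runs on, so recording an event needs no lock and
 * pulls no remote cache lines; the per-CPU results are told apart by
 * lb_cpu once the raw buffers are copied out.
 */
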
void    lockstatattach(int);
void    lockstat_start(lsenable_t *);
int     lockstat_alloc(lsenable_t *);
void    lockstat_init_tables(lsenable_t *);
int     lockstat_stop(lsdisable_t *);
void    lockstat_free(void);

dev_type_open(lockstat_open);
dev_type_close(lockstat_close);
dev_type_read(lockstat_read);
dev_type_ioctl(lockstat_ioctl);

volatile u_int  lockstat_enabled;
uintptr_t       lockstat_csstart;
uintptr_t       lockstat_csend;
uintptr_t       lockstat_csmask;
uintptr_t       lockstat_lamask;
uintptr_t       lockstat_lockstart;
uintptr_t       lockstat_lockend;
__cpu_simple_lock_t lockstat_lock;
lwp_t           *lockstat_lwp;
lsbuf_t         *lockstat_baseb;
size_t          lockstat_sizeb;
struct timespec lockstat_stime;

const struct cdevsw lockstat_cdevsw = {
        lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
        nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE
};

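/*
 * Every entry point above is marked D_MPSAFE and so runs without the
 * kernel lock; serialization comes from the exclusive-open model
 * enforced with lockstat_lock in lockstat_open() below.
 */
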
/*
 * Called when the pseudo-driver is attached.
 */
void
lockstatattach(int nunits)
{

        (void)nunits;

        __cpu_simple_lock_init(&lockstat_lock);
}

/*
 * Prepare the per-CPU tables for use, or clear down tables when tracing is
 * stopped.
 */
void
lockstat_init_tables(lsenable_t *le)
{
        int i, per, slop, cpuno;
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        lscpu_t *lc;
        lsbuf_t *lb;

        KASSERT(!lockstat_enabled);

        for (CPU_INFO_FOREACH(cii, ci)) {
                if (ci->ci_lockstat != NULL) {
                        kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
                        ci->ci_lockstat = NULL;
                }
        }

        if (le == NULL)
                return;

        /*
         * Divide the buffers evenly between the CPUs; the remainder
         * ("slop") is handed out one buffer at a time, starting with
         * the first CPU.
         */
        lb = lockstat_baseb;
        per = le->le_nbufs / ncpu;
        slop = le->le_nbufs - (per * ncpu);
        cpuno = 0;
        for (CPU_INFO_FOREACH(cii, ci)) {
                lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
                lc->lc_overflow = 0;
                ci->ci_lockstat = lc;

                SLIST_INIT(&lc->lc_free);
                for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
                        LIST_INIT(&lc->lc_hash[i]);

                for (i = per; i != 0; i--, lb++) {
                        lb->lb_cpu = (uint16_t)cpuno;
                        SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
                }
                if (slop > 0) {
                        lb->lb_cpu = (uint16_t)cpuno;
                        SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
                        lb++;
                        slop--;
                }
                cpuno++;
        }
}

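/*
 * Example of the split above: with le_nbufs = 10000 and ncpu = 3,
 * per = 3333 and slop = 1, so CPU 0 receives 3334 buffers and CPUs 1
 * and 2 receive 3333 each (3334 + 3333 + 3333 = 10000).
 */
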
/*
 * Start collecting lock statistics.
 */
void
lockstat_start(lsenable_t *le)
{

        KASSERT(!lockstat_enabled);

        lockstat_init_tables(le);

        if ((le->le_flags & LE_CALLSITE) != 0)
                lockstat_csmask = (uintptr_t)-1LL;
        else
                lockstat_csmask = 0;

        if ((le->le_flags & LE_LOCK) != 0)
                lockstat_lamask = (uintptr_t)-1LL;
        else
                lockstat_lamask = 0;

        lockstat_csstart = le->le_csstart;
        lockstat_csend = le->le_csend;
        lockstat_lockstart = le->le_lockstart;
        lockstat_lockend = le->le_lockend;

        /*
         * Ensure the filter state above is globally visible before the
         * enable mask is set, as lockstat_event() runs unlocked.
         */
        membar_producer();
        getnanotime(&lockstat_stime);
        lockstat_enabled = le->le_mask;
        membar_producer();
}

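/*
 * The masks above implement all-or-nothing filtering: with LE_CALLSITE
 * clear, lockstat_csmask is zero and every callsite is masked down to
 * zero in lockstat_event(), so all events on a given lock collapse into
 * a single record; with it set, the mask is all ones and the callsite
 * passes through unchanged.  LE_LOCK and lockstat_lamask do the same
 * for the lock address.
 */
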
/*
 * Stop collecting lock statistics.
 */
int
lockstat_stop(lsdisable_t *ld)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        u_int cpuno, overflow;
        struct timespec ts;
        lwp_t *l;
        int error;

        KASSERT(lockstat_enabled);

        /*
         * Set enabled false, force a write barrier, and wait for other CPUs
         * to exit lockstat_event().
         */
        lockstat_enabled = 0;
        membar_producer();
        tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));

        /*
         * Did we run out of buffers while tracing?
         */
        overflow = 0;
        for (CPU_INFO_FOREACH(cii, ci))
                overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;

        if (overflow != 0) {
                error = EOVERFLOW;
                log(LOG_NOTICE, "lockstat: %u buffer allocations failed\n",
                    overflow);
        } else
                error = 0;

        lockstat_init_tables(NULL);

        /* Run through all LWPs and clear the slate for the next run. */
        mutex_enter(proc_lock);
        LIST_FOREACH(l, &alllwp, l_list) {
                l->l_pfailaddr = 0;
                l->l_pfailtime = 0;
                l->l_pfaillock = 0;
        }
        mutex_exit(proc_lock);

        if (ld == NULL)
                return error;

        /*
         * Fill out the disable struct for the caller.
         */
        getnanotime(&ts);
        timespecsub(&ts, &lockstat_stime, &ld->ld_time);
        ld->ld_size = lockstat_sizeb;

        cpuno = 0;
        for (CPU_INFO_FOREACH(cii, ci)) {
                if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
                        log(LOG_WARNING, "lockstat: too many CPUs\n");
                        break;
                }
                ld->ld_freq[cpuno++] = cpu_frequency(ci);
        }

        return error;
}

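/*
 * Note on the tsleep() above: lockstat_event() runs unlocked on other
 * CPUs, so after clearing lockstat_enabled we sleep for roughly 10ms to
 * let any CPU still inside the function drain out before the tables are
 * torn down.  This is a grace period, not a hard guarantee.
 */
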
/*
 * Allocate buffers for lockstat_start().
 */
int
lockstat_alloc(lsenable_t *le)
{
        lsbuf_t *lb;
        size_t sz;

        KASSERT(!lockstat_enabled);
        lockstat_free();

        sz = sizeof(*lb) * le->le_nbufs;

        lb = kmem_zalloc(sz, KM_SLEEP);
        if (lb == NULL)
                return ENOMEM;

        KASSERT(!lockstat_enabled);
        KASSERT(lockstat_baseb == NULL);
        lockstat_sizeb = sz;
        lockstat_baseb = lb;

        return 0;
}

/*
 * Free allocated buffers after tracing has stopped.
 */
void
lockstat_free(void)
{

        KASSERT(!lockstat_enabled);

        if (lockstat_baseb != NULL) {
                kmem_free(lockstat_baseb, lockstat_sizeb);
                lockstat_baseb = NULL;
        }
}

/*
 * Main entry point from lock primitives.
 */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
               uint64_t cycles)
{
        lslist_t *ll;
        lscpu_t *lc;
        lsbuf_t *lb;
        u_int event;
        int s;

        if ((flags & lockstat_enabled) != flags || count == 0)
                return;
        if (lock < lockstat_lockstart || lock > lockstat_lockend)
                return;
        if (callsite < lockstat_csstart || callsite > lockstat_csend)
                return;

        callsite &= lockstat_csmask;
        lock &= lockstat_lamask;

        /*
         * Find the table for this lock+callsite pair, and try to locate a
         * buffer with the same key.
         */
        s = splhigh();
        lc = curcpu()->ci_lockstat;
        ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
        event = (flags & LB_EVENT_MASK) - 1;

        LIST_FOREACH(lb, ll, lb_chain.list) {
                if (lb->lb_lock == lock && lb->lb_callsite == callsite)
                        break;
        }

        if (lb != NULL) {
                /*
                 * We found a record.  Move it to the front of the list, as
                 * we're likely to hit it again soon.
                 */
                if (lb != LIST_FIRST(ll)) {
                        LIST_REMOVE(lb, lb_chain.list);
                        LIST_INSERT_HEAD(ll, lb, lb_chain.list);
                }
                lb->lb_counts[event] += count;
                lb->lb_times[event] += cycles;
        } else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
                /*
                 * Pinch a new buffer and fill it out.
                 */
                SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
                LIST_INSERT_HEAD(ll, lb, lb_chain.list);
                lb->lb_flags = (uint16_t)flags;
                lb->lb_lock = lock;
                lb->lb_callsite = callsite;
                lb->lb_counts[event] = count;
                lb->lb_times[event] = cycles;
        } else {
                /*
                 * We didn't find a buffer and there were none free.
                 * lockstat_stop() will notice later on and report the
                 * error.
                 */
                lc->lc_overflow++;
        }

        splx(s);
}

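/*
 * Worked example of the lookup above, assuming lockstat_lamask is all
 * ones and lockstat_csmask is zero (the address is made up): a lock at
 * 0xffffffff80e01940 yields the key 0xffffffff80e01940 ^ 0, and
 * LOCKSTAT_HASH() selects chain (key >> 3) & 127 = 40 in the current
 * CPU's table.
 */
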
/*
 * Accept an open() on /dev/lockstat.
 */
int
lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
{

        if (!__cpu_simple_lock_try(&lockstat_lock))
                return EBUSY;
        lockstat_lwp = curlwp;
        return 0;
}

/*
 * Accept the last close() on /dev/lockstat.
 */
int
lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
{

        lockstat_lwp = NULL;
        __cpu_simple_unlock(&lockstat_lock);
        return 0;
}

/*
 * Handle control operations.
 */
int
lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
{
        lsenable_t *le;
        int error;

        if (lockstat_lwp != curlwp)
                return EBUSY;

        switch (cmd) {
        case IOC_LOCKSTAT_GVERSION:
                *(int *)data = LS_VERSION;
                error = 0;
                break;

        case IOC_LOCKSTAT_ENABLE:
                le = (lsenable_t *)data;

                if (!cpu_hascounter()) {
                        error = ENODEV;
                        break;
                }
                if (lockstat_enabled) {
                        error = EBUSY;
                        break;
                }

                /*
                 * Sanitize the arguments passed in and set up filtering.
                 */
                if (le->le_nbufs == 0)
                        le->le_nbufs = LOCKSTAT_DEFBUFS;
                else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
                    le->le_nbufs < LOCKSTAT_MINBUFS) {
                        error = EINVAL;
                        break;
                }
                if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
                        /*
                         * Wildcard: the range [0, (uintptr_t)-1] passes
                         * every callsite through the range check.
                         */
                        le->le_csstart = 0;
                        le->le_csend = le->le_csstart - 1;
                }
                if ((le->le_flags & LE_ONE_LOCK) == 0) {
                        le->le_lockstart = 0;
                        le->le_lockend = le->le_lockstart - 1;
                }
                if ((le->le_mask & LB_EVENT_MASK) == 0)
                        return EINVAL;
                if ((le->le_mask & LB_LOCK_MASK) == 0)
                        return EINVAL;

                /*
                 * Start tracing.
                 */
                if ((error = lockstat_alloc(le)) == 0)
                        lockstat_start(le);
                break;

        case IOC_LOCKSTAT_DISABLE:
                if (!lockstat_enabled)
                        error = EINVAL;
                else
                        error = lockstat_stop((lsdisable_t *)data);
                break;

        default:
                error = ENOTTY;
                break;
        }

        return error;
}

/*
 * Copy buffers out to user-space.
 */
int
lockstat_read(dev_t dev, struct uio *uio, int flag)
{

        if (curlwp != lockstat_lwp || lockstat_enabled)
                return EBUSY;
        return uiomove(lockstat_baseb, lockstat_sizeb, uio);
}

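/*
 * Illustrative userland sequence (a sketch only; lockstat(8) is the
 * real consumer, and error handling is omitted):
 *
 *	lsenable_t le;
 *	lsdisable_t ld;
 *	int fd = open("/dev/lockstat", O_RDONLY);
 *
 *	memset(&le, 0, sizeof(le));
 *	le.le_nbufs = 0;	(zero selects LOCKSTAT_DEFBUFS)
 *	le.le_mask = LB_EVENT_MASK | LB_LOCK_MASK;
 *	ioctl(fd, IOC_LOCKSTAT_ENABLE, &le);
 *	...run the workload of interest...
 *	ioctl(fd, IOC_LOCKSTAT_DISABLE, &ld);
 *
 * After the disable ioctl, read(2) returns the raw lsbuf_t records
 * (ld.ld_size bytes in total) and close(2) releases the device.
 */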