1 /* $NetBSD: main.c,v 1.16 2009/03/21 13:02:19 ad Exp $ */
4 * Copyright (c) 2006, 2007, 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
34 __RCSID("$NetBSD: main.c,v 1.16 2009/03/21 13:02:19 ad Exp $");
37 #include <sys/types.h>
38 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/ioctl.h>
43 #include <sys/signal.h>
44 #include <sys/sysctl.h>
46 #include <dev/lockstat.h>
/* Device node that main() opens to enable/disable tracing and to read back the trace buffers. */
62 #define _PATH_DEV_LOCKSTAT "/dev/lockstat"
/*
 * Unit scale factors (10^6, 10^9, 10^12).  main() divides PICO by each
 * CPU frequency to build cpuscale[], divides tv_nsec by MICRO when
 * computing the elapsed time in milliseconds, and divides by NANO to
 * derive the time scale applied to the displayed values.
 */
65 #define MICRO 1000000.0
66 #define NANO 1000000000.0
67 #define PICO 1000000000000.0
/*
 * Tail-queue head types: lock_head (locklist_t) links struct lockstruct
 * records, buf_head (buflist_t) links lsbuf records.
 */
69 TAILQ_HEAD(lock_head
, lockstruct
);
70 typedef struct lock_head locklist_t
;
71 TAILQ_HEAD(buf_head
, lsbuf
);
72 typedef struct buf_head buflist_t
;
74 typedef struct lockstruct
{
75 TAILQ_ENTRY(lockstruct
) chain
;
91 const name_t locknames
[] = {
92 { "adaptive_mutex", LB_ADAPTIVE_MUTEX
},
93 { "spin_mutex", LB_SPIN_MUTEX
},
94 { "rwlock", LB_RWLOCK
},
95 { "kernel_lock", LB_KERNEL_LOCK
},
96 { "preemption", LB_NOPREEMPT
},
101 const name_t eventnames
[] = {
103 { "sleep_exclusive", LB_SLEEP1
},
104 { "sleep_shared", LB_SLEEP2
},
108 const name_t alltypes
[] = {
109 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX
| LB_SPIN
},
110 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX
| LB_SLEEP1
},
111 { "Spin mutex spin", LB_SPIN_MUTEX
| LB_SPIN
},
112 { "RW lock sleep (writer)", LB_RWLOCK
| LB_SLEEP1
},
113 { "RW lock sleep (reader)", LB_RWLOCK
| LB_SLEEP2
},
114 { "RW lock spin", LB_RWLOCK
| LB_SPIN
},
115 { "Kernel lock spin", LB_KERNEL_LOCK
| LB_SPIN
},
116 { "Kernel preemption defer", LB_NOPREEMPT
| LB_SPIN
},
117 { "Miscellaneous wait", LB_MISC
| LB_SPIN
},
121 const name_t xtypes
[] = {
123 { "Sleep (writer)", LB_SLEEP1
},
124 { "Sleep (reader)", LB_SLEEP2
},
/*
 * Per-CPU time multiplier, one slot per ld.ld_freq[] entry.  main() sets
 * a slot to PICO / ld.ld_freq[i] when that frequency is non-zero, and
 * makelists() multiplies lb_times[] by the slot selected by lb_cpu.
 */
144 double cpuscale
[sizeof(ld
.ld_freq
) / sizeof(ld
.ld_freq
[0])];
/* Call into the ELF parser and look up a symbol by name or by address. */
147 void findsym(findsym_t
, char *, uintptr_t *, uintptr_t *, bool);
/* Fork off the traced command and wait for it to complete. */
148 void spawn(int, char **);
/* Display a summary table for one lock type / event type pair. */
149 void display(int, const char *name
);
/* Print each name in a table that ends with a NULL name. */
150 void listnames(const name_t
*);
/* Collapse addresses from unique objects (call sites and/or locks). */
151 void collapse(bool, bool);
/* Match a comma-separated identifier string against a name table; exits on an unknown identifier. */
152 int matchname(const name_t
*, char *);
/* Build the lists of locks and event buffers from the kernel supplied data. */
153 void makelists(int, int);
/* Allocate a new block of lock_t structures. */
157 lock_t
*morelocks(void);
160 main(int argc
, char **argv
)
162 int eventtype
, locktype
, ch
, nlfd
, fd
;
164 bool sflag
, pflag
, mflag
, Mflag
;
165 const char *nlistf
, *outf
;
166 char *lockname
, *funcname
;
184 while ((ch
= getopt(argc
, argv
, "E:F:L:MN:T:b:ceflmo:pstx")) != -1)
187 eventtype
= matchname(eventnames
, optarg
);
199 locktype
= matchname(locknames
, optarg
);
202 nbufs
= (int)strtol(optarg
, &p
, 0);
203 if (!isdigit((u_int
)*optarg
) || *p
!= '\0')
210 listnames(eventnames
);
234 listnames(locknames
);
249 fd
= open(outf
, O_WRONLY
| O_CREAT
| O_TRUNC
, 0600);
251 err(EXIT_FAILURE
, "opening %s", outf
);
252 outfp
= fdopen(fd
, "w");
257 * Find the name list for resolving symbol names, and load it into
260 if (nlistf
== NULL
) {
261 nlfd
= open(_PATH_KSYMS
, O_RDONLY
);
262 nlistf
= getbootfile();
266 if ((nlfd
= open(nlistf
, O_RDONLY
)) < 0)
267 err(EXIT_FAILURE
, "cannot open " _PATH_KSYMS
" or %s",
270 if (loadsym32(nlfd
) != 0) {
271 if (loadsym64(nlfd
) != 0)
272 errx(EXIT_FAILURE
, "unable to load symbol table");
277 memset(&le
, 0, sizeof(le
));
281 * Set up initial filtering.
283 if (lockname
!= NULL
) {
284 findsym(LOCK_BYNAME
, lockname
, &le
.le_lockstart
,
285 &le
.le_lockend
, true);
286 le
.le_flags
|= LE_ONE_LOCK
;
289 le
.le_flags
|= LE_CALLSITE
;
291 le
.le_flags
|= LE_LOCK
;
292 if (funcname
!= NULL
) {
295 findsym(FUNC_BYNAME
, funcname
, &le
.le_csstart
, &le
.le_csend
, true);
296 le
.le_flags
|= LE_ONE_CALLSITE
;
298 le
.le_mask
= (eventtype
& LB_EVENT_MASK
) | (locktype
& LB_LOCK_MASK
);
303 if ((lsfd
= open(_PATH_DEV_LOCKSTAT
, O_RDONLY
)) < 0)
304 err(EXIT_FAILURE
, "cannot open " _PATH_DEV_LOCKSTAT
);
305 if (ioctl(lsfd
, IOC_LOCKSTAT_GVERSION
, &ch
) < 0)
306 err(EXIT_FAILURE
, "ioctl");
307 if (ch
!= LS_VERSION
)
309 "incompatible lockstat interface version (%d, kernel %d)",
311 if (ioctl(lsfd
, IOC_LOCKSTAT_ENABLE
, &le
))
312 err(EXIT_FAILURE
, "cannot enable tracing");
315 * Execute the traced program.
320 * Stop tracing, and read the trace buffers from the kernel.
322 if (ioctl(lsfd
, IOC_LOCKSTAT_DISABLE
, &ld
) == -1) {
323 if (errno
== EOVERFLOW
) {
324 warnx("overflowed available kernel trace buffers");
327 err(EXIT_FAILURE
, "cannot disable tracing");
329 if ((bufs
= malloc(ld
.ld_size
)) == NULL
)
330 err(EXIT_FAILURE
, "cannot allocate memory for user buffers");
331 if ((size_t)read(lsfd
, bufs
, ld
.ld_size
) != ld
.ld_size
)
332 err(EXIT_FAILURE
, "reading from " _PATH_DEV_LOCKSTAT
);
334 err(EXIT_FAILURE
, "close(" _PATH_DEV_LOCKSTAT
")");
337 * Figure out how to scale the results. For internal use we convert
338 * all times from CPU frequency based to picoseconds, and values are
339 * eventually displayed in ms.
341 for (i
= 0; i
< sizeof(ld
.ld_freq
) / sizeof(ld
.ld_freq
[0]); i
++)
342 if (ld
.ld_freq
[i
] != 0)
343 cpuscale
[i
] = PICO
/ ld
.ld_freq
[i
];
344 ms
= ld
.ld_time
.tv_sec
* MILLI
+ ld
.ld_time
.tv_nsec
/ MICRO
;
346 cscale
= 1.0 / ncpu();
349 cscale
*= (sflag
? MILLI
/ ms
: 1.0);
350 tscale
= cscale
/ NANO
;
351 nbufs
= (int)(ld
.ld_size
/ sizeof(lsbuf_t
));
353 TAILQ_INIT(&locklist
);
354 TAILQ_INIT(&sortlist
);
355 TAILQ_INIT(&freelist
);
357 if ((mflag
| Mflag
) != 0)
358 collapse(mflag
, Mflag
);
361 * Display the results.
363 fprintf(outfp
, "Elapsed time: %.2f seconds.", ms
/ MILLI
);
364 if (sflag
|| pflag
) {
365 fprintf(outfp
, " Displaying ");
367 fprintf(outfp
, "per-CPU ");
369 fprintf(outfp
, "per-second ");
370 fprintf(outfp
, "averages.");
374 for (name
= xflag
? xtypes
: alltypes
; name
->name
!= NULL
; name
++) {
375 if (eventtype
!= -1 &&
376 (name
->mask
& LB_EVENT_MASK
) != eventtype
)
378 if (locktype
!= -1 &&
379 (name
->mask
& LB_LOCK_MASK
) != locktype
)
381 display(name
->mask
, name
->name
);
385 fprintf(outfp
, "None of the selected events were recorded.\n");
395 "%s [options] <command>\n\n"
396 "-b nbuf\t\tset number of event buffers to allocate\n"
397 "-c\t\treport percentage of total events by count, not time\n"
398 "-E event\t\tdisplay only one type of event\n"
399 "-e\t\tlist event types\n"
400 "-F func\t\tlimit trace to one function\n"
401 "-f\t\ttrace only by function\n"
402 "-L lock\t\tlimit trace to one lock (name, or address)\n"
403 "-l\t\ttrace only by lock\n"
404 "-M\t\tmerge lock addresses within unique objects\n"
405 "-m\t\tmerge call sites within unique functions\n"
406 "-N nlist\tspecify name list file\n"
407 "-o file\t\tsend output to named file, not stdout\n"
408 "-p\t\tshow average count/time per CPU, not total\n"
409 "-s\t\tshow average count/time per second, not total\n"
410 "-T type\t\tdisplay only one type of lock\n"
411 "-t\t\tlist lock types\n"
412 "-x\t\tdon't differentiate event types\n",
413 getprogname(), getprogname());
426 listnames(const name_t
*name
)
429 for (; name
->name
!= NULL
; name
++)
430 printf("%s\n", name
->name
);
436 matchname(const name_t
*name
, char *string
)
444 while ((sp
= strsep(&string
, ",")) != NULL
) {
448 for (; name
->name
!= NULL
; name
++) {
449 if (strcasecmp(name
->name
, sp
) == 0) {
454 if (name
->name
== NULL
)
455 errx(EXIT_FAILURE
, "unknown identifier `%s'", sp
);
466 * Return the number of CPUs in the running system.
477 if (sysctl(mib
, 2, &rv
, &varlen
, NULL
, (size_t)0) < 0)
484 * Call into the ELF parser and look up a symbol by name or by address.
487 findsym(findsym_t find
, char *name
, uintptr_t *start
, uintptr_t *end
, bool chg
)
489 uintptr_t tend
, sa
, ea
;
502 if (find
== LOCK_BYNAME
) {
503 if (isdigit((u_int
)name
[0])) {
504 *start
= (uintptr_t)strtoul(name
, &p
, 0);
511 rv
= findsym64(find
, name
, start
, end
);
513 rv
= findsym32(find
, name
, start
, end
);
515 if (find
== FUNC_BYNAME
|| find
== LOCK_BYNAME
) {
517 errx(EXIT_FAILURE
, "unable to find symbol `%s'", name
);
522 snprintf(name
, NAME_SIZE
, "%016lx", (long)*start
);
526 * Fork off the child process and wait for it to complete. We trap SIGINT
527 * so that the caller can use Ctrl-C to stop tracing early and still get
531 spawn(int argc
, char **argv
)
535 switch (pid
= fork()) {
538 if (execvp(argv
[0], argv
) == -1)
539 err(EXIT_FAILURE
, "cannot exec");
542 err(EXIT_FAILURE
, "cannot fork to exec");
545 signal(SIGINT
, nullsig
);
547 signal(SIGINT
, SIG_DFL
);
553 * Allocate a new block of lock_t structures.
558 const int batch
= 32;
559 lock_t
*l
, *lp
, *max
;
561 l
= (lock_t
*)malloc(sizeof(*l
) * batch
);
563 for (lp
= l
, max
= l
+ batch
; lp
< max
; lp
++)
564 TAILQ_INSERT_TAIL(&freelist
, lp
, chain
);
570 * Collapse addresses from unique objects.
573 collapse(bool func
, bool lock
)
577 for (lb
= bufs
, max
= bufs
+ nbufs
; lb
< max
; lb
++) {
578 if (func
&& lb
->lb_callsite
!= 0) {
579 findsym(FUNC_BYADDR
, NULL
, &lb
->lb_callsite
, NULL
,
582 if (lock
&& lb
->lb_lock
!= 0) {
583 findsym(LOCK_BYADDR
, NULL
, &lb
->lb_lock
, NULL
,
590 * From the kernel supplied data, construct two dimensional lists of locks
591 * and event buffers, indexed by lock type and sorted by event type.
594 makelists(int mask
, int event
)
596 lsbuf_t
*lb
, *lb2
, *max
;
601 * Recycle lock_t structures from the last run.
603 while ((l
= TAILQ_FIRST(&locklist
)) != NULL
) {
604 TAILQ_REMOVE(&locklist
, l
, chain
);
605 TAILQ_INSERT_HEAD(&freelist
, l
, chain
);
608 type
= mask
& LB_LOCK_MASK
;
610 for (lb
= bufs
, max
= bufs
+ nbufs
; lb
< max
; lb
++) {
611 if (!xflag
&& (lb
->lb_flags
& LB_LOCK_MASK
) != type
)
613 if (lb
->lb_counts
[event
] == 0)
617 * Look for a record describing this lock, and allocate a
620 TAILQ_FOREACH(l
, &sortlist
, chain
) {
621 if (l
->lock
== lb
->lb_lock
)
625 if ((l
= TAILQ_FIRST(&freelist
)) == NULL
)
627 TAILQ_REMOVE(&freelist
, l
, chain
);
628 l
->flags
= lb
->lb_flags
;
629 l
->lock
= lb
->lb_lock
;
634 TAILQ_INIT(&l
->tosort
);
635 TAILQ_INIT(&l
->bufs
);
636 TAILQ_INSERT_TAIL(&sortlist
, l
, chain
);
640 * Scale the time values per buffer and summarise
641 * times+counts per lock.
643 lb
->lb_times
[event
] *= cpuscale
[lb
->lb_cpu
];
644 l
->count
+= lb
->lb_counts
[event
];
645 l
->time
+= lb
->lb_times
[event
];
648 * Merge same lock+callsite pairs from multiple CPUs
651 TAILQ_FOREACH(lb2
, &l
->tosort
, lb_chain
.tailq
) {
652 if (lb
->lb_callsite
== lb2
->lb_callsite
)
656 lb2
->lb_counts
[event
] += lb
->lb_counts
[event
];
657 lb2
->lb_times
[event
] += lb
->lb_times
[event
];
659 TAILQ_INSERT_HEAD(&l
->tosort
, lb
, lb_chain
.tailq
);
665 * Now sort the lists.
667 while ((l
= TAILQ_FIRST(&sortlist
)) != NULL
) {
668 TAILQ_REMOVE(&sortlist
, l
, chain
);
671 * Sort the buffers into the per-lock list.
673 while ((lb
= TAILQ_FIRST(&l
->tosort
)) != NULL
) {
674 TAILQ_REMOVE(&l
->tosort
, lb
, lb_chain
.tailq
);
676 lb2
= TAILQ_FIRST(&l
->bufs
);
677 while (lb2
!= NULL
) {
679 if (lb
->lb_counts
[event
] >
680 lb2
->lb_counts
[event
])
682 } else if (lb
->lb_times
[event
] >
683 lb2
->lb_times
[event
])
685 lb2
= TAILQ_NEXT(lb2
, lb_chain
.tailq
);
688 TAILQ_INSERT_TAIL(&l
->bufs
, lb
,
691 TAILQ_INSERT_BEFORE(lb2
, lb
, lb_chain
.tailq
);
695 * Sort this lock into the per-type list, based on the
698 l2
= TAILQ_FIRST(&locklist
);
701 if (l
->count
> l2
->count
)
703 } else if (l
->time
> l2
->time
)
705 l2
= TAILQ_NEXT(l2
, chain
);
708 TAILQ_INSERT_TAIL(&locklist
, l
, chain
);
710 TAILQ_INSERT_BEFORE(l2
, l
, chain
);
715 * Display a summary table for one lock type / event type pair.
718 display(int mask
, const char *name
)
722 double pcscale
, metric
;
723 char fname
[NAME_SIZE
];
726 event
= (mask
& LB_EVENT_MASK
) - 1;
727 makelists(mask
, event
);
729 if (TAILQ_EMPTY(&locklist
))
732 fprintf(outfp
, "\n-- %s\n\n"
733 "Total%% Count Time/ms Lock Caller\n"
734 "------ ------- --------- ---------------------- ------------------------------\n",
738 * Sum up all events for this type of lock + event.
741 TAILQ_FOREACH(l
, &locklist
, chain
) {
751 pcscale
= (100.0 / pcscale
);
754 * For each lock, print a summary total, followed by a breakdown by
757 TAILQ_FOREACH(l
, &locklist
, chain
) {
764 if (l
->name
[0] == '\0')
765 findsym(LOCK_BYADDR
, l
->name
, &l
->lock
, NULL
, false);
767 if (lflag
|| l
->nbufs
> 1)
768 fprintf(outfp
, "%6.2f %7d %9.2f %-22s <all>\n",
769 metric
, (int)(l
->count
* cscale
),
770 l
->time
* tscale
, l
->name
);
775 TAILQ_FOREACH(lb
, &l
->bufs
, lb_chain
.tailq
) {
777 metric
= lb
->lb_counts
[event
];
779 metric
= lb
->lb_times
[event
];
782 findsym(FUNC_BYADDR
, fname
, &lb
->lb_callsite
, NULL
,
784 fprintf(outfp
, "%6.2f %7d %9.2f %-22s %s\n",
785 metric
, (int)(lb
->lb_counts
[event
] * cscale
),
786 lb
->lb_times
[event
] * tscale
, l
->name
, fname
);