Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / usr.sbin / lockstat / main.c
blobc3d648ec01c0c4e8b32dc8ad9359463900c32961
/*	$NetBSD: main.c,v 1.16 2009/03/21 13:02:19 ad Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __RCSID("$NetBSD: main.c,v 1.16 2009/03/21 13:02:19 ad Exp $");
35 #endif /* not lint */
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/fcntl.h>
41 #include <sys/ioctl.h>
42 #include <sys/wait.h>
43 #include <sys/signal.h>
44 #include <sys/sysctl.h>
46 #include <dev/lockstat.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <limits.h>
52 #include <unistd.h>
53 #include <err.h>
54 #include <paths.h>
55 #include <util.h>
56 #include <ctype.h>
57 #include <errno.h>
58 #include <stdbool.h>
60 #include "extern.h"
62 #define _PATH_DEV_LOCKSTAT "/dev/lockstat"
64 #define MILLI 1000.0
65 #define MICRO 1000000.0
66 #define NANO 1000000000.0
67 #define PICO 1000000000000.0
/*
 * Tail queue head types: lock_head/locklist_t heads a queue of lockstruct
 * records, buf_head/buflist_t heads a queue of lsbuf event buffers.
 */
TAILQ_HEAD(lock_head, lockstruct);
TAILQ_HEAD(buf_head, lsbuf);
typedef struct lock_head	locklist_t;
typedef struct buf_head		buflist_t;
74 typedef struct lockstruct {
75 TAILQ_ENTRY(lockstruct) chain;
76 buflist_t bufs;
77 buflist_t tosort;
78 uintptr_t lock;
79 double time;
80 uint32_t count;
81 u_int flags;
82 u_int nbufs;
83 char name[NAME_SIZE];
84 } lock_t;
/*
 * One entry of a name table: a text label and the mask value it stands for.
 * Tables are terminated by an entry whose name is NULL.
 */
typedef struct name {
	const char	*name;	/* label; NULL marks the end of a table */
	int		mask;	/* LB_* value(s) associated with the label */
} name_t;
91 const name_t locknames[] = {
92 { "adaptive_mutex", LB_ADAPTIVE_MUTEX },
93 { "spin_mutex", LB_SPIN_MUTEX },
94 { "rwlock", LB_RWLOCK },
95 { "kernel_lock", LB_KERNEL_LOCK },
96 { "preemption", LB_NOPREEMPT },
97 { "misc", LB_MISC },
98 { NULL, 0 }
101 const name_t eventnames[] = {
102 { "spin", LB_SPIN },
103 { "sleep_exclusive", LB_SLEEP1 },
104 { "sleep_shared", LB_SLEEP2 },
105 { NULL, 0 },
108 const name_t alltypes[] = {
109 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX | LB_SPIN },
110 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX | LB_SLEEP1 },
111 { "Spin mutex spin", LB_SPIN_MUTEX | LB_SPIN },
112 { "RW lock sleep (writer)", LB_RWLOCK | LB_SLEEP1 },
113 { "RW lock sleep (reader)", LB_RWLOCK | LB_SLEEP2 },
114 { "RW lock spin", LB_RWLOCK | LB_SPIN },
115 { "Kernel lock spin", LB_KERNEL_LOCK | LB_SPIN },
116 { "Kernel preemption defer", LB_NOPREEMPT | LB_SPIN },
117 { "Miscellaneous wait", LB_MISC | LB_SPIN },
118 { NULL, 0 }
121 const name_t xtypes[] = {
122 { "Spin", LB_SPIN },
123 { "Sleep (writer)", LB_SLEEP1 },
124 { "Sleep (reader)", LB_SLEEP2 },
125 { NULL, 0 }
128 locklist_t locklist;
129 locklist_t freelist;
130 locklist_t sortlist;
132 lsbuf_t *bufs;
133 lsdisable_t ld;
134 bool lflag;
135 bool fflag;
136 int nbufs;
137 bool cflag;
138 bool xflag;
139 int lsfd;
140 int displayed;
141 int bin64;
142 double tscale;
143 double cscale;
144 double cpuscale[sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0])];
145 FILE *outfp;
147 void findsym(findsym_t, char *, uintptr_t *, uintptr_t *, bool);
148 void spawn(int, char **);
149 void display(int, const char *name);
150 void listnames(const name_t *);
151 void collapse(bool, bool);
152 int matchname(const name_t *, char *);
153 void makelists(int, int);
154 void nullsig(int);
155 void usage(void);
156 int ncpu(void);
157 lock_t *morelocks(void);
160 main(int argc, char **argv)
162 int eventtype, locktype, ch, nlfd, fd;
163 size_t i;
164 bool sflag, pflag, mflag, Mflag;
165 const char *nlistf, *outf;
166 char *lockname, *funcname;
167 const name_t *name;
168 lsenable_t le;
169 double ms;
170 char *p;
172 nlistf = NULL;
173 outf = NULL;
174 lockname = NULL;
175 funcname = NULL;
176 eventtype = -1;
177 locktype = -1;
178 nbufs = 0;
179 sflag = false;
180 pflag = false;
181 mflag = false;
182 Mflag = false;
184 while ((ch = getopt(argc, argv, "E:F:L:MN:T:b:ceflmo:pstx")) != -1)
185 switch (ch) {
186 case 'E':
187 eventtype = matchname(eventnames, optarg);
188 break;
189 case 'F':
190 funcname = optarg;
191 break;
192 case 'L':
193 lockname = optarg;
194 break;
195 case 'N':
196 nlistf = optarg;
197 break;
198 case 'T':
199 locktype = matchname(locknames, optarg);
200 break;
201 case 'b':
202 nbufs = (int)strtol(optarg, &p, 0);
203 if (!isdigit((u_int)*optarg) || *p != '\0')
204 usage();
205 break;
206 case 'c':
207 cflag = true;
208 break;
209 case 'e':
210 listnames(eventnames);
211 break;
212 case 'f':
213 fflag = true;
214 break;
215 case 'l':
216 lflag = true;
217 break;
218 case 'm':
219 mflag = true;
220 break;
221 case 'M':
222 Mflag = true;
223 break;
224 case 'o':
225 outf = optarg;
226 break;
227 case 'p':
228 pflag = true;
229 break;
230 case 's':
231 sflag = true;
232 break;
233 case 't':
234 listnames(locknames);
235 break;
236 case 'x':
237 xflag = true;
238 break;
239 default:
240 usage();
242 argc -= optind;
243 argv += optind;
245 if (*argv == NULL)
246 usage();
248 if (outf) {
249 fd = open(outf, O_WRONLY | O_CREAT | O_TRUNC, 0600);
250 if (fd == -1)
251 err(EXIT_FAILURE, "opening %s", outf);
252 outfp = fdopen(fd, "w");
253 } else
254 outfp = stdout;
257 * Find the name list for resolving symbol names, and load it into
258 * memory.
260 if (nlistf == NULL) {
261 nlfd = open(_PATH_KSYMS, O_RDONLY);
262 nlistf = getbootfile();
263 } else
264 nlfd = -1;
265 if (nlfd == -1) {
266 if ((nlfd = open(nlistf, O_RDONLY)) < 0)
267 err(EXIT_FAILURE, "cannot open " _PATH_KSYMS " or %s",
268 nlistf);
270 if (loadsym32(nlfd) != 0) {
271 if (loadsym64(nlfd) != 0)
272 errx(EXIT_FAILURE, "unable to load symbol table");
273 bin64 = 1;
275 close(nlfd);
277 memset(&le, 0, sizeof(le));
278 le.le_nbufs = nbufs;
281 * Set up initial filtering.
283 if (lockname != NULL) {
284 findsym(LOCK_BYNAME, lockname, &le.le_lockstart,
285 &le.le_lockend, true);
286 le.le_flags |= LE_ONE_LOCK;
288 if (!lflag)
289 le.le_flags |= LE_CALLSITE;
290 if (!fflag)
291 le.le_flags |= LE_LOCK;
292 if (funcname != NULL) {
293 if (lflag)
294 usage();
295 findsym(FUNC_BYNAME, funcname, &le.le_csstart, &le.le_csend, true);
296 le.le_flags |= LE_ONE_CALLSITE;
298 le.le_mask = (eventtype & LB_EVENT_MASK) | (locktype & LB_LOCK_MASK);
301 * Start tracing.
303 if ((lsfd = open(_PATH_DEV_LOCKSTAT, O_RDONLY)) < 0)
304 err(EXIT_FAILURE, "cannot open " _PATH_DEV_LOCKSTAT);
305 if (ioctl(lsfd, IOC_LOCKSTAT_GVERSION, &ch) < 0)
306 err(EXIT_FAILURE, "ioctl");
307 if (ch != LS_VERSION)
308 errx(EXIT_FAILURE,
309 "incompatible lockstat interface version (%d, kernel %d)",
310 LS_VERSION, ch);
311 if (ioctl(lsfd, IOC_LOCKSTAT_ENABLE, &le))
312 err(EXIT_FAILURE, "cannot enable tracing");
315 * Execute the traced program.
317 spawn(argc, argv);
320 * Stop tracing, and read the trace buffers from the kernel.
322 if (ioctl(lsfd, IOC_LOCKSTAT_DISABLE, &ld) == -1) {
323 if (errno == EOVERFLOW) {
324 warnx("overflowed available kernel trace buffers");
325 exit(EXIT_FAILURE);
327 err(EXIT_FAILURE, "cannot disable tracing");
329 if ((bufs = malloc(ld.ld_size)) == NULL)
330 err(EXIT_FAILURE, "cannot allocate memory for user buffers");
331 if ((size_t)read(lsfd, bufs, ld.ld_size) != ld.ld_size)
332 err(EXIT_FAILURE, "reading from " _PATH_DEV_LOCKSTAT);
333 if (close(lsfd))
334 err(EXIT_FAILURE, "close(" _PATH_DEV_LOCKSTAT ")");
337 * Figure out how to scale the results. For internal use we convert
338 * all times from CPU frequency based to picoseconds, and values are
339 * eventually displayed in ms.
341 for (i = 0; i < sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0]); i++)
342 if (ld.ld_freq[i] != 0)
343 cpuscale[i] = PICO / ld.ld_freq[i];
344 ms = ld.ld_time.tv_sec * MILLI + ld.ld_time.tv_nsec / MICRO;
345 if (pflag)
346 cscale = 1.0 / ncpu();
347 else
348 cscale = 1.0;
349 cscale *= (sflag ? MILLI / ms : 1.0);
350 tscale = cscale / NANO;
351 nbufs = (int)(ld.ld_size / sizeof(lsbuf_t));
353 TAILQ_INIT(&locklist);
354 TAILQ_INIT(&sortlist);
355 TAILQ_INIT(&freelist);
357 if ((mflag | Mflag) != 0)
358 collapse(mflag, Mflag);
361 * Display the results.
363 fprintf(outfp, "Elapsed time: %.2f seconds.", ms / MILLI);
364 if (sflag || pflag) {
365 fprintf(outfp, " Displaying ");
366 if (pflag)
367 fprintf(outfp, "per-CPU ");
368 if (sflag)
369 fprintf(outfp, "per-second ");
370 fprintf(outfp, "averages.");
372 putc('\n', outfp);
374 for (name = xflag ? xtypes : alltypes; name->name != NULL; name++) {
375 if (eventtype != -1 &&
376 (name->mask & LB_EVENT_MASK) != eventtype)
377 continue;
378 if (locktype != -1 &&
379 (name->mask & LB_LOCK_MASK) != locktype)
380 continue;
381 display(name->mask, name->name);
384 if (displayed == 0)
385 fprintf(outfp, "None of the selected events were recorded.\n");
386 exit(EXIT_SUCCESS);
/*
 * Print the option summary to stderr and exit with EXIT_FAILURE.
 */
void
usage(void)
{
	const char *prog = getprogname();

	fprintf(stderr,
	    "%s: usage:\n"
	    "%s [options] <command>\n\n"
	    "-b nbuf\t\tset number of event buffers to allocate\n"
	    "-c\t\treport percentage of total events by count, not time\n"
	    "-E event\t\tdisplay only one type of event\n"
	    "-e\t\tlist event types\n"
	    "-F func\t\tlimit trace to one function\n"
	    "-f\t\ttrace only by function\n"
	    "-L lock\t\tlimit trace to one lock (name, or address)\n"
	    "-l\t\ttrace only by lock\n"
	    "-M\t\tmerge lock addresses within unique objects\n"
	    "-m\t\tmerge call sites within unique functions\n"
	    "-N nlist\tspecify name list file\n"
	    "-o file\t\tsend output to named file, not stdout\n"
	    "-p\t\tshow average count/time per CPU, not total\n"
	    "-s\t\tshow average count/time per second, not total\n"
	    "-T type\t\tdisplay only one type of lock\n"
	    "-t\t\tlist lock types\n"
	    "-x\t\tdon't differentiate event types\n",
	    prog, prog);

	exit(EXIT_FAILURE);
}
/*
 * Signal handler that does nothing; spawn() installs it for SIGINT while
 * waiting for the child.
 */
void
nullsig(int junk)
{

	/* The signal number is not needed. */
	(void)junk;
}
425 void
426 listnames(const name_t *name)
429 for (; name->name != NULL; name++)
430 printf("%s\n", name->name);
432 exit(EXIT_SUCCESS);
436 matchname(const name_t *name, char *string)
438 int empty, mask;
439 char *sp;
441 empty = 1;
442 mask = 0;
444 while ((sp = strsep(&string, ",")) != NULL) {
445 if (*sp == '\0')
446 usage();
448 for (; name->name != NULL; name++) {
449 if (strcasecmp(name->name, sp) == 0) {
450 mask |= name->mask;
451 break;
454 if (name->name == NULL)
455 errx(EXIT_FAILURE, "unknown identifier `%s'", sp);
456 empty = 0;
459 if (empty)
460 usage();
462 return mask;
466 * Return the number of CPUs in the running system.
469 ncpu(void)
471 int rv, mib[2];
472 size_t varlen;
474 mib[0] = CTL_HW;
475 mib[1] = HW_NCPU;
476 varlen = sizeof(rv);
477 if (sysctl(mib, 2, &rv, &varlen, NULL, (size_t)0) < 0)
478 rv = 1;
480 return (rv);
484 * Call into the ELF parser and look up a symbol by name or by address.
486 void
487 findsym(findsym_t find, char *name, uintptr_t *start, uintptr_t *end, bool chg)
489 uintptr_t tend, sa, ea;
490 char *p;
491 int rv;
493 if (!chg) {
494 sa = *start;
495 start = &sa;
496 end = &ea;
499 if (end == NULL)
500 end = &tend;
502 if (find == LOCK_BYNAME) {
503 if (isdigit((u_int)name[0])) {
504 *start = (uintptr_t)strtoul(name, &p, 0);
505 if (*p == '\0')
506 return;
510 if (bin64)
511 rv = findsym64(find, name, start, end);
512 else
513 rv = findsym32(find, name, start, end);
515 if (find == FUNC_BYNAME || find == LOCK_BYNAME) {
516 if (rv == -1)
517 errx(EXIT_FAILURE, "unable to find symbol `%s'", name);
518 return;
521 if (rv == -1)
522 snprintf(name, NAME_SIZE, "%016lx", (long)*start);
526 * Fork off the child process and wait for it to complete. We trap SIGINT
527 * so that the caller can use Ctrl-C to stop tracing early and still get
528 * useful results.
530 void
531 spawn(int argc, char **argv)
533 pid_t pid;
535 switch (pid = fork()) {
536 case 0:
537 close(lsfd);
538 if (execvp(argv[0], argv) == -1)
539 err(EXIT_FAILURE, "cannot exec");
540 break;
541 case -1:
542 err(EXIT_FAILURE, "cannot fork to exec");
543 break;
544 default:
545 signal(SIGINT, nullsig);
546 wait(NULL);
547 signal(SIGINT, SIG_DFL);
548 break;
553 * Allocate a new block of lock_t structures.
555 lock_t *
556 morelocks(void)
558 const int batch = 32;
559 lock_t *l, *lp, *max;
561 l = (lock_t *)malloc(sizeof(*l) * batch);
563 for (lp = l, max = l + batch; lp < max; lp++)
564 TAILQ_INSERT_TAIL(&freelist, lp, chain);
566 return l;
570 * Collapse addresses from unique objects.
572 void
573 collapse(bool func, bool lock)
575 lsbuf_t *lb, *max;
577 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) {
578 if (func && lb->lb_callsite != 0) {
579 findsym(FUNC_BYADDR, NULL, &lb->lb_callsite, NULL,
580 true);
582 if (lock && lb->lb_lock != 0) {
583 findsym(LOCK_BYADDR, NULL, &lb->lb_lock, NULL,
584 true);
590 * From the kernel supplied data, construct two dimensional lists of locks
591 * and event buffers, indexed by lock type and sorted by event type.
593 void
594 makelists(int mask, int event)
596 lsbuf_t *lb, *lb2, *max;
597 lock_t *l, *l2;
598 int type;
601 * Recycle lock_t structures from the last run.
603 while ((l = TAILQ_FIRST(&locklist)) != NULL) {
604 TAILQ_REMOVE(&locklist, l, chain);
605 TAILQ_INSERT_HEAD(&freelist, l, chain);
608 type = mask & LB_LOCK_MASK;
610 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) {
611 if (!xflag && (lb->lb_flags & LB_LOCK_MASK) != type)
612 continue;
613 if (lb->lb_counts[event] == 0)
614 continue;
617 * Look for a record descibing this lock, and allocate a
618 * new one if needed.
620 TAILQ_FOREACH(l, &sortlist, chain) {
621 if (l->lock == lb->lb_lock)
622 break;
624 if (l == NULL) {
625 if ((l = TAILQ_FIRST(&freelist)) == NULL)
626 l = morelocks();
627 TAILQ_REMOVE(&freelist, l, chain);
628 l->flags = lb->lb_flags;
629 l->lock = lb->lb_lock;
630 l->nbufs = 0;
631 l->name[0] = '\0';
632 l->count = 0;
633 l->time = 0;
634 TAILQ_INIT(&l->tosort);
635 TAILQ_INIT(&l->bufs);
636 TAILQ_INSERT_TAIL(&sortlist, l, chain);
640 * Scale the time values per buffer and summarise
641 * times+counts per lock.
643 lb->lb_times[event] *= cpuscale[lb->lb_cpu];
644 l->count += lb->lb_counts[event];
645 l->time += lb->lb_times[event];
648 * Merge same lock+callsite pairs from multiple CPUs
649 * together.
651 TAILQ_FOREACH(lb2, &l->tosort, lb_chain.tailq) {
652 if (lb->lb_callsite == lb2->lb_callsite)
653 break;
655 if (lb2 != NULL) {
656 lb2->lb_counts[event] += lb->lb_counts[event];
657 lb2->lb_times[event] += lb->lb_times[event];
658 } else {
659 TAILQ_INSERT_HEAD(&l->tosort, lb, lb_chain.tailq);
660 l->nbufs++;
665 * Now sort the lists.
667 while ((l = TAILQ_FIRST(&sortlist)) != NULL) {
668 TAILQ_REMOVE(&sortlist, l, chain);
671 * Sort the buffers into the per-lock list.
673 while ((lb = TAILQ_FIRST(&l->tosort)) != NULL) {
674 TAILQ_REMOVE(&l->tosort, lb, lb_chain.tailq);
676 lb2 = TAILQ_FIRST(&l->bufs);
677 while (lb2 != NULL) {
678 if (cflag) {
679 if (lb->lb_counts[event] >
680 lb2->lb_counts[event])
681 break;
682 } else if (lb->lb_times[event] >
683 lb2->lb_times[event])
684 break;
685 lb2 = TAILQ_NEXT(lb2, lb_chain.tailq);
687 if (lb2 == NULL)
688 TAILQ_INSERT_TAIL(&l->bufs, lb,
689 lb_chain.tailq);
690 else
691 TAILQ_INSERT_BEFORE(lb2, lb, lb_chain.tailq);
695 * Sort this lock into the per-type list, based on the
696 * totals per lock.
698 l2 = TAILQ_FIRST(&locklist);
699 while (l2 != NULL) {
700 if (cflag) {
701 if (l->count > l2->count)
702 break;
703 } else if (l->time > l2->time)
704 break;
705 l2 = TAILQ_NEXT(l2, chain);
707 if (l2 == NULL)
708 TAILQ_INSERT_TAIL(&locklist, l, chain);
709 else
710 TAILQ_INSERT_BEFORE(l2, l, chain);
715 * Display a summary table for one lock type / event type pair.
717 void
718 display(int mask, const char *name)
720 lock_t *l;
721 lsbuf_t *lb;
722 double pcscale, metric;
723 char fname[NAME_SIZE];
724 int event;
726 event = (mask & LB_EVENT_MASK) - 1;
727 makelists(mask, event);
729 if (TAILQ_EMPTY(&locklist))
730 return;
732 fprintf(outfp, "\n-- %s\n\n"
733 "Total%% Count Time/ms Lock Caller\n"
734 "------ ------- --------- ---------------------- ------------------------------\n",
735 name);
738 * Sum up all events for this type of lock + event.
740 pcscale = 0;
741 TAILQ_FOREACH(l, &locklist, chain) {
742 if (cflag)
743 pcscale += l->count;
744 else
745 pcscale += l->time;
746 displayed++;
748 if (pcscale == 0)
749 pcscale = 100;
750 else
751 pcscale = (100.0 / pcscale);
754 * For each lock, print a summary total, followed by a breakdown by
755 * caller.
757 TAILQ_FOREACH(l, &locklist, chain) {
758 if (cflag)
759 metric = l->count;
760 else
761 metric = l->time;
762 metric *= pcscale;
764 if (l->name[0] == '\0')
765 findsym(LOCK_BYADDR, l->name, &l->lock, NULL, false);
767 if (lflag || l->nbufs > 1)
768 fprintf(outfp, "%6.2f %7d %9.2f %-22s <all>\n",
769 metric, (int)(l->count * cscale),
770 l->time * tscale, l->name);
772 if (lflag)
773 continue;
775 TAILQ_FOREACH(lb, &l->bufs, lb_chain.tailq) {
776 if (cflag)
777 metric = lb->lb_counts[event];
778 else
779 metric = lb->lb_times[event];
780 metric *= pcscale;
782 findsym(FUNC_BYADDR, fname, &lb->lb_callsite, NULL,
783 false);
784 fprintf(outfp, "%6.2f %7d %9.2f %-22s %s\n",
785 metric, (int)(lb->lb_counts[event] * cscale),
786 lb->lb_times[event] * tscale, l->name, fname);