usr/src/cmd/mdb/common/modules/genunix/kmem.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Joyent, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */
31 #include <mdb/mdb_param.h>
32 #include <mdb/mdb_modapi.h>
33 #include <mdb/mdb_ctf.h>
34 #include <mdb/mdb_whatis.h>
35 #include <sys/cpuvar.h>
36 #include <sys/kmem_impl.h>
37 #include <sys/vmem_impl.h>
38 #include <sys/machelf.h>
39 #include <sys/modctl.h>
40 #include <sys/kobj.h>
41 #include <sys/panic.h>
42 #include <sys/stack.h>
43 #include <sys/sysmacros.h>
44 #include <vm/page.h>
46 #include "avl.h"
47 #include "combined.h"
48 #include "dist.h"
49 #include "kmem.h"
50 #include "list.h"
#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: "); \
	/*CSTYLED*/\
	mdb_printf x ;\
}
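
/*
 * Walk-type flags accepted by kmem_walk_init_common() below (summary added
 * for clarity): KM_ALLOCATED and KM_FREE select whether allocated or freed
 * buffers are reported, KM_BUFCTL reports bufctl addresses rather than
 * buffer addresses, KM_CONSTRUCTED restricts a freed-buffer walk to
 * constructed buffers, and KM_HASH is set internally when the walk is
 * layered over the cache's hash table.
 */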
58 #define KM_ALLOCATED 0x01
59 #define KM_FREE 0x02
60 #define KM_BUFCTL 0x04
61 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
62 #define KM_HASH 0x10
64 static int mdb_debug_level = 0;
66 /*ARGSUSED*/
67 static int
68 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
70 mdb_walker_t w;
71 char descr[64];
73 (void) mdb_snprintf(descr, sizeof (descr),
74 "walk the %s cache", c->cache_name);
76 w.walk_name = c->cache_name;
77 w.walk_descr = descr;
78 w.walk_init = kmem_walk_init;
79 w.walk_step = kmem_walk_step;
80 w.walk_fini = kmem_walk_fini;
81 w.walk_init_arg = (void *)addr;
83 if (mdb_add_walker(&w) == -1)
84 mdb_warn("failed to add %s walker", c->cache_name);
86 return (WALK_NEXT);
89 /*ARGSUSED*/
90 int
91 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
93 mdb_debug_level ^= 1;
95 mdb_printf("kmem: debugging is now %s\n",
96 mdb_debug_level ? "on" : "off");
98 return (DCMD_OK);
102 kmem_cache_walk_init(mdb_walk_state_t *wsp)
104 GElf_Sym sym;
106 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
107 mdb_warn("couldn't find kmem_caches");
108 return (WALK_ERR);
111 wsp->walk_addr = (uintptr_t)sym.st_value;
113 return (list_walk_init_named(wsp, "cache list", "cache"));
117 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
119 if (wsp->walk_addr == (uintptr_t)NULL) {
120 mdb_warn("kmem_cpu_cache doesn't support global walks");
121 return (WALK_ERR);
124 if (mdb_layered_walk("cpu", wsp) == -1) {
125 mdb_warn("couldn't walk 'cpu'");
126 return (WALK_ERR);
129 wsp->walk_data = (void *)wsp->walk_addr;
131 return (WALK_NEXT);
135 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
137 uintptr_t caddr = (uintptr_t)wsp->walk_data;
138 const cpu_t *cpu = wsp->walk_layer;
139 kmem_cpu_cache_t cc;
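
	/*
	 * wsp->walk_data carries the kmem_cache_t address saved by the init
	 * routine; each cpu_t supplied by the layered "cpu" walk provides a
	 * cpu_seqid with which to index that cache's cache_cpu[] array.
	 */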
141 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
143 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
144 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
145 return (WALK_ERR);
148 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
151 static int
152 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
154 kmem_slab_t *sp = p;
155 uintptr_t caddr = (uintptr_t)arg;
156 if ((uintptr_t)sp->slab_cache != caddr) {
157 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
158 saddr, caddr, sp->slab_cache);
159 return (-1);
162 return (0);
165 static int
166 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
168 kmem_slab_t *sp = p;
170 int rc = kmem_slab_check(p, saddr, arg);
171 if (rc != 0) {
172 return (rc);
175 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
176 mdb_warn("slab %p is not a partial slab\n", saddr);
177 return (-1);
180 return (0);
183 static int
184 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
186 kmem_slab_t *sp = p;
188 int rc = kmem_slab_check(p, saddr, arg);
189 if (rc != 0) {
190 return (rc);
193 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
194 mdb_warn("slab %p is not completely allocated\n", saddr);
195 return (-1);
198 return (0);
201 typedef struct {
202 uintptr_t kns_cache_addr;
203 int kns_nslabs;
204 } kmem_nth_slab_t;
206 static int
207 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
209 kmem_nth_slab_t *chkp = arg;
211 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
212 if (rc != 0) {
213 return (rc);
	/*
	 * A nonzero return ends the checked list walk; kns_nslabs is set to
	 * 1 by kmem_first_complete_slab_walk_init(), so only the first
	 * complete slab is reported.
	 */
	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
219 static int
220 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
222 uintptr_t caddr = wsp->walk_addr;
224 wsp->walk_addr = (uintptr_t)(caddr +
225 offsetof(kmem_cache_t, cache_complete_slabs));
227 return (list_walk_init_checked(wsp, "slab list", "slab",
228 kmem_complete_slab_check, (void *)caddr));
231 static int
232 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
234 uintptr_t caddr = wsp->walk_addr;
236 wsp->walk_addr = (uintptr_t)(caddr +
237 offsetof(kmem_cache_t, cache_partial_slabs));
239 return (avl_walk_init_checked(wsp, "slab list", "slab",
240 kmem_partial_slab_check, (void *)caddr));
244 kmem_slab_walk_init(mdb_walk_state_t *wsp)
246 uintptr_t caddr = wsp->walk_addr;
248 if (caddr == (uintptr_t)NULL) {
249 mdb_warn("kmem_slab doesn't support global walks\n");
250 return (WALK_ERR);
253 combined_walk_init(wsp);
254 combined_walk_add(wsp,
255 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
256 combined_walk_add(wsp,
257 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
259 return (WALK_NEXT);
262 static int
263 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
265 uintptr_t caddr = wsp->walk_addr;
266 kmem_nth_slab_t *chk;
268 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
269 UM_SLEEP | UM_GC);
270 chk->kns_cache_addr = caddr;
271 chk->kns_nslabs = 1;
272 wsp->walk_addr = (uintptr_t)(caddr +
273 offsetof(kmem_cache_t, cache_complete_slabs));
275 return (list_walk_init_checked(wsp, "slab list", "slab",
276 kmem_nth_slab_check, chk));
280 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
282 uintptr_t caddr = wsp->walk_addr;
283 kmem_cache_t c;
285 if (caddr == (uintptr_t)NULL) {
286 mdb_warn("kmem_slab_partial doesn't support global walks\n");
287 return (WALK_ERR);
290 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
291 mdb_warn("couldn't read kmem_cache at %p", caddr);
292 return (WALK_ERR);
295 combined_walk_init(wsp);
* Some consumers (kmem_walk_step(), in particular) require at
299 * least one callback if there are any buffers in the cache. So
300 * if there are *no* partial slabs, report the first full slab, if
301 * any.
303 * Yes, this is ugly, but it's cleaner than the other possibilities.
305 if (c.cache_partial_slabs.avl_numnodes == 0) {
306 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
307 list_walk_step, list_walk_fini);
308 } else {
309 combined_walk_add(wsp, kmem_partial_slab_walk_init,
310 avl_walk_step, avl_walk_fini);
313 return (WALK_NEXT);
317 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
319 kmem_cache_t c;
320 const char *filter = NULL;
322 if (mdb_getopts(ac, argv,
323 'n', MDB_OPT_STR, &filter,
324 NULL) != ac) {
325 return (DCMD_USAGE);
328 if (!(flags & DCMD_ADDRSPEC)) {
329 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
330 mdb_warn("can't walk kmem_cache");
331 return (DCMD_ERR);
333 return (DCMD_OK);
336 if (DCMD_HDRSPEC(flags))
337 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
338 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
340 if (mdb_vread(&c, sizeof (c), addr) == -1) {
341 mdb_warn("couldn't read kmem_cache at %p", addr);
342 return (DCMD_ERR);
345 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
346 return (DCMD_OK);
348 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
349 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
351 return (DCMD_OK);
354 void
355 kmem_cache_help(void)
357 mdb_printf("%s", "Print kernel memory caches.\n\n");
358 mdb_dec_indent(2);
359 mdb_printf("%<b>OPTIONS%</b>\n");
360 mdb_inc_indent(2);
361 mdb_printf("%s",
362 " -n name\n"
363 " name of kmem cache (or matching partial name)\n"
364 "\n"
365 "Column\tDescription\n"
366 "\n"
367 "ADDR\t\taddress of kmem cache\n"
368 "NAME\t\tname of kmem cache\n"
369 "FLAG\t\tvarious cache state flags\n"
370 "CFLAG\t\tcache creation flags\n"
371 "BUFSIZE\tobject size in bytes\n"
372 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
375 #define LABEL_WIDTH 11
376 static void
377 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
378 size_t maxbuckets, size_t minbucketsize)
380 uint64_t total;
381 int buckets;
382 int i;
383 const int *distarray;
384 int complete[2];
386 buckets = buffers_per_slab;
388 total = 0;
389 for (i = 0; i <= buffers_per_slab; i++)
390 total += ks_bucket[i];
392 if (maxbuckets > 1)
393 buckets = MIN(buckets, maxbuckets);
395 if (minbucketsize > 1) {
397 * minbucketsize does not apply to the first bucket reserved
398 * for completely allocated slabs
400 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
401 minbucketsize));
402 if ((buckets < 2) && (buffers_per_slab > 1)) {
403 buckets = 2;
404 minbucketsize = (buffers_per_slab - 1);
409 * The first printed bucket is reserved for completely allocated slabs.
410 * Passing (buckets - 1) excludes that bucket from the generated
411 * distribution, since we're handling it as a special case.
413 complete[0] = buffers_per_slab;
414 complete[1] = buffers_per_slab + 1;
415 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
417 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
418 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
420 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
422 * Print bucket ranges in descending order after the first bucket for
423 * completely allocated slabs, so a person can see immediately whether
424 * or not there is fragmentation without having to scan possibly
425 * multiple screens of output. Starting at (buckets - 2) excludes the
426 * extra terminating bucket.
428 for (i = buckets - 2; i >= 0; i--) {
429 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
431 mdb_printf("\n");
433 #undef LABEL_WIDTH
435 /*ARGSUSED*/
436 static int
437 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
439 *is_slab = B_TRUE;
440 return (WALK_DONE);
443 /*ARGSUSED*/
444 static int
445 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
446 boolean_t *is_slab)
449 * The "kmem_partial_slab" walker reports the first full slab if there
450 * are no partial slabs (for the sake of consumers that require at least
451 * one callback if there are any buffers in the cache).
453 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
454 return (WALK_DONE);
457 typedef struct kmem_slab_usage {
458 int ksu_refcnt; /* count of allocated buffers on slab */
459 boolean_t ksu_nomove; /* slab marked non-reclaimable */
460 } kmem_slab_usage_t;
462 typedef struct kmem_slab_stats {
463 const kmem_cache_t *ks_cp;
464 int ks_slabs; /* slabs in cache */
465 int ks_partial_slabs; /* partially allocated slabs in cache */
466 uint64_t ks_unused_buffers; /* total unused buffers in cache */
467 int ks_max_buffers_per_slab; /* max buffers per slab */
468 int ks_usage_len; /* ks_usage array length */
469 kmem_slab_usage_t *ks_usage; /* partial slab usage */
470 uint_t *ks_bucket; /* slab usage distribution */
471 } kmem_slab_stats_t;
473 /*ARGSUSED*/
474 static int
475 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
476 kmem_slab_stats_t *ks)
478 kmem_slab_usage_t *ksu;
479 long unused;
481 ks->ks_slabs++;
482 ks->ks_bucket[sp->slab_refcnt]++;
484 unused = (sp->slab_chunks - sp->slab_refcnt);
485 if (unused == 0) {
486 return (WALK_NEXT);
489 ks->ks_partial_slabs++;
490 ks->ks_unused_buffers += unused;
492 if (ks->ks_partial_slabs > ks->ks_usage_len) {
493 kmem_slab_usage_t *usage;
494 int len = ks->ks_usage_len;
496 len = (len == 0 ? 16 : len * 2);
497 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
498 if (ks->ks_usage != NULL) {
499 bcopy(ks->ks_usage, usage,
500 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
501 mdb_free(ks->ks_usage,
502 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
504 ks->ks_usage = usage;
505 ks->ks_usage_len = len;
508 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
509 ksu->ksu_refcnt = sp->slab_refcnt;
510 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
511 return (WALK_NEXT);
514 static void
515 kmem_slabs_header()
517 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
518 "", "", "Partial", "", "Unused", "");
519 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
520 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
521 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
522 "-------------------------", "--------", "--------", "---------",
523 "---------", "------");
527 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
529 kmem_cache_t c;
530 kmem_slab_stats_t stats;
531 mdb_walk_cb_t cb;
532 int pct;
533 int tenths_pct;
534 size_t maxbuckets = 1;
535 size_t minbucketsize = 0;
536 const char *filter = NULL;
537 const char *name = NULL;
538 uint_t opt_v = FALSE;
539 boolean_t buckets = B_FALSE;
540 boolean_t skip = B_FALSE;
542 if (mdb_getopts(argc, argv,
543 'B', MDB_OPT_UINTPTR, &minbucketsize,
544 'b', MDB_OPT_UINTPTR, &maxbuckets,
545 'n', MDB_OPT_STR, &filter,
546 'N', MDB_OPT_STR, &name,
547 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
548 NULL) != argc) {
549 return (DCMD_USAGE);
552 if ((maxbuckets != 1) || (minbucketsize != 0)) {
553 buckets = B_TRUE;
556 if (!(flags & DCMD_ADDRSPEC)) {
557 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
558 argv) == -1) {
559 mdb_warn("can't walk kmem_cache");
560 return (DCMD_ERR);
562 return (DCMD_OK);
565 if (mdb_vread(&c, sizeof (c), addr) == -1) {
566 mdb_warn("couldn't read kmem_cache at %p", addr);
567 return (DCMD_ERR);
570 if (name == NULL) {
571 skip = ((filter != NULL) &&
572 (strstr(c.cache_name, filter) == NULL));
573 } else if (filter == NULL) {
574 skip = (strcmp(c.cache_name, name) != 0);
575 } else {
576 /* match either -n or -N */
577 skip = ((strcmp(c.cache_name, name) != 0) &&
578 (strstr(c.cache_name, filter) == NULL));
581 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
582 kmem_slabs_header();
583 } else if ((opt_v || buckets) && !skip) {
584 if (DCMD_HDRSPEC(flags)) {
585 kmem_slabs_header();
586 } else {
587 boolean_t is_slab = B_FALSE;
588 const char *walker_name;
589 if (opt_v) {
590 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
591 walker_name = "kmem_slab_partial";
592 } else {
593 cb = (mdb_walk_cb_t)kmem_first_slab;
594 walker_name = "kmem_slab";
596 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
597 if (is_slab) {
598 kmem_slabs_header();
603 if (skip) {
604 return (DCMD_OK);
607 bzero(&stats, sizeof (kmem_slab_stats_t));
608 stats.ks_cp = &c;
609 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
610 /* +1 to include a zero bucket */
611 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
612 sizeof (*stats.ks_bucket), UM_SLEEP);
613 cb = (mdb_walk_cb_t)kmem_slablist_stat;
614 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
616 if (c.cache_buftotal == 0) {
617 pct = 0;
618 tenths_pct = 0;
619 } else {
620 uint64_t n = stats.ks_unused_buffers * 10000;
621 pct = (int)(n / c.cache_buftotal);
622 tenths_pct = pct - ((pct / 100) * 100);
623 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
624 if (tenths_pct == 10) {
625 pct += 100;
626 tenths_pct = 0;
630 pct /= 100;
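	/*
	 * Worked example of the rounding above: 2 unused buffers out of
	 * 150 total gives n = 20000, pct = 133, tenths_pct = 3, and
	 * pct /= 100 leaves 1, so the line below prints "1.3%".
	 */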
631 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
632 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
633 stats.ks_unused_buffers, pct, tenths_pct);
635 if (maxbuckets == 0) {
636 maxbuckets = stats.ks_max_buffers_per_slab;
639 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
640 (stats.ks_slabs > 0)) {
641 mdb_printf("\n");
642 kmem_slabs_print_dist(stats.ks_bucket,
643 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
646 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
647 sizeof (*stats.ks_bucket));
649 if (!opt_v) {
650 return (DCMD_OK);
653 if (opt_v && (stats.ks_partial_slabs > 0)) {
654 int i;
655 kmem_slab_usage_t *ksu;
657 mdb_printf(" %d complete (%d), %d partial:",
658 (stats.ks_slabs - stats.ks_partial_slabs),
659 stats.ks_max_buffers_per_slab,
660 stats.ks_partial_slabs);
662 for (i = 0; i < stats.ks_partial_slabs; i++) {
663 ksu = &stats.ks_usage[i];
664 mdb_printf(" %d%s", ksu->ksu_refcnt,
665 (ksu->ksu_nomove ? "*" : ""));
667 mdb_printf("\n\n");
670 if (stats.ks_usage_len > 0) {
671 mdb_free(stats.ks_usage,
672 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
675 return (DCMD_OK);
678 void
679 kmem_slabs_help(void)
681 mdb_printf("%s",
682 "Display slab usage per kmem cache.\n\n");
683 mdb_dec_indent(2);
684 mdb_printf("%<b>OPTIONS%</b>\n");
685 mdb_inc_indent(2);
686 mdb_printf("%s",
687 " -n name\n"
688 " name of kmem cache (or matching partial name)\n"
689 " -N name\n"
690 " exact name of kmem cache\n"
691 " -b maxbins\n"
692 " Print a distribution of allocated buffers per slab using at\n"
693 " most maxbins bins. The first bin is reserved for completely\n"
694 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
695 " effect as specifying the maximum allocated buffers per slab\n"
696 " or setting minbinsize to 1 (-B 1).\n"
697 " -B minbinsize\n"
698 " Print a distribution of allocated buffers per slab, making\n"
699 " all bins (except the first, reserved for completely allocated\n"
700 " slabs) at least minbinsize buffers apart.\n"
701 " -v verbose output: List the allocated buffer count of each partial\n"
702 " slab on the free list in order from front to back to show how\n"
703 " closely the slabs are ordered by usage. For example\n"
704 "\n"
705 " 10 complete, 3 partial (8): 7 3 1\n"
706 "\n"
707 " means there are thirteen slabs with eight buffers each, including\n"
708 " three partially allocated slabs with less than all eight buffers\n"
709 " allocated.\n"
710 "\n"
711 " Buffer allocations are always from the front of the partial slab\n"
712 " list. When a buffer is freed from a completely used slab, that\n"
713 " slab is added to the front of the partial slab list. Assuming\n"
714 " that all buffers are equally likely to be freed soon, the\n"
715 " desired order of partial slabs is most-used at the front of the\n"
716 " list and least-used at the back (as in the example above).\n"
717 " However, if a slab contains an allocated buffer that will not\n"
718 " soon be freed, it would be better for that slab to be at the\n"
719 " front where all of its buffers can be allocated. Taking a slab\n"
720 " off the partial slab list (either with all buffers freed or all\n"
721 " buffers allocated) reduces cache fragmentation.\n"
722 "\n"
723 " A slab's allocated buffer count representing a partial slab (9 in\n"
724 " the example below) may be marked as follows:\n"
725 "\n"
726 " 9* An asterisk indicates that kmem has marked the slab non-\n"
727 " reclaimable because the kmem client refused to move one of the\n"
728 " slab's buffers. Since kmem does not expect to completely free the\n"
729 " slab, it moves it to the front of the list in the hope of\n"
730 " completely allocating it instead. A slab marked with an asterisk\n"
731 " stays marked for as long as it remains on the partial slab list.\n"
732 "\n"
733 "Column\t\tDescription\n"
734 "\n"
735 "Cache Name\t\tname of kmem cache\n"
736 "Slabs\t\t\ttotal slab count\n"
737 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
738 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
739 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
740 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
741 "\t\t\t for accounting structures (debug mode), slab\n"
742 "\t\t\t coloring (incremental small offsets to stagger\n"
743 "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
746 static int
747 addrcmp(const void *lhs, const void *rhs)
749 uintptr_t p1 = *((uintptr_t *)lhs);
750 uintptr_t p2 = *((uintptr_t *)rhs);
752 if (p1 < p2)
753 return (-1);
754 if (p1 > p2)
755 return (1);
756 return (0);
759 static int
760 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
762 const kmem_bufctl_audit_t *bcp1 = *lhs;
763 const kmem_bufctl_audit_t *bcp2 = *rhs;
765 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
766 return (-1);
768 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
769 return (1);
771 return (0);
774 typedef struct kmem_hash_walk {
775 uintptr_t *kmhw_table;
776 size_t kmhw_nelems;
777 size_t kmhw_pos;
778 kmem_bufctl_t kmhw_cur;
779 } kmem_hash_walk_t;
782 kmem_hash_walk_init(mdb_walk_state_t *wsp)
784 kmem_hash_walk_t *kmhw;
785 uintptr_t *hash;
786 kmem_cache_t c;
787 uintptr_t haddr, addr = wsp->walk_addr;
788 size_t nelems;
789 size_t hsize;
791 if (addr == (uintptr_t)NULL) {
792 mdb_warn("kmem_hash doesn't support global walks\n");
793 return (WALK_ERR);
796 if (mdb_vread(&c, sizeof (c), addr) == -1) {
797 mdb_warn("couldn't read cache at addr %p", addr);
798 return (WALK_ERR);
801 if (!(c.cache_flags & KMF_HASH)) {
802 mdb_warn("cache %p doesn't have a hash table\n", addr);
803 return (WALK_DONE); /* nothing to do */
806 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
807 kmhw->kmhw_cur.bc_next = NULL;
808 kmhw->kmhw_pos = 0;
810 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
811 hsize = nelems * sizeof (uintptr_t);
812 haddr = (uintptr_t)c.cache_hash_table;
814 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
815 if (mdb_vread(hash, hsize, haddr) == -1) {
816 mdb_warn("failed to read hash table at %p", haddr);
817 mdb_free(hash, hsize);
818 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
819 return (WALK_ERR);
822 wsp->walk_data = kmhw;
824 return (WALK_NEXT);
828 kmem_hash_walk_step(mdb_walk_state_t *wsp)
830 kmem_hash_walk_t *kmhw = wsp->walk_data;
831 uintptr_t addr = (uintptr_t)NULL;
833 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == (uintptr_t)NULL) {
834 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
835 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) !=
836 (uintptr_t)NULL)
837 break;
840 if (addr == (uintptr_t)NULL)
841 return (WALK_DONE);
843 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
844 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
845 return (WALK_ERR);
848 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
851 void
852 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
854 kmem_hash_walk_t *kmhw = wsp->walk_data;
856 if (kmhw == NULL)
857 return;
859 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
860 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
864 * Find the address of the bufctl structure for the address 'buf' in cache
865 * 'cp', which is at address caddr, and place it in *out.
867 static int
868 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
870 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
871 kmem_bufctl_t *bcp;
872 kmem_bufctl_t bc;
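
	/*
	 * KMEM_HASH() computes the target-space address of the hash bucket
	 * for 'buf'; the read below fetches the head of that bucket's
	 * bufctl chain, which we then follow looking for a matching bc_addr.
	 */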
874 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
875 mdb_warn("unable to read hash bucket for %p in cache %p",
876 buf, caddr);
877 return (-1);
880 while (bcp != NULL) {
881 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
882 (uintptr_t)bcp) == -1) {
883 mdb_warn("unable to read bufctl at %p", bcp);
884 return (-1);
886 if (bc.bc_addr == buf) {
887 *out = (uintptr_t)bcp;
888 return (0);
890 bcp = bc.bc_next;
893 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
894 return (-1);
898 kmem_get_magsize(const kmem_cache_t *cp)
900 uintptr_t addr = (uintptr_t)cp->cache_magtype;
901 GElf_Sym mt_sym;
902 kmem_magtype_t mt;
903 int res;
906 * if cpu 0 has a non-zero magsize, it must be correct. caches
907 * with KMF_NOMAGAZINE have disabled their magazine layers, so
908 * it is okay to return 0 for them.
910 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
911 (cp->cache_flags & KMF_NOMAGAZINE))
912 return (res);
914 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
915 mdb_warn("unable to read 'kmem_magtype'");
916 } else if (addr < mt_sym.st_value ||
917 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
918 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
919 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
920 cp->cache_name, addr);
921 return (0);
923 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
924 mdb_warn("unable to read magtype at %a", addr);
925 return (0);
927 return (mt.mt_magsize);
930 /*ARGSUSED*/
931 static int
932 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
934 *est -= (sp->slab_chunks - sp->slab_refcnt);
936 return (WALK_NEXT);
940 * Returns an upper bound on the number of allocated buffers in a given
941 * cache.
943 size_t
944 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
946 int magsize;
947 size_t cache_est;
949 cache_est = cp->cache_buftotal;
951 (void) mdb_pwalk("kmem_slab_partial",
952 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
954 if ((magsize = kmem_get_magsize(cp)) != 0) {
955 size_t mag_est = cp->cache_full.ml_total * magsize;
957 if (cache_est >= mag_est) {
958 cache_est -= mag_est;
959 } else {
960 mdb_warn("cache %p's magazine layer holds more buffers "
961 "than the slab layer.\n", addr);
964 return (cache_est);
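
/*
 * READMAG_ROUNDS(rounds) (defined below) reads one kmem_magazine_t from the
 * target into 'mp' and appends 'rounds' of its mag_round[] entries to
 * maglist[]; it relies on kmp, mp, magbsize, maglist, magcnt, magmax, and i
 * being in scope in kmem_read_magazines() and jumps to that function's
 * fail: label on error or overflow.
 */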
967 #define READMAG_ROUNDS(rounds) { \
968 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
969 mdb_warn("couldn't read magazine at %p", kmp); \
970 goto fail; \
972 for (i = 0; i < rounds; i++) { \
973 maglist[magcnt++] = mp->mag_round[i]; \
974 if (magcnt == magmax) { \
975 mdb_warn("%d magazines exceeds fudge factor\n", \
976 magcnt); \
977 goto fail; \
983 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
984 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
986 kmem_magazine_t *kmp, *mp;
987 void **maglist = NULL;
988 int i, cpu;
989 size_t magsize, magmax, magbsize;
990 size_t magcnt = 0;
993 * Read the magtype out of the cache, after verifying the pointer's
994 * correctness.
996 magsize = kmem_get_magsize(cp);
997 if (magsize == 0) {
998 *maglistp = NULL;
999 *magcntp = 0;
1000 *magmaxp = 0;
1001 return (WALK_NEXT);
1005 * There are several places where we need to go buffer hunting:
1006 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1007 * and the full magazine list in the depot.
1009 * For an upper bound on the number of buffers in the magazine
1010 * layer, we have the number of magazines on the cache_full
1011 * list plus at most two magazines per CPU (the loaded and the
1012 * spare). Toss in 100 magazines as a fudge factor in case this
1013 * is live (the number "100" comes from the same fudge factor in
1014 * crash(1M)).
1016 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1017 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1019 if (magbsize >= PAGESIZE / 2) {
1020 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1021 addr, magbsize);
1022 return (WALK_ERR);
1025 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1026 mp = mdb_alloc(magbsize, alloc_flags);
1027 if (mp == NULL || maglist == NULL)
1028 goto fail;
1031 * First up: the magazines in the depot (i.e. on the cache_full list).
1033 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1034 READMAG_ROUNDS(magsize);
1035 kmp = mp->mag_next;
1037 if (kmp == cp->cache_full.ml_list)
1038 break; /* cache_full list loop detected */
1041 dprintf(("cache_full list done\n"));
1044 * Now whip through the CPUs, snagging the loaded magazines
1045 * and full spares.
1047 * In order to prevent inconsistent dumps, rounds and prounds
1048 * are copied aside before dumping begins.
1050 for (cpu = 0; cpu < ncpus; cpu++) {
1051 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1052 short rounds, prounds;
1054 if (KMEM_DUMPCC(ccp)) {
1055 rounds = ccp->cc_dump_rounds;
1056 prounds = ccp->cc_dump_prounds;
1057 } else {
1058 rounds = ccp->cc_rounds;
1059 prounds = ccp->cc_prounds;
1062 dprintf(("reading cpu cache %p\n",
1063 (uintptr_t)ccp - (uintptr_t)cp + addr));
1065 if (rounds > 0 &&
1066 (kmp = ccp->cc_loaded) != NULL) {
1067 dprintf(("reading %d loaded rounds\n", rounds));
1068 READMAG_ROUNDS(rounds);
1071 if (prounds > 0 &&
1072 (kmp = ccp->cc_ploaded) != NULL) {
1073 dprintf(("reading %d previously loaded rounds\n",
1074 prounds));
1075 READMAG_ROUNDS(prounds);
1079 dprintf(("magazine layer: %d buffers\n", magcnt));
1081 if (!(alloc_flags & UM_GC))
1082 mdb_free(mp, magbsize);
1084 *maglistp = maglist;
1085 *magcntp = magcnt;
1086 *magmaxp = magmax;
1088 return (WALK_NEXT);
1090 fail:
1091 if (!(alloc_flags & UM_GC)) {
1092 if (mp)
1093 mdb_free(mp, magbsize);
1094 if (maglist)
1095 mdb_free(maglist, magmax * sizeof (void *));
1097 return (WALK_ERR);
1100 static int
1101 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1103 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1106 static int
1107 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1109 kmem_bufctl_audit_t b;
1112 * if KMF_AUDIT is not set, we know that we're looking at a
1113 * kmem_bufctl_t.
1115 if (!(cp->cache_flags & KMF_AUDIT) ||
1116 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1117 (void) memset(&b, 0, sizeof (b));
1118 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1119 mdb_warn("unable to read bufctl at %p", buf);
1120 return (WALK_ERR);
1124 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1127 typedef struct kmem_walk {
1128 int kmw_type;
1130 uintptr_t kmw_addr; /* cache address */
1131 kmem_cache_t *kmw_cp;
1132 size_t kmw_csize;
1135 * magazine layer
1137 void **kmw_maglist;
1138 size_t kmw_max;
1139 size_t kmw_count;
1140 size_t kmw_pos;
1143 * slab layer
1145 char *kmw_valid; /* to keep track of freed buffers */
1146 char *kmw_ubase; /* buffer for slab data */
1147 } kmem_walk_t;
1149 static int
1150 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1152 kmem_walk_t *kmw;
1153 int ncpus, csize;
1154 kmem_cache_t *cp;
1155 size_t vm_quantum;
1157 size_t magmax, magcnt;
1158 void **maglist = NULL;
1159 uint_t chunksize, slabsize;
1160 int status = WALK_ERR;
1161 uintptr_t addr = wsp->walk_addr;
1162 const char *layered;
1164 type &= ~KM_HASH;
1166 if (addr == (uintptr_t)NULL) {
1167 mdb_warn("kmem walk doesn't support global walks\n");
1168 return (WALK_ERR);
1171 dprintf(("walking %p\n", addr));
1174 * First we need to figure out how many CPUs are configured in the
1175 * system to know how much to slurp out.
1177 mdb_readvar(&ncpus, "max_ncpus");
1179 csize = KMEM_CACHE_SIZE(ncpus);
1180 cp = mdb_alloc(csize, UM_SLEEP);
1182 if (mdb_vread(cp, csize, addr) == -1) {
1183 mdb_warn("couldn't read cache at addr %p", addr);
1184 goto out2;
1188 * It's easy for someone to hand us an invalid cache address.
1189 * Unfortunately, it is hard for this walker to survive an
1190 * invalid cache cleanly. So we make sure that:
1192 * 1. the vmem arena for the cache is readable,
1193 * 2. the vmem arena's quantum is a power of 2,
1194 * 3. our slabsize is a multiple of the quantum, and
1195 * 4. our chunksize is >0 and less than our slabsize.
1197 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1198 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1199 vm_quantum == 0 ||
1200 (vm_quantum & (vm_quantum - 1)) != 0 ||
1201 cp->cache_slabsize < vm_quantum ||
1202 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1203 cp->cache_chunksize == 0 ||
1204 cp->cache_chunksize > cp->cache_slabsize) {
1205 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1206 goto out2;
1209 dprintf(("buf total is %d\n", cp->cache_buftotal));
1211 if (cp->cache_buftotal == 0) {
1212 mdb_free(cp, csize);
1213 return (WALK_DONE);
1217 * If they ask for bufctls, but it's a small-slab cache,
1218 * there is nothing to report.
1220 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1221 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1222 cp->cache_flags));
1223 mdb_free(cp, csize);
1224 return (WALK_DONE);
1228 * If they want constructed buffers, but there's no constructor or
1229 * the cache has DEADBEEF checking enabled, there is nothing to report.
1231 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1232 cp->cache_constructor == NULL ||
1233 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1234 mdb_free(cp, csize);
1235 return (WALK_DONE);
1239 * Read in the contents of the magazine layer
1241 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1242 &magmax, UM_SLEEP) == WALK_ERR)
1243 goto out2;
1246 * We have all of the buffers from the magazines; if we are walking
1247 * allocated buffers, sort them so we can bsearch them later.
1249 if (type & KM_ALLOCATED)
1250 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1252 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1254 kmw->kmw_type = type;
1255 kmw->kmw_addr = addr;
1256 kmw->kmw_cp = cp;
1257 kmw->kmw_csize = csize;
1258 kmw->kmw_maglist = maglist;
1259 kmw->kmw_max = magmax;
1260 kmw->kmw_count = magcnt;
1261 kmw->kmw_pos = 0;
1264 * When walking allocated buffers in a KMF_HASH cache, we walk the
1265 * hash table instead of the slab layer.
1267 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1268 layered = "kmem_hash";
1270 kmw->kmw_type |= KM_HASH;
1271 } else {
1273 * If we are walking freed buffers, we only need the
1274 * magazine layer plus the partially allocated slabs.
1275 * To walk allocated buffers, we need all of the slabs.
1277 if (type & KM_ALLOCATED)
1278 layered = "kmem_slab";
1279 else
1280 layered = "kmem_slab_partial";
1283 * for small-slab caches, we read in the entire slab. For
1284 * freed buffers, we can just walk the freelist. For
1285 * allocated buffers, we use a 'valid' array to track
1286 * the freed buffers.
1288 if (!(cp->cache_flags & KMF_HASH)) {
1289 chunksize = cp->cache_chunksize;
1290 slabsize = cp->cache_slabsize;
1292 kmw->kmw_ubase = mdb_alloc(slabsize +
1293 sizeof (kmem_bufctl_t), UM_SLEEP);
1295 if (type & KM_ALLOCATED)
1296 kmw->kmw_valid =
1297 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1301 status = WALK_NEXT;
1303 if (mdb_layered_walk(layered, wsp) == -1) {
1304 mdb_warn("unable to start layered '%s' walk", layered);
1305 status = WALK_ERR;
1308 out1:
1309 if (status == WALK_ERR) {
1310 if (kmw->kmw_valid)
1311 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1313 if (kmw->kmw_ubase)
1314 mdb_free(kmw->kmw_ubase, slabsize +
1315 sizeof (kmem_bufctl_t));
1317 if (kmw->kmw_maglist)
1318 mdb_free(kmw->kmw_maglist,
1319 kmw->kmw_max * sizeof (uintptr_t));
1321 mdb_free(kmw, sizeof (kmem_walk_t));
1322 wsp->walk_data = NULL;
1325 out2:
1326 if (status == WALK_ERR)
1327 mdb_free(cp, csize);
1329 return (status);
1333 kmem_walk_step(mdb_walk_state_t *wsp)
1335 kmem_walk_t *kmw = wsp->walk_data;
1336 int type = kmw->kmw_type;
1337 kmem_cache_t *cp = kmw->kmw_cp;
1339 void **maglist = kmw->kmw_maglist;
1340 int magcnt = kmw->kmw_count;
1342 uintptr_t chunksize, slabsize;
1343 uintptr_t addr;
1344 const kmem_slab_t *sp;
1345 const kmem_bufctl_t *bcp;
1346 kmem_bufctl_t bc;
1348 int chunks;
1349 char *kbase;
1350 void *buf;
1351 int i, ret;
1353 char *valid, *ubase;
1356 * first, handle the 'kmem_hash' layered walk case
1358 if (type & KM_HASH) {
1360 * We have a buffer which has been allocated out of the
1361 * global layer. We need to make sure that it's not
1362 * actually sitting in a magazine before we report it as
1363 * an allocated buffer.
1365 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1367 if (magcnt > 0 &&
1368 bsearch(&buf, maglist, magcnt, sizeof (void *),
1369 addrcmp) != NULL)
1370 return (WALK_NEXT);
1372 if (type & KM_BUFCTL)
1373 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1375 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1378 ret = WALK_NEXT;
1380 addr = kmw->kmw_addr;
1383 * If we're walking freed buffers, report everything in the
1384 * magazine layer before processing the first slab.
1386 if ((type & KM_FREE) && magcnt != 0) {
1387 kmw->kmw_count = 0; /* only do this once */
1388 for (i = 0; i < magcnt; i++) {
1389 buf = maglist[i];
1391 if (type & KM_BUFCTL) {
1392 uintptr_t out;
1394 if (cp->cache_flags & KMF_BUFTAG) {
1395 kmem_buftag_t *btp;
1396 kmem_buftag_t tag;
1398 /* LINTED - alignment */
1399 btp = KMEM_BUFTAG(cp, buf);
1400 if (mdb_vread(&tag, sizeof (tag),
1401 (uintptr_t)btp) == -1) {
1402 mdb_warn("reading buftag for "
1403 "%p at %p", buf, btp);
1404 continue;
1406 out = (uintptr_t)tag.bt_bufctl;
1407 } else {
1408 if (kmem_hash_lookup(cp, addr, buf,
1409 &out) == -1)
1410 continue;
1412 ret = bufctl_walk_callback(cp, wsp, out);
1413 } else {
1414 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1417 if (ret != WALK_NEXT)
1418 return (ret);
1423 * If they want constructed buffers, we're finished, since the
1424 * magazine layer holds them all.
1426 if (type & KM_CONSTRUCTED)
1427 return (WALK_DONE);
1430 * Handle the buffers in the current slab
1432 chunksize = cp->cache_chunksize;
1433 slabsize = cp->cache_slabsize;
1435 sp = wsp->walk_layer;
1436 chunks = sp->slab_chunks;
1437 kbase = sp->slab_base;
1439 dprintf(("kbase is %p\n", kbase));
1441 if (!(cp->cache_flags & KMF_HASH)) {
1442 valid = kmw->kmw_valid;
1443 ubase = kmw->kmw_ubase;
1445 if (mdb_vread(ubase, chunks * chunksize,
1446 (uintptr_t)kbase) == -1) {
1447 mdb_warn("failed to read slab contents at %p", kbase);
1448 return (WALK_ERR);
1452 * Set up the valid map as fully allocated -- we'll punch
1453 * out the freelist.
1455 if (type & KM_ALLOCATED)
1456 (void) memset(valid, 1, chunks);
1457 } else {
1458 valid = NULL;
1459 ubase = NULL;
1463 * walk the slab's freelist
1465 bcp = sp->slab_head;
1467 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1470 * since we could be in the middle of allocating a buffer,
* our refcnt could be one higher than it ought to be. So we
1472 * check one further on the freelist than the count allows.
1474 for (i = sp->slab_refcnt; i <= chunks; i++) {
1475 uint_t ndx;
1477 dprintf(("bcp is %p\n", bcp));
1479 if (bcp == NULL) {
1480 if (i == chunks)
1481 break;
1482 mdb_warn(
1483 "slab %p in cache %p freelist too short by %d\n",
1484 sp, addr, chunks - i);
1485 break;
1488 if (cp->cache_flags & KMF_HASH) {
1489 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1490 mdb_warn("failed to read bufctl ptr at %p",
1491 bcp);
1492 break;
1494 buf = bc.bc_addr;
1495 } else {
1497 * Otherwise the buffer is (or should be) in the slab
1498 * that we've read in; determine its offset in the
1499 * slab, validate that it's not corrupt, and add to
* our base address to find the kmem_bufctl_t. (Note
1501 * that we don't need to add the size of the bufctl
1502 * to our offset calculation because of the slop that's
1503 * allocated for the buffer at ubase.)
1505 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1507 if (offs > chunks * chunksize) {
1508 mdb_warn("found corrupt bufctl ptr %p"
1509 " in slab %p in cache %p\n", bcp,
1510 wsp->walk_addr, addr);
1511 break;
1514 bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1515 buf = KMEM_BUF(cp, bcp);
1518 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1520 if (ndx > slabsize / cp->cache_bufsize) {
1522 * This is very wrong; we have managed to find
1523 * a buffer in the slab which shouldn't
1524 * actually be here. Emit a warning, and
1525 * try to continue.
1527 mdb_warn("buf %p is out of range for "
1528 "slab %p, cache %p\n", buf, sp, addr);
1529 } else if (type & KM_ALLOCATED) {
1531 * we have found a buffer on the slab's freelist;
1532 * clear its entry
1534 valid[ndx] = 0;
1535 } else {
1537 * Report this freed buffer
1539 if (type & KM_BUFCTL) {
1540 ret = bufctl_walk_callback(cp, wsp,
1541 (uintptr_t)bcp);
1542 } else {
1543 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1545 if (ret != WALK_NEXT)
1546 return (ret);
1549 bcp = bc.bc_next;
1552 if (bcp != NULL) {
1553 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1554 sp, addr, bcp));
1558 * If we are walking freed buffers, the loop above handled reporting
1559 * them.
1561 if (type & KM_FREE)
1562 return (WALK_NEXT);
1564 if (type & KM_BUFCTL) {
1565 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1566 "cache %p\n", addr);
1567 return (WALK_ERR);
1571 * Report allocated buffers, skipping buffers in the magazine layer.
1572 * We only get this far for small-slab caches.
1574 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1575 buf = (char *)kbase + i * chunksize;
1577 if (!valid[i])
1578 continue; /* on slab freelist */
1580 if (magcnt > 0 &&
1581 bsearch(&buf, maglist, magcnt, sizeof (void *),
1582 addrcmp) != NULL)
1583 continue; /* in magazine layer */
1585 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1587 return (ret);
1590 void
1591 kmem_walk_fini(mdb_walk_state_t *wsp)
1593 kmem_walk_t *kmw = wsp->walk_data;
1594 uintptr_t chunksize;
1595 uintptr_t slabsize;
1597 if (kmw == NULL)
1598 return;
1600 if (kmw->kmw_maglist != NULL)
1601 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1603 chunksize = kmw->kmw_cp->cache_chunksize;
1604 slabsize = kmw->kmw_cp->cache_slabsize;
1606 if (kmw->kmw_valid != NULL)
1607 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1608 if (kmw->kmw_ubase != NULL)
1609 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1611 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1612 mdb_free(kmw, sizeof (kmem_walk_t));
1615 /*ARGSUSED*/
1616 static int
1617 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1620 * Buffers allocated from NOTOUCH caches can also show up as freed
1621 * memory in other caches. This can be a little confusing, so we
1622 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1623 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1625 if (c->cache_cflags & KMC_NOTOUCH)
1626 return (WALK_NEXT);
1628 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1629 wsp->walk_cbdata, addr) == -1)
1630 return (WALK_DONE);
1632 return (WALK_NEXT);
1635 #define KMEM_WALK_ALL(name, wsp) { \
1636 wsp->walk_data = (name); \
1637 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1638 return (WALK_ERR); \
1639 return (WALK_DONE); \
1643 kmem_walk_init(mdb_walk_state_t *wsp)
1645 if (wsp->walk_arg != NULL)
1646 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1648 if (wsp->walk_addr == (uintptr_t)NULL)
1649 KMEM_WALK_ALL("kmem", wsp);
1650 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1654 bufctl_walk_init(mdb_walk_state_t *wsp)
1656 if (wsp->walk_addr == (uintptr_t)NULL)
1657 KMEM_WALK_ALL("bufctl", wsp);
1658 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1662 freemem_walk_init(mdb_walk_state_t *wsp)
1664 if (wsp->walk_addr == (uintptr_t)NULL)
1665 KMEM_WALK_ALL("freemem", wsp);
1666 return (kmem_walk_init_common(wsp, KM_FREE));
1670 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1672 if (wsp->walk_addr == (uintptr_t)NULL)
1673 KMEM_WALK_ALL("freemem_constructed", wsp);
1674 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1678 freectl_walk_init(mdb_walk_state_t *wsp)
1680 if (wsp->walk_addr == (uintptr_t)NULL)
1681 KMEM_WALK_ALL("freectl", wsp);
1682 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1686 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1688 if (wsp->walk_addr == (uintptr_t)NULL)
1689 KMEM_WALK_ALL("freectl_constructed", wsp);
1690 return (kmem_walk_init_common(wsp,
1691 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1694 typedef struct bufctl_history_walk {
1695 void *bhw_next;
1696 kmem_cache_t *bhw_cache;
1697 kmem_slab_t *bhw_slab;
1698 hrtime_t bhw_timestamp;
1699 } bufctl_history_walk_t;
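
/*
 * The bufctl_history walk starts from a bufctl and follows its bc_lastlog
 * chain backwards through the transaction log, stopping once the buffer
 * address, cache, or slab no longer match or the timestamps stop decreasing
 * (see bufctl_history_walk_step() below).
 */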
1702 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1704 bufctl_history_walk_t *bhw;
1705 kmem_bufctl_audit_t bc;
1706 kmem_bufctl_audit_t bcn;
1708 if (wsp->walk_addr == (uintptr_t)NULL) {
1709 mdb_warn("bufctl_history walk doesn't support global walks\n");
1710 return (WALK_ERR);
1713 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1714 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1715 return (WALK_ERR);
1718 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1719 bhw->bhw_timestamp = 0;
1720 bhw->bhw_cache = bc.bc_cache;
1721 bhw->bhw_slab = bc.bc_slab;
1724 * sometimes the first log entry matches the base bufctl; in that
1725 * case, skip the base bufctl.
1727 if (bc.bc_lastlog != NULL &&
1728 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1729 bc.bc_addr == bcn.bc_addr &&
1730 bc.bc_cache == bcn.bc_cache &&
1731 bc.bc_slab == bcn.bc_slab &&
1732 bc.bc_timestamp == bcn.bc_timestamp &&
1733 bc.bc_thread == bcn.bc_thread)
1734 bhw->bhw_next = bc.bc_lastlog;
1735 else
1736 bhw->bhw_next = (void *)wsp->walk_addr;
1738 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1739 wsp->walk_data = bhw;
1741 return (WALK_NEXT);
1745 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1747 bufctl_history_walk_t *bhw = wsp->walk_data;
1748 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1749 uintptr_t baseaddr = wsp->walk_addr;
1750 kmem_bufctl_audit_t bc;
1752 if (addr == (uintptr_t)NULL)
1753 return (WALK_DONE);
1755 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1756 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1757 return (WALK_ERR);
1761 * The bufctl is only valid if the address, cache, and slab are
1762 * correct. We also check that the timestamp is decreasing, to
1763 * prevent infinite loops.
1765 if ((uintptr_t)bc.bc_addr != baseaddr ||
1766 bc.bc_cache != bhw->bhw_cache ||
1767 bc.bc_slab != bhw->bhw_slab ||
1768 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1769 return (WALK_DONE);
1771 bhw->bhw_next = bc.bc_lastlog;
1772 bhw->bhw_timestamp = bc.bc_timestamp;
1774 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1777 void
1778 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1780 bufctl_history_walk_t *bhw = wsp->walk_data;
1782 mdb_free(bhw, sizeof (*bhw));
1785 typedef struct kmem_log_walk {
1786 kmem_bufctl_audit_t *klw_base;
1787 kmem_bufctl_audit_t **klw_sorted;
1788 kmem_log_header_t klw_lh;
1789 size_t klw_size;
1790 size_t klw_maxndx;
1791 size_t klw_ndx;
1792 } kmem_log_walk_t;
1795 kmem_log_walk_init(mdb_walk_state_t *wsp)
1797 uintptr_t lp = wsp->walk_addr;
1798 kmem_log_walk_t *klw;
1799 kmem_log_header_t *lhp;
1800 int maxndx, i, j, k;
1803 * By default (global walk), walk the kmem_transaction_log. Otherwise
1804 * read the log whose kmem_log_header_t is stored at walk_addr.
1806 if (lp == (uintptr_t)NULL &&
1807 mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1808 mdb_warn("failed to read 'kmem_transaction_log'");
1809 return (WALK_ERR);
1812 if (lp == (uintptr_t)NULL) {
1813 mdb_warn("log is disabled\n");
1814 return (WALK_ERR);
1817 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1818 lhp = &klw->klw_lh;
1820 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1821 mdb_warn("failed to read log header at %p", lp);
1822 mdb_free(klw, sizeof (kmem_log_walk_t));
1823 return (WALK_ERR);
1826 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1827 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1828 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1830 if (mdb_vread(klw->klw_base, klw->klw_size,
1831 (uintptr_t)lhp->lh_base) == -1) {
1832 mdb_warn("failed to read log at base %p", lhp->lh_base);
1833 mdb_free(klw->klw_base, klw->klw_size);
1834 mdb_free(klw, sizeof (kmem_log_walk_t));
1835 return (WALK_ERR);
1838 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1839 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1841 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1842 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1843 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1845 for (j = 0; j < maxndx; j++)
1846 klw->klw_sorted[k++] = &chunk[j];
1849 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1850 (int(*)(const void *, const void *))bufctlcmp);
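	/*
	 * bufctlcmp() orders entries by descending bc_timestamp, so the
	 * walk reports the most recent log entries first.
	 */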
1852 klw->klw_maxndx = k;
1853 wsp->walk_data = klw;
1855 return (WALK_NEXT);
1859 kmem_log_walk_step(mdb_walk_state_t *wsp)
1861 kmem_log_walk_t *klw = wsp->walk_data;
1862 kmem_bufctl_audit_t *bcp;
1864 if (klw->klw_ndx == klw->klw_maxndx)
1865 return (WALK_DONE);
1867 bcp = klw->klw_sorted[klw->klw_ndx++];
1869 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1870 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1873 void
1874 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1876 kmem_log_walk_t *klw = wsp->walk_data;
1878 mdb_free(klw->klw_base, klw->klw_size);
1879 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1880 sizeof (kmem_bufctl_audit_t *));
1881 mdb_free(klw, sizeof (kmem_log_walk_t));
1884 typedef struct allocdby_bufctl {
1885 uintptr_t abb_addr;
1886 hrtime_t abb_ts;
1887 } allocdby_bufctl_t;
1889 typedef struct allocdby_walk {
1890 const char *abw_walk;
1891 uintptr_t abw_thread;
1892 size_t abw_nbufs;
1893 size_t abw_size;
1894 allocdby_bufctl_t *abw_buf;
1895 size_t abw_ndx;
1896 } allocdby_walk_t;
1899 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1900 allocdby_walk_t *abw)
1902 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1903 return (WALK_NEXT);
1905 if (abw->abw_nbufs == abw->abw_size) {
1906 allocdby_bufctl_t *buf;
1907 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1909 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1911 bcopy(abw->abw_buf, buf, oldsize);
1912 mdb_free(abw->abw_buf, oldsize);
1914 abw->abw_size <<= 1;
1915 abw->abw_buf = buf;
1918 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1919 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1920 abw->abw_nbufs++;
1922 return (WALK_NEXT);
1925 /*ARGSUSED*/
1927 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1929 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1930 abw, addr) == -1) {
1931 mdb_warn("couldn't walk bufctl for cache %p", addr);
1932 return (WALK_DONE);
1935 return (WALK_NEXT);
1938 static int
1939 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1941 if (lhs->abb_ts < rhs->abb_ts)
1942 return (1);
1943 if (lhs->abb_ts > rhs->abb_ts)
1944 return (-1);
1945 return (0);
1948 static int
1949 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1951 allocdby_walk_t *abw;
1953 if (wsp->walk_addr == (uintptr_t)NULL) {
1954 mdb_warn("allocdby walk doesn't support global walks\n");
1955 return (WALK_ERR);
1958 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1960 abw->abw_thread = wsp->walk_addr;
1961 abw->abw_walk = walk;
1962 abw->abw_size = 128; /* something reasonable */
1963 abw->abw_buf =
1964 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1966 wsp->walk_data = abw;
1968 if (mdb_walk("kmem_cache",
1969 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1970 mdb_warn("couldn't walk kmem_cache");
1971 allocdby_walk_fini(wsp);
1972 return (WALK_ERR);
1975 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1976 (int(*)(const void *, const void *))allocdby_cmp);
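	/*
	 * allocdby_cmp() sorts by descending timestamp, so the most recent
	 * allocations (or frees, for the freedby variant) are reported
	 * first.
	 */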
1978 return (WALK_NEXT);
1982 allocdby_walk_init(mdb_walk_state_t *wsp)
1984 return (allocdby_walk_init_common(wsp, "bufctl"));
1988 freedby_walk_init(mdb_walk_state_t *wsp)
1990 return (allocdby_walk_init_common(wsp, "freectl"));
1994 allocdby_walk_step(mdb_walk_state_t *wsp)
1996 allocdby_walk_t *abw = wsp->walk_data;
1997 kmem_bufctl_audit_t bc;
1998 uintptr_t addr;
2000 if (abw->abw_ndx == abw->abw_nbufs)
2001 return (WALK_DONE);
2003 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2005 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2006 mdb_warn("couldn't read bufctl at %p", addr);
2007 return (WALK_DONE);
2010 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2013 void
2014 allocdby_walk_fini(mdb_walk_state_t *wsp)
2016 allocdby_walk_t *abw = wsp->walk_data;
2018 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2019 mdb_free(abw, sizeof (allocdby_walk_t));
2022 /*ARGSUSED*/
2024 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2026 char c[MDB_SYM_NAMLEN];
2027 GElf_Sym sym;
2028 int i;
2030 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2031 for (i = 0; i < bcp->bc_depth; i++) {
2032 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2033 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2034 continue;
2035 if (strncmp(c, "kmem_", 5) == 0)
2036 continue;
2037 mdb_printf("%s+0x%lx",
2038 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2039 break;
2041 mdb_printf("\n");
2043 return (WALK_NEXT);
2046 static int
2047 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2049 if (!(flags & DCMD_ADDRSPEC))
2050 return (DCMD_USAGE);
2052 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2054 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2055 mdb_warn("can't walk '%s' for %p", w, addr);
2056 return (DCMD_ERR);
2059 return (DCMD_OK);
2062 /*ARGSUSED*/
2064 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2066 return (allocdby_common(addr, flags, "allocdby"));
2069 /*ARGSUSED*/
2071 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2073 return (allocdby_common(addr, flags, "freedby"));
2077 * Return a string describing the address in relation to the given thread's
2078 * stack.
2080 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2082 * - If the address is above the stack pointer, return an empty string
2083 * signifying that the address is active.
2085 * - If the address is below the stack pointer, and the thread is not on proc,
2086 * return " (below sp)".
2088 * - If the address is below the stack pointer, and the thread is on proc,
2089 * return " (possibly below sp)". Depending on context, we may or may not
2090 * have an accurate t_sp.
2092 static const char *
2093 stack_active(const kthread_t *t, uintptr_t addr)
2095 uintptr_t panicstk;
2096 GElf_Sym sym;
2098 if (t->t_state == TS_FREE)
2099 return (" (inactive interrupt thread)");
2102 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2103 * no longer relates to the thread's real stack.
2105 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2106 panicstk = (uintptr_t)sym.st_value;
2108 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2109 return ("");
2112 if (addr >= t->t_sp + STACK_BIAS)
2113 return ("");
2115 if (t->t_state == TS_ONPROC)
2116 return (" (possibly below sp)");
2118 return (" (below sp)");
2122 * Additional state for the kmem and vmem ::whatis handlers
2124 typedef struct whatis_info {
2125 mdb_whatis_t *wi_w;
2126 const kmem_cache_t *wi_cache;
2127 const vmem_t *wi_vmem;
2128 vmem_t *wi_msb_arena;
2129 size_t wi_slab_size;
2130 uint_t wi_slab_found;
2131 uint_t wi_kmem_lite_count;
2132 uint_t wi_freemem;
2133 } whatis_info_t;
2135 /* call one of our dcmd functions with "-v" and the provided address */
2136 static void
2137 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2139 mdb_arg_t a;
2140 a.a_type = MDB_TYPE_STRING;
2141 a.a_un.a_str = "-v";
2143 mdb_printf(":\n");
2144 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2147 static void
2148 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2150 #define KMEM_LITE_MAX 16
2151 pc_t callers[KMEM_LITE_MAX];
2152 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2154 kmem_buftag_t bt;
2155 intptr_t stat;
2156 const char *plural = "";
2157 int i;
2159 /* validate our arguments and read in the buftag */
2160 if (count == 0 || count > KMEM_LITE_MAX ||
2161 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2162 return;
2164 /* validate the buffer state and read in the callers */
2165 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2167 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2168 return;
2170 if (mdb_vread(callers, count * sizeof (pc_t),
2171 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2172 return;
2174 /* If there aren't any filled in callers, bail */
2175 if (callers[0] == uninit)
2176 return;
2178 plural = (callers[1] == uninit) ? "" : "s";
2180 /* Everything's done and checked; print them out */
2181 mdb_printf(":\n");
2183 mdb_inc_indent(8);
2184 mdb_printf("recent caller%s: %a", plural, callers[0]);
2185 for (i = 1; i < count; i++) {
2186 if (callers[i] == uninit)
2187 break;
2188 mdb_printf(", %a", callers[i]);
2190 mdb_dec_indent(8);
2193 static void
2194 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2195 uintptr_t baddr)
2197 mdb_whatis_t *w = wi->wi_w;
2199 const kmem_cache_t *cp = wi->wi_cache;
2200 /* LINTED pointer cast may result in improper alignment */
2201 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2202 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2203 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2205 mdb_whatis_report_object(w, maddr, addr, "");
2207 if (baddr != 0 && !call_printer)
2208 mdb_printf("bufctl %p ", baddr);
2210 mdb_printf("%s from %s",
2211 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2213 if (baddr != 0 && call_printer) {
2214 whatis_call_printer(bufctl, baddr);
2215 return;
2218 /* for KMF_LITE caches, try to print out the previous callers */
2219 if (!quiet && (cp->cache_flags & KMF_LITE))
2220 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2222 mdb_printf("\n");
2225 /*ARGSUSED*/
2226 static int
2227 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2229 mdb_whatis_t *w = wi->wi_w;
2231 uintptr_t cur;
2232 size_t size = wi->wi_cache->cache_bufsize;
2234 while (mdb_whatis_match(w, addr, size, &cur))
2235 whatis_print_kmem(wi, cur, addr, (uintptr_t)NULL);
2237 return (WHATIS_WALKRET(w));
2240 /*ARGSUSED*/
2241 static int
2242 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2244 mdb_whatis_t *w = wi->wi_w;
2246 uintptr_t cur;
2247 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2248 size_t size = wi->wi_cache->cache_bufsize;
2250 while (mdb_whatis_match(w, addr, size, &cur))
2251 whatis_print_kmem(wi, cur, addr, baddr);
2253 return (WHATIS_WALKRET(w));
2256 static int
2257 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2259 mdb_whatis_t *w = wi->wi_w;
2261 size_t size = vs->vs_end - vs->vs_start;
2262 uintptr_t cur;
2264 /* We're not interested in anything but alloc and free segments */
2265 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2266 return (WALK_NEXT);
2268 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2269 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2272 * If we're not printing it separately, provide the vmem_seg
2273 * pointer if it has a stack trace.
2275 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2276 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2277 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2278 mdb_printf("vmem_seg %p ", addr);
2281 mdb_printf("%s from the %s vmem arena",
2282 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2283 wi->wi_vmem->vm_name);
2285 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2286 whatis_call_printer(vmem_seg, addr);
2287 else
2288 mdb_printf("\n");
2291 return (WHATIS_WALKRET(w));
2294 static int
2295 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2297 mdb_whatis_t *w = wi->wi_w;
2298 const char *nm = vmem->vm_name;
2300 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2301 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2303 if (identifier != idspace)
2304 return (WALK_NEXT);
2306 wi->wi_vmem = vmem;
2308 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2309 mdb_printf("Searching vmem arena %s...\n", nm);
2311 if (mdb_pwalk("vmem_seg",
2312 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2313 mdb_warn("can't walk vmem_seg for %p", addr);
2314 return (WALK_NEXT);
2317 return (WHATIS_WALKRET(w));
2320 /*ARGSUSED*/
2321 static int
2322 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2324 mdb_whatis_t *w = wi->wi_w;
2326 /* It must overlap with the slab data, or it's not interesting */
2327 if (mdb_whatis_overlaps(w,
2328 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2329 wi->wi_slab_found++;
2330 return (WALK_DONE);
2332 return (WALK_NEXT);
2335 static int
2336 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2338 mdb_whatis_t *w = wi->wi_w;
2340 char *walk, *freewalk;
2341 mdb_walk_cb_t func;
2342 int do_bufctl;
2344 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2345 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2347 if (identifier != idspace)
2348 return (WALK_NEXT);
2350 /* Override the '-b' flag as necessary */
2351 if (!(c->cache_flags & KMF_HASH))
2352 do_bufctl = FALSE; /* no bufctls to walk */
2353 else if (c->cache_flags & KMF_AUDIT)
2354 do_bufctl = TRUE; /* we always want debugging info */
2355 else
2356 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2358 if (do_bufctl) {
2359 walk = "bufctl";
2360 freewalk = "freectl";
2361 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2362 } else {
2363 walk = "kmem";
2364 freewalk = "freemem";
2365 func = (mdb_walk_cb_t)whatis_walk_kmem;
2368 wi->wi_cache = c;
2370 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2371 mdb_printf("Searching %s...\n", c->cache_name);
2374 * If more than two buffers live on each slab, figure out if we're
2375 * interested in anything in any slab before doing the more expensive
2376 * kmem/freemem (bufctl/freectl) walkers.
2378 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2379 if (!(c->cache_flags & KMF_HASH))
2380 wi->wi_slab_size -= sizeof (kmem_slab_t);
2382 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2383 wi->wi_slab_found = 0;
2384 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2385 addr) == -1) {
2386 mdb_warn("can't find kmem_slab walker");
2387 return (WALK_DONE);
2389 if (wi->wi_slab_found == 0)
2390 return (WALK_NEXT);
2393 wi->wi_freemem = FALSE;
2394 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2395 mdb_warn("can't find %s walker", walk);
2396 return (WALK_DONE);
2399 if (mdb_whatis_done(w))
2400 return (WALK_DONE);
2403 * We have searched for allocated memory; now search for freed memory.
2405 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2406 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2408 wi->wi_freemem = TRUE;
2409 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2410 mdb_warn("can't find %s walker", freewalk);
2411 return (WALK_DONE);
2414 return (WHATIS_WALKRET(w));
2417 static int
2418 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2420 if (c->cache_arena == wi->wi_msb_arena ||
2421 (c->cache_cflags & KMC_NOTOUCH))
2422 return (WALK_NEXT);
2424 return (whatis_walk_cache(addr, c, wi));
2427 static int
2428 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2430 if (c->cache_arena != wi->wi_msb_arena)
2431 return (WALK_NEXT);
2433 return (whatis_walk_cache(addr, c, wi));
2436 static int
2437 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2439 if (c->cache_arena == wi->wi_msb_arena ||
2440 !(c->cache_cflags & KMC_NOTOUCH))
2441 return (WALK_NEXT);
2443 return (whatis_walk_cache(addr, c, wi));
2446 static int
2447 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2449 uintptr_t cur;
2450 uintptr_t saddr;
2451 size_t size;
2454 * Often, one calls ::whatis on an address from a thread structure.
2455 * We use this opportunity to short circuit this case...
2457 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2458 mdb_whatis_report_object(w, cur, addr,
2459 "allocated as a thread structure\n");
2462 * Now check the stack
2464 if (t->t_stkbase == NULL)
2465 return (WALK_NEXT);
2468 * This assumes that t_stk is the end of the stack, but it's really
2469 * only the initial stack pointer for the thread. Arguments to the
2470 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2471 * that 't->t_stk::whatis' reports "part of t's stack", we include
2472 * t_stk in the range (the "+ 1", below), but the kernel should
2473 * really include the full stack bounds where we can find it.
2475 saddr = (uintptr_t)t->t_stkbase;
2476 size = (uintptr_t)t->t_stk - saddr + 1;
2477 while (mdb_whatis_match(w, saddr, size, &cur))
2478 mdb_whatis_report_object(w, cur, cur,
2479 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2481 return (WHATIS_WALKRET(w));
2484 static void
2485 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2486 uintptr_t base, size_t size, const char *where)
2488 uintptr_t cur;
2491 * Since we're searching for addresses inside a module, we report
2492 * them as symbols.
2494 while (mdb_whatis_match(w, base, size, &cur))
2495 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2498 static int
2499 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2501 char name[MODMAXNAMELEN];
2502 struct module mod;
2503 Shdr shdr;
2505 if (m->mod_mp == NULL)
2506 return (WALK_NEXT);
2508 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2509 mdb_warn("couldn't read modctl %p's module", addr);
2510 return (WALK_NEXT);
2513 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2514 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2516 whatis_modctl_match(w, name,
2517 (uintptr_t)mod.text, mod.text_size, "text segment");
2518 whatis_modctl_match(w, name,
2519 (uintptr_t)mod.data, mod.data_size, "data segment");
2520 whatis_modctl_match(w, name,
2521 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2523 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2524 mdb_warn("couldn't read symbol header for %p's module", addr);
2525 return (WALK_NEXT);
2528 whatis_modctl_match(w, name,
2529 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2530 whatis_modctl_match(w, name,
2531 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2533 return (WHATIS_WALKRET(w));
2536 /*ARGSUSED*/
2537 static int
2538 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2540 uintptr_t cur;
2542 uintptr_t base = (uintptr_t)seg->pages;
2543 size_t size = (uintptr_t)seg->epages - base;
2545 while (mdb_whatis_match(w, base, size, &cur)) {
2546 /* round our found pointer down to the page_t base. */
2547 size_t offset = (cur - base) % sizeof (page_t);
2549 mdb_whatis_report_object(w, cur, cur - offset,
2550 "allocated as a page structure\n");
2553 return (WHATIS_WALKRET(w));
2556 /*ARGSUSED*/
2557 static int
2558 whatis_run_modules(mdb_whatis_t *w, void *arg)
2560 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2561 mdb_warn("couldn't find modctl walker");
2562 return (1);
2564 return (0);
2567 /*ARGSUSED*/
2568 static int
2569 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2572 * Now search all thread stacks. Yes, this is a little weak; we
2573 * can save a lot of work by first checking to see if the
2574 * address is in segkp vs. segkmem. But hey, computers are
2575 * fast.
2577 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2578 mdb_warn("couldn't find thread walker");
2579 return (1);
2581 return (0);
2584 /*ARGSUSED*/
2585 static int
2586 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2588 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2589 mdb_warn("couldn't find memseg walker");
2590 return (1);
2592 return (0);
2595 /*ARGSUSED*/
2596 static int
2597 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2599 whatis_info_t wi;
2601 bzero(&wi, sizeof (wi));
2602 wi.wi_w = w;
2604 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2605 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2607 if (mdb_readvar(&wi.wi_kmem_lite_count,
2608 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2609 wi.wi_kmem_lite_count = 0;
2612 * We process kmem caches in the following order:
2614 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2615 * metadata (can be huge with KMF_AUDIT)
2616 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2618 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2619 &wi) == -1 ||
2620 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2621 &wi) == -1 ||
2622 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2623 &wi) == -1) {
2624 mdb_warn("couldn't find kmem_cache walker");
2625 return (1);
2627 return (0);
2630 /*ARGSUSED*/
2631 static int
2632 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2634 whatis_info_t wi;
2636 bzero(&wi, sizeof (wi));
2637 wi.wi_w = w;
2639 if (mdb_walk("vmem_postfix",
2640 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2641 mdb_warn("couldn't find vmem_postfix walker");
2642 return (1);
2644 return (0);
2647 typedef struct kmem_log_cpu {
2648 uintptr_t kmc_low;
2649 uintptr_t kmc_high;
2650 } kmem_log_cpu_t;
2652 typedef struct kmem_log_data {
2653 uintptr_t kmd_addr;
2654 kmem_log_cpu_t *kmd_cpu;
2655 } kmem_log_data_t;
2658 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2659 kmem_log_data_t *kmd)
2661 int i;
2662 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2663 size_t bufsize;
2665 for (i = 0; i < NCPU; i++) {
2666 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2667 break;
2670 if (kmd->kmd_addr) {
2671 if (b->bc_cache == NULL)
2672 return (WALK_NEXT);
2674 if (mdb_vread(&bufsize, sizeof (bufsize),
2675 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2676 mdb_warn(
2677 "failed to read cache_bufsize for cache at %p",
2678 b->bc_cache);
2679 return (WALK_ERR);
2682 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2683 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2684 return (WALK_NEXT);
2687 if (i == NCPU)
2688 mdb_printf(" ");
2689 else
2690 mdb_printf("%3d", i);
2692 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2693 b->bc_timestamp, b->bc_thread);
2695 return (WALK_NEXT);
2698 /*ARGSUSED*/
2700 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2702 kmem_log_header_t lh;
2703 kmem_cpu_log_header_t clh;
2704 uintptr_t lhp, clhp;
2705 int ncpus;
2706 uintptr_t *cpu;
2707 GElf_Sym sym;
2708 kmem_log_cpu_t *kmc;
2709 int i;
2710 kmem_log_data_t kmd;
2711 uint_t opt_b = FALSE;
2713 if (mdb_getopts(argc, argv,
2714 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2715 return (DCMD_USAGE);
2717 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2718 mdb_warn("failed to read 'kmem_transaction_log'");
2719 return (DCMD_ERR);
2722 if (lhp == (uintptr_t)NULL) {
2723 mdb_warn("no kmem transaction log\n");
2724 return (DCMD_ERR);
2727 mdb_readvar(&ncpus, "ncpus");
2729 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2730 mdb_warn("failed to read log header at %p", lhp);
2731 return (DCMD_ERR);
2734 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2736 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2738 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2739 mdb_warn("couldn't find 'cpu' array");
2740 return (DCMD_ERR);
2743 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2744 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2745 NCPU * sizeof (uintptr_t), sym.st_size);
2746 return (DCMD_ERR);
2749 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2750 mdb_warn("failed to read cpu array at %p", sym.st_value);
2751 return (DCMD_ERR);
2754 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2755 kmd.kmd_addr = (uintptr_t)NULL;
2756 kmd.kmd_cpu = kmc;
2758 for (i = 0; i < NCPU; i++) {
2760 if (cpu[i] == (uintptr_t)NULL)
2761 continue;
2763 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2764 mdb_warn("cannot read cpu %d's log header at %p",
2765 i, clhp);
2766 return (DCMD_ERR);
2769 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2770 (uintptr_t)lh.lh_base;
2771 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2773 clhp += sizeof (kmem_cpu_log_header_t);
2776 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2777 "TIMESTAMP", "THREAD");
2780 * If we have been passed an address, print out only log entries
2781 * corresponding to that address. If opt_b is specified, then interpret
2782 * the address as a bufctl.
2784 if (flags & DCMD_ADDRSPEC) {
2785 kmem_bufctl_audit_t b;
2787 if (opt_b) {
2788 kmd.kmd_addr = addr;
2789 } else {
2790 if (mdb_vread(&b,
2791 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2792 mdb_warn("failed to read bufctl at %p", addr);
2793 return (DCMD_ERR);
2796 (void) kmem_log_walk(addr, &b, &kmd);
2798 return (DCMD_OK);
2802 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2803 mdb_warn("can't find kmem log walker");
2804 return (DCMD_ERR);
2807 return (DCMD_OK);
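/*
 * Example usage (illustrative): '> ::kmem_log' dumps the kmem transaction
 * log, one line per entry with its CPU, log entry address, buffer address,
 * timestamp, and thread.  Supplying an address restricts the output to
 * entries matching that address (with -b controlling how the address is
 * interpreted, as described above).
 */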
2810 typedef struct bufctl_history_cb {
2811 int bhc_flags;
2812 int bhc_argc;
2813 const mdb_arg_t *bhc_argv;
2814 int bhc_ret;
2815 } bufctl_history_cb_t;
2817 /*ARGSUSED*/
2818 static int
2819 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2821 bufctl_history_cb_t *bhc = arg;
2823 bhc->bhc_ret =
2824 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2826 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2828 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2831 void
2832 bufctl_help(void)
2834 mdb_printf("%s",
2835 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2836 mdb_dec_indent(2);
2837 mdb_printf("%<b>OPTIONS%</b>\n");
2838 mdb_inc_indent(2);
2839 mdb_printf("%s",
2840 " -v Display the full content of the bufctl, including its stack trace\n"
2841 " -h retrieve the bufctl's transaction history, if available\n"
2842 " -a addr\n"
2843 " filter out bufctls not involving the buffer at addr\n"
2844 " -c caller\n"
2845 " filter out bufctls without the function/PC in their stack trace\n"
2846 " -e earliest\n"
2847 " filter out bufctls timestamped before earliest\n"
2848 " -l latest\n"
2849 " filter out bufctls timestamped after latest\n"
2850 " -t thread\n"
2851 " filter out bufctls not involving thread\n");
2855 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2857 kmem_bufctl_audit_t bc;
2858 uint_t verbose = FALSE;
2859 uint_t history = FALSE;
2860 uint_t in_history = FALSE;
2861 uintptr_t caller = (uintptr_t)NULL, thread = (uintptr_t)NULL;
2862 uintptr_t laddr, haddr, baddr = (uintptr_t)NULL;
2863 hrtime_t earliest = 0, latest = 0;
2864 int i, depth;
2865 char c[MDB_SYM_NAMLEN];
2866 GElf_Sym sym;
2868 if (mdb_getopts(argc, argv,
2869 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2870 'h', MDB_OPT_SETBITS, TRUE, &history,
2871 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2872 'c', MDB_OPT_UINTPTR, &caller,
2873 't', MDB_OPT_UINTPTR, &thread,
2874 'e', MDB_OPT_UINT64, &earliest,
2875 'l', MDB_OPT_UINT64, &latest,
2876 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2877 return (DCMD_USAGE);
2879 if (!(flags & DCMD_ADDRSPEC))
2880 return (DCMD_USAGE);
2882 if (in_history && !history)
2883 return (DCMD_USAGE);
2885 if (history && !in_history) {
2886 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2887 UM_SLEEP | UM_GC);
2888 bufctl_history_cb_t bhc;
2890 nargv[0].a_type = MDB_TYPE_STRING;
2891 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2893 for (i = 0; i < argc; i++)
2894 nargv[i + 1] = argv[i];
2897 * When in history mode, we treat each element as if it
2898 * were in a separate loop, so that the headers group
2899 * bufctls with similar histories.
2901 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2902 bhc.bhc_argc = argc + 1;
2903 bhc.bhc_argv = nargv;
2904 bhc.bhc_ret = DCMD_OK;
2906 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2907 addr) == -1) {
2908 mdb_warn("unable to walk bufctl_history");
2909 return (DCMD_ERR);
2912 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2913 mdb_printf("\n");
2915 return (bhc.bhc_ret);
2918 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2919 if (verbose) {
2920 mdb_printf("%16s %16s %16s %16s\n"
2921 "%<u>%16s %16s %16s %16s%</u>\n",
2922 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2923 "", "CACHE", "LASTLOG", "CONTENTS");
2924 } else {
2925 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2926 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2930 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2931 mdb_warn("couldn't read bufctl at %p", addr);
2932 return (DCMD_ERR);
2936 * Guard against bogus bc_depth in case the bufctl is corrupt or
2937 * the address does not really refer to a bufctl.
2939 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2941 if (caller != (uintptr_t)NULL) {
2942 laddr = caller;
2943 haddr = caller + sizeof (caller);
2945 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2946 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2948 * We were provided an exact symbol value; any
2949 * address in the function is valid.
2951 laddr = (uintptr_t)sym.st_value;
2952 haddr = (uintptr_t)sym.st_value + sym.st_size;
2955 for (i = 0; i < depth; i++)
2956 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2957 break;
2959 if (i == depth)
2960 return (DCMD_OK);
2963 if (thread != 0 && (uintptr_t)bc.bc_thread != thread)
2964 return (DCMD_OK);
2966 if (earliest != 0 && bc.bc_timestamp < earliest)
2967 return (DCMD_OK);
2969 if (latest != 0 && bc.bc_timestamp > latest)
2970 return (DCMD_OK);
2972 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2973 return (DCMD_OK);
2975 if (flags & DCMD_PIPE_OUT) {
2976 mdb_printf("%#lr\n", addr);
2977 return (DCMD_OK);
2980 if (verbose) {
2981 mdb_printf(
2982 "%<b>%16p%</b> %16p %16llx %16p\n"
2983 "%16s %16p %16p %16p\n",
2984 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2985 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2987 mdb_inc_indent(17);
2988 for (i = 0; i < depth; i++)
2989 mdb_printf("%a\n", bc.bc_stack[i]);
2990 mdb_dec_indent(17);
2991 mdb_printf("\n");
2992 } else {
2993 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2994 bc.bc_timestamp, bc.bc_thread);
2996 for (i = 0; i < depth; i++) {
2997 if (mdb_lookup_by_addr(bc.bc_stack[i],
2998 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2999 continue;
3000 if (strncmp(c, "kmem_", 5) == 0)
3001 continue;
3002 mdb_printf(" %a\n", bc.bc_stack[i]);
3003 break;
3006 if (i >= depth)
3007 mdb_printf("\n");
3010 return (DCMD_OK);
3013 typedef struct kmem_verify {
3014 uint64_t *kmv_buf; /* buffer to read cache contents into */
3015 size_t kmv_size; /* number of bytes in kmv_buf */
3016 int kmv_corruption; /* > 0 if corruption found. */
3017 int kmv_besilent; /* if set, don't report individual corruption sites */
3018 struct kmem_cache kmv_cache; /* the cache we're operating on */
3019 } kmem_verify_t;
3022 * verify_pattern()
3023 * verify that buf is filled with the pattern pat.
3025 static int64_t
3026 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3028 /*LINTED*/
3029 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3030 uint64_t *buf;
3032 for (buf = buf_arg; buf < bufend; buf++)
3033 if (*buf != pat)
3034 return ((uintptr_t)buf - (uintptr_t)buf_arg);
3035 return (-1);
3039 * verify_buftag()
3040 * verify that btp->bt_bxstat == (bcp ^ pat)
3042 static int
3043 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3045 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3049 * verify_free()
3050 * verify the integrity of a free block of memory by checking
3051 * that it is filled with 0xdeadbeef and that its buftag is sane.
3053 /*ARGSUSED1*/
3054 static int
3055 verify_free(uintptr_t addr, const void *data, void *private)
3057 kmem_verify_t *kmv = (kmem_verify_t *)private;
3058 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3059 int64_t corrupt; /* corruption offset */
3060 kmem_buftag_t *buftagp; /* ptr to buftag */
3061 kmem_cache_t *cp = &kmv->kmv_cache;
3062 int besilent = kmv->kmv_besilent;
3064 /*LINTED*/
3065 buftagp = KMEM_BUFTAG(cp, buf);
3068 * Read the buffer to check.
3070 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3071 if (!besilent)
3072 mdb_warn("couldn't read %p", addr);
3073 return (WALK_NEXT);
3076 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3077 KMEM_FREE_PATTERN)) >= 0) {
3078 if (!besilent)
3079 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3080 addr, (uintptr_t)addr + corrupt);
3081 goto corrupt;
3084 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3085 * the first bytes of the buffer, hence we cannot check for red
3086 * zone corruption.
3088 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3089 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3090 if (!besilent)
3091 mdb_printf("buffer %p (free) seems to "
3092 "have a corrupt redzone pattern\n", addr);
3093 goto corrupt;
3097 * confirm bufctl pointer integrity.
3099 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3100 if (!besilent)
3101 mdb_printf("buffer %p (free) has a corrupt "
3102 "buftag\n", addr);
3103 goto corrupt;
3106 return (WALK_NEXT);
3107 corrupt:
3108 kmv->kmv_corruption++;
3109 return (WALK_NEXT);
3113 * verify_alloc()
3114 * Verify that the buftag of an allocated buffer makes sense with respect
3115 * to the buffer.
3117 /*ARGSUSED1*/
3118 static int
3119 verify_alloc(uintptr_t addr, const void *data, void *private)
3121 kmem_verify_t *kmv = (kmem_verify_t *)private;
3122 kmem_cache_t *cp = &kmv->kmv_cache;
3123 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3124 /*LINTED*/
3125 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3126 uint32_t *ip = (uint32_t *)buftagp;
3127 uint8_t *bp = (uint8_t *)buf;
3128 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3129 int besilent = kmv->kmv_besilent;
3132 * Read the buffer to check.
3134 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3135 if (!besilent)
3136 mdb_warn("couldn't read %p", addr);
3137 return (WALK_NEXT);
3141 * There are two cases to handle:
3142 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3143 * 0xfeedfacefeedface at the end of it
3144 * 2. If the buf was alloc'd using kmem_alloc, it will have
3145 * 0xbb just past the end of the region in use. At the buftag,
3146 * it will have 0xfeedface (or, if the whole buffer is in use,
3147 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3148 * endianness), followed by 32 bits containing the offset of the
3149 * 0xbb byte in the buffer.
3151 * Finally, the two 32-bit words that comprise the second half of the
3152 * buftag should xor to KMEM_BUFTAG_ALLOC
3155 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3156 looks_ok = 1;
3157 else if (!KMEM_SIZE_VALID(ip[1]))
3158 size_ok = 0;
3159 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3160 looks_ok = 1;
3161 else
3162 size_ok = 0;
3164 if (!size_ok) {
3165 if (!besilent)
3166 mdb_printf("buffer %p (allocated) has a corrupt "
3167 "redzone size encoding\n", addr);
3168 goto corrupt;
3171 if (!looks_ok) {
3172 if (!besilent)
3173 mdb_printf("buffer %p (allocated) has a corrupt "
3174 "redzone signature\n", addr);
3175 goto corrupt;
3178 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3179 if (!besilent)
3180 mdb_printf("buffer %p (allocated) has a "
3181 "corrupt buftag\n", addr);
3182 goto corrupt;
3185 return (WALK_NEXT);
3186 corrupt:
3187 kmv->kmv_corruption++;
3188 return (WALK_NEXT);
3191 /*ARGSUSED2*/
3193 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3195 if (flags & DCMD_ADDRSPEC) {
3196 int check_alloc = 0, check_free = 0;
3197 kmem_verify_t kmv;
3199 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3200 addr) == -1) {
3201 mdb_warn("couldn't read kmem_cache %p", addr);
3202 return (DCMD_ERR);
3205 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3206 sizeof (kmem_buftag_t);
3207 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3208 kmv.kmv_corruption = 0;
3210 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3211 check_alloc = 1;
3212 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3213 check_free = 1;
3214 } else {
3215 if (!(flags & DCMD_LOOP)) {
3216 mdb_warn("cache %p (%s) does not have "
3217 "redzone checking enabled\n", addr,
3218 kmv.kmv_cache.cache_name);
3220 return (DCMD_ERR);
3223 if (flags & DCMD_LOOP) {
3225 * table mode, don't print out every corrupt buffer
3227 kmv.kmv_besilent = 1;
3228 } else {
3229 mdb_printf("Summary for cache '%s'\n",
3230 kmv.kmv_cache.cache_name);
3231 mdb_inc_indent(2);
3232 kmv.kmv_besilent = 0;
3235 if (check_alloc)
3236 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3237 if (check_free)
3238 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3240 if (flags & DCMD_LOOP) {
3241 if (kmv.kmv_corruption == 0) {
3242 mdb_printf("%-*s %?p clean\n",
3243 KMEM_CACHE_NAMELEN,
3244 kmv.kmv_cache.cache_name, addr);
3245 } else {
3246 char *s = ""; /* optional s in "buffer[s]" */
3247 if (kmv.kmv_corruption > 1)
3248 s = "s";
3250 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3251 KMEM_CACHE_NAMELEN,
3252 kmv.kmv_cache.cache_name, addr,
3253 kmv.kmv_corruption, s);
3255 } else {
3257 * This is the more verbose mode, when the user has
3258 * typed addr::kmem_verify. If the cache was clean,
3259 * nothing will have yet been printed. So say something.
3261 if (kmv.kmv_corruption == 0)
3262 mdb_printf("clean\n");
3264 mdb_dec_indent(2);
3266 } else {
3268 * If the user didn't specify a cache to verify, we'll walk all
3269 * kmem_cache's, specifying ourself as a callback for each...
3270 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3272 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3273 "Cache Name", "Addr", "Cache Integrity");
3274 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3277 return (DCMD_OK);
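/*
 * Example usage (illustrative): '> ::kmem_verify' walks every kmem cache and
 * prints one summary line per cache ("clean" or a corrupt-buffer count);
 * '> addr::kmem_verify' checks a single cache and reports each corrupt
 * buffer it finds, provided the cache has redzone checking enabled.
 */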
3280 typedef struct vmem_node {
3281 struct vmem_node *vn_next;
3282 struct vmem_node *vn_parent;
3283 struct vmem_node *vn_sibling;
3284 struct vmem_node *vn_children;
3285 uintptr_t vn_addr;
3286 int vn_marked;
3287 vmem_t vn_vmem;
3288 } vmem_node_t;
3290 typedef struct vmem_walk {
3291 vmem_node_t *vw_root;
3292 vmem_node_t *vw_current;
3293 } vmem_walk_t;
3296 vmem_walk_init(mdb_walk_state_t *wsp)
3298 uintptr_t vaddr, paddr;
3299 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3300 vmem_walk_t *vw;
3302 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3303 mdb_warn("couldn't read 'vmem_list'");
3304 return (WALK_ERR);
3307 while (vaddr != (uintptr_t)NULL) {
3308 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3309 vp->vn_addr = vaddr;
3310 vp->vn_next = head;
3311 head = vp;
3313 if (vaddr == wsp->walk_addr)
3314 current = vp;
3316 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3317 mdb_warn("couldn't read vmem_t at %p", vaddr);
3318 goto err;
3321 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3324 for (vp = head; vp != NULL; vp = vp->vn_next) {
3326 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) ==
3327 (uintptr_t)NULL) {
3328 vp->vn_sibling = root;
3329 root = vp;
3330 continue;
3333 for (parent = head; parent != NULL; parent = parent->vn_next) {
3334 if (parent->vn_addr != paddr)
3335 continue;
3336 vp->vn_sibling = parent->vn_children;
3337 parent->vn_children = vp;
3338 vp->vn_parent = parent;
3339 break;
3342 if (parent == NULL) {
3343 mdb_warn("couldn't find %p's parent (%p)\n",
3344 vp->vn_addr, paddr);
3345 goto err;
3349 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3350 vw->vw_root = root;
3352 if (current != NULL)
3353 vw->vw_current = current;
3354 else
3355 vw->vw_current = root;
3357 wsp->walk_data = vw;
3358 return (WALK_NEXT);
3359 err:
3360 for (vp = head; head != NULL; vp = head) {
3361 head = vp->vn_next;
3362 mdb_free(vp, sizeof (vmem_node_t));
3365 return (WALK_ERR);
3369 vmem_walk_step(mdb_walk_state_t *wsp)
3371 vmem_walk_t *vw = wsp->walk_data;
3372 vmem_node_t *vp;
3373 int rval;
3375 if ((vp = vw->vw_current) == NULL)
3376 return (WALK_DONE);
3378 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3380 if (vp->vn_children != NULL) {
3381 vw->vw_current = vp->vn_children;
3382 return (rval);
3385 do {
3386 vw->vw_current = vp->vn_sibling;
3387 vp = vp->vn_parent;
3388 } while (vw->vw_current == NULL && vp != NULL);
3390 return (rval);
3394 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3395 * children are visited before their parent. We perform the postfix walk
3396 * iteratively (rather than recursively) to allow mdb to regain control
3397 * after each callback.
3400 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3402 vmem_walk_t *vw = wsp->walk_data;
3403 vmem_node_t *vp = vw->vw_current;
3404 int rval;
3407 * If this node is marked, then we know that we have already visited
3408 * all of its children. If the node has any siblings, they need to
3409 * be visited next; otherwise, we need to visit the parent. Note
3410 * that vp->vn_marked will only be zero on the first invocation of
3411 * the step function.
3413 if (vp->vn_marked) {
3414 if (vp->vn_sibling != NULL)
3415 vp = vp->vn_sibling;
3416 else if (vp->vn_parent != NULL)
3417 vp = vp->vn_parent;
3418 else {
3420 * We have neither a parent, nor a sibling, and we
3421 * have already been visited; we're done.
3423 return (WALK_DONE);
3428 * Before we visit this node, visit its children.
3430 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3431 vp = vp->vn_children;
3433 vp->vn_marked = 1;
3434 vw->vw_current = vp;
3435 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3437 return (rval);
3440 void
3441 vmem_walk_fini(mdb_walk_state_t *wsp)
3443 vmem_walk_t *vw = wsp->walk_data;
3444 vmem_node_t *root = vw->vw_root;
3445 int done;
3447 if (root == NULL)
3448 return;
3450 if ((vw->vw_root = root->vn_children) != NULL)
3451 vmem_walk_fini(wsp);
3453 vw->vw_root = root->vn_sibling;
3454 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3455 mdb_free(root, sizeof (vmem_node_t));
3457 if (done) {
3458 mdb_free(vw, sizeof (vmem_walk_t));
3459 } else {
3460 vmem_walk_fini(wsp);
3464 typedef struct vmem_seg_walk {
3465 uint8_t vsw_type;
3466 uintptr_t vsw_start;
3467 uintptr_t vsw_current;
3468 } vmem_seg_walk_t;
3470 /*ARGSUSED*/
3472 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3474 vmem_seg_walk_t *vsw;
3476 if (wsp->walk_addr == (uintptr_t)NULL) {
3477 mdb_warn("vmem_%s does not support global walks\n", name);
3478 return (WALK_ERR);
3481 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3483 vsw->vsw_type = type;
3484 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3485 vsw->vsw_current = vsw->vsw_start;
3487 return (WALK_NEXT);
3491 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3493 #define VMEM_NONE 0
3496 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3498 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3502 vmem_free_walk_init(mdb_walk_state_t *wsp)
3504 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3508 vmem_span_walk_init(mdb_walk_state_t *wsp)
3510 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3514 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3516 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3520 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3522 vmem_seg_t seg;
3523 vmem_seg_walk_t *vsw = wsp->walk_data;
3524 uintptr_t addr = vsw->vsw_current;
3525 static size_t seg_size = 0;
3526 int rval;
3528 if (!seg_size) {
3529 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3530 mdb_warn("failed to read 'vmem_seg_size'");
3531 seg_size = sizeof (vmem_seg_t);
3535 if (seg_size < sizeof (seg))
3536 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3538 if (mdb_vread(&seg, seg_size, addr) == -1) {
3539 mdb_warn("couldn't read vmem_seg at %p", addr);
3540 return (WALK_ERR);
3543 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3544 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3545 rval = WALK_NEXT;
3546 } else {
3547 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3550 if (vsw->vsw_current == vsw->vsw_start)
3551 return (WALK_DONE);
3553 return (rval);
3556 void
3557 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3559 vmem_seg_walk_t *vsw = wsp->walk_data;
3561 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3564 #define VMEM_NAMEWIDTH 22
3567 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3569 vmem_t v, parent;
3570 vmem_kstat_t *vkp = &v.vm_kstat;
3571 uintptr_t paddr;
3572 int ident = 0;
3573 char c[VMEM_NAMEWIDTH];
3575 if (!(flags & DCMD_ADDRSPEC)) {
3576 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3577 mdb_warn("can't walk vmem");
3578 return (DCMD_ERR);
3580 return (DCMD_OK);
3583 if (DCMD_HDRSPEC(flags))
3584 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3585 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3586 "TOTAL", "SUCCEED", "FAIL");
3588 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3589 mdb_warn("couldn't read vmem at %p", addr);
3590 return (DCMD_ERR);
3593 for (paddr = (uintptr_t)v.vm_source; paddr != (uintptr_t)NULL;
3594 ident += 2) {
3595 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3596 mdb_warn("couldn't trace %p's ancestry", addr);
3597 ident = 0;
3598 break;
3600 paddr = (uintptr_t)parent.vm_source;
3603 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3605 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3606 addr, VMEM_NAMEWIDTH, c,
3607 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3608 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3610 return (DCMD_OK);
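/*
 * Example usage (illustrative): '> ::vmem' walks all vmem arenas and prints
 * one line per arena, indented to show its ancestry; '> addr::vmem' prints
 * the single arena at addr, with in-use, total, succeeded, and failed
 * counts taken from the arena's kstats.
 */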
3613 void
3614 vmem_seg_help(void)
3616 mdb_printf("%s",
3617 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3618 "\n"
3619 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3620 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3621 "information.\n");
3622 mdb_dec_indent(2);
3623 mdb_printf("%<b>OPTIONS%</b>\n");
3624 mdb_inc_indent(2);
3625 mdb_printf("%s",
3626 " -v Display the full content of the vmem_seg, including its stack trace\n"
3627 " -s report the size of the segment, instead of the end address\n"
3628 " -c caller\n"
3629 " filter out segments without the function/PC in their stack trace\n"
3630 " -e earliest\n"
3631 " filter out segments timestamped before earliest\n"
3632 " -l latest\n"
3633 " filter out segments timestamped after latest\n"
3634 " -m minsize\n"
3635 " filer out segments smaller than minsize\n"
3636 " -M maxsize\n"
3637 " filer out segments larger than maxsize\n"
3638 " -t thread\n"
3639 " filter out segments not involving thread\n"
3640 " -T type\n"
3641 " filter out segments not of type 'type'\n"
3642 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3645 /*ARGSUSED*/
3647 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3649 vmem_seg_t vs;
3650 pc_t *stk = vs.vs_stack;
3651 uintptr_t sz;
3652 uint8_t t;
3653 const char *type = NULL;
3654 GElf_Sym sym;
3655 char c[MDB_SYM_NAMLEN];
3656 int no_debug;
3657 int i;
3658 int depth;
3659 uintptr_t laddr, haddr;
3661 uintptr_t caller = (uintptr_t)NULL, thread = (uintptr_t)NULL;
3662 uintptr_t minsize = 0, maxsize = 0;
3664 hrtime_t earliest = 0, latest = 0;
3666 uint_t size = 0;
3667 uint_t verbose = 0;
3669 if (!(flags & DCMD_ADDRSPEC))
3670 return (DCMD_USAGE);
3672 if (mdb_getopts(argc, argv,
3673 'c', MDB_OPT_UINTPTR, &caller,
3674 'e', MDB_OPT_UINT64, &earliest,
3675 'l', MDB_OPT_UINT64, &latest,
3676 's', MDB_OPT_SETBITS, TRUE, &size,
3677 'm', MDB_OPT_UINTPTR, &minsize,
3678 'M', MDB_OPT_UINTPTR, &maxsize,
3679 't', MDB_OPT_UINTPTR, &thread,
3680 'T', MDB_OPT_STR, &type,
3681 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3682 NULL) != argc)
3683 return (DCMD_USAGE);
3685 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3686 if (verbose) {
3687 mdb_printf("%16s %4s %16s %16s %16s\n"
3688 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3689 "ADDR", "TYPE", "START", "END", "SIZE",
3690 "", "", "THREAD", "TIMESTAMP", "");
3691 } else {
3692 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3693 "START", size? "SIZE" : "END", "WHO");
3697 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3698 mdb_warn("couldn't read vmem_seg at %p", addr);
3699 return (DCMD_ERR);
3702 if (type != NULL) {
3703 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3704 t = VMEM_ALLOC;
3705 else if (strcmp(type, "FREE") == 0)
3706 t = VMEM_FREE;
3707 else if (strcmp(type, "SPAN") == 0)
3708 t = VMEM_SPAN;
3709 else if (strcmp(type, "ROTR") == 0 ||
3710 strcmp(type, "ROTOR") == 0)
3711 t = VMEM_ROTOR;
3712 else if (strcmp(type, "WLKR") == 0 ||
3713 strcmp(type, "WALKER") == 0)
3714 t = VMEM_WALKER;
3715 else {
3716 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3717 type);
3718 return (DCMD_ERR);
3721 if (vs.vs_type != t)
3722 return (DCMD_OK);
3725 sz = vs.vs_end - vs.vs_start;
3727 if (minsize != 0 && sz < minsize)
3728 return (DCMD_OK);
3730 if (maxsize != 0 && sz > maxsize)
3731 return (DCMD_OK);
3733 t = vs.vs_type;
3734 depth = vs.vs_depth;
3737 * debug info, when present, is only accurate for VMEM_ALLOC segments
3739 no_debug = (t != VMEM_ALLOC) ||
3740 (depth == 0 || depth > VMEM_STACK_DEPTH);
3742 if (no_debug) {
3743 if (caller != (uintptr_t)NULL || thread != (uintptr_t)NULL ||
3744 earliest != 0 ||
3745 latest != 0)
3746 return (DCMD_OK); /* not enough info */
3747 } else {
3748 if (caller != (uintptr_t)NULL) {
3749 laddr = caller;
3750 haddr = caller + sizeof (caller);
3752 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3753 sizeof (c), &sym) != -1 &&
3754 caller == (uintptr_t)sym.st_value) {
3756 * We were provided an exact symbol value; any
3757 * address in the function is valid.
3759 laddr = (uintptr_t)sym.st_value;
3760 haddr = (uintptr_t)sym.st_value + sym.st_size;
3763 for (i = 0; i < depth; i++)
3764 if (vs.vs_stack[i] >= laddr &&
3765 vs.vs_stack[i] < haddr)
3766 break;
3768 if (i == depth)
3769 return (DCMD_OK);
3772 if (thread != (uintptr_t)NULL &&
3773 (uintptr_t)vs.vs_thread != thread)
3774 return (DCMD_OK);
3776 if (earliest != 0 && vs.vs_timestamp < earliest)
3777 return (DCMD_OK);
3779 if (latest != 0 && vs.vs_timestamp > latest)
3780 return (DCMD_OK);
3783 type = (t == VMEM_ALLOC ? "ALLC" :
3784 t == VMEM_FREE ? "FREE" :
3785 t == VMEM_SPAN ? "SPAN" :
3786 t == VMEM_ROTOR ? "ROTR" :
3787 t == VMEM_WALKER ? "WLKR" :
3788 "????");
3790 if (flags & DCMD_PIPE_OUT) {
3791 mdb_printf("%#lr\n", addr);
3792 return (DCMD_OK);
3795 if (verbose) {
3796 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3797 addr, type, vs.vs_start, vs.vs_end, sz);
3799 if (no_debug)
3800 return (DCMD_OK);
3802 mdb_printf("%16s %4s %16p %16llx\n",
3803 "", "", vs.vs_thread, vs.vs_timestamp);
3805 mdb_inc_indent(17);
3806 for (i = 0; i < depth; i++) {
3807 mdb_printf("%a\n", stk[i]);
3809 mdb_dec_indent(17);
3810 mdb_printf("\n");
3811 } else {
3812 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3813 vs.vs_start, size? sz : vs.vs_end);
3815 if (no_debug) {
3816 mdb_printf("\n");
3817 return (DCMD_OK);
3820 for (i = 0; i < depth; i++) {
3821 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3822 c, sizeof (c), &sym) == -1)
3823 continue;
3824 if (strncmp(c, "vmem_", 5) == 0)
3825 continue;
3826 break;
3828 mdb_printf(" %a\n", stk[i]);
3830 return (DCMD_OK);
3833 typedef struct kmalog_data {
3834 uintptr_t kma_addr;
3835 hrtime_t kma_newest;
3836 } kmalog_data_t;
3838 /*ARGSUSED*/
3839 static int
3840 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3842 char name[KMEM_CACHE_NAMELEN + 1];
3843 hrtime_t delta;
3844 int i, depth;
3845 size_t bufsize;
3847 if (bcp->bc_timestamp == 0)
3848 return (WALK_DONE);
3850 if (kma->kma_newest == 0)
3851 kma->kma_newest = bcp->bc_timestamp;
3853 if (kma->kma_addr) {
3854 if (mdb_vread(&bufsize, sizeof (bufsize),
3855 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3856 mdb_warn(
3857 "failed to read cache_bufsize for cache at %p",
3858 bcp->bc_cache);
3859 return (WALK_ERR);
3862 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3863 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3864 return (WALK_NEXT);
3867 delta = kma->kma_newest - bcp->bc_timestamp;
3868 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3870 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3871 &bcp->bc_cache->cache_name) <= 0)
3872 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3874 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3875 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3877 for (i = 0; i < depth; i++)
3878 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3880 return (WALK_NEXT);
3884 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3886 const char *logname = "kmem_transaction_log";
3887 kmalog_data_t kma;
3889 if (argc > 1)
3890 return (DCMD_USAGE);
3892 kma.kma_newest = 0;
3893 if (flags & DCMD_ADDRSPEC)
3894 kma.kma_addr = addr;
3895 else
3896 kma.kma_addr = (uintptr_t)NULL;
3898 if (argc > 0) {
3899 if (argv->a_type != MDB_TYPE_STRING)
3900 return (DCMD_USAGE);
3901 if (strcmp(argv->a_un.a_str, "fail") == 0)
3902 logname = "kmem_failure_log";
3903 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3904 logname = "kmem_slab_log";
3905 else
3906 return (DCMD_USAGE);
3909 if (mdb_readvar(&addr, logname) == -1) {
3910 mdb_warn("failed to read %s log header pointer");
3911 return (DCMD_ERR);
3914 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3915 mdb_warn("failed to walk kmem log");
3916 return (DCMD_ERR);
3919 return (DCMD_OK);
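/*
 * Example usage (illustrative): '> ::kmalog' prints each entry of the kmem
 * transaction log with a timestamp relative to the most recent entry, the
 * buffer address, cache name, and stack trace; '> ::kmalog fail' and
 * '> ::kmalog slab' use the failure and slab logs instead; '> addr::kmalog'
 * limits the output to entries for the buffer containing addr.
 */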
3923 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3924 * The first piece is a structure which we use to accumulate kmem_cache_t
3925 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3926 * walker; we either add all caches, or ones named explicitly as arguments.
3929 typedef struct kmclist {
3930 const char *kmc_name; /* Name to match (or NULL) */
3931 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */
3932 int kmc_nelems; /* Num entries in kmc_caches */
3933 int kmc_size; /* Size of kmc_caches array */
3934 } kmclist_t;
3936 static int
3937 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3939 void *p;
3940 int s;
3942 if (kmc->kmc_name == NULL ||
3943 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3945 * If we have a match, grow our array (if necessary), and then
3946 * add the virtual address of the matching cache to our list.
3948 if (kmc->kmc_nelems >= kmc->kmc_size) {
3949 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3950 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3952 bcopy(kmc->kmc_caches, p,
3953 sizeof (uintptr_t) * kmc->kmc_size);
3955 kmc->kmc_caches = p;
3956 kmc->kmc_size = s;
3959 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3960 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3963 return (WALK_NEXT);
3967 * The second piece of ::kmausers is a hash table of allocations. Each
3968 * allocation owner is identified by its stack trace and data_size. We then
3969 * track the total bytes of all such allocations, and the number of allocations
3970 * to report at the end. Once we have a list of caches, we walk through the
3971 * allocated bufctls of each, and update our hash table accordingly.
3974 typedef struct kmowner {
3975 struct kmowner *kmo_head; /* First hash elt in bucket */
3976 struct kmowner *kmo_next; /* Next hash elt in chain */
3977 size_t kmo_signature; /* Hash table signature */
3978 uint_t kmo_num; /* Number of allocations */
3979 size_t kmo_data_size; /* Size of each allocation */
3980 size_t kmo_total_size; /* Total bytes of allocation */
3981 int kmo_depth; /* Depth of stack trace */
3982 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
3983 } kmowner_t;
3985 typedef struct kmusers {
3986 uintptr_t kmu_addr; /* address of interest */
3987 const kmem_cache_t *kmu_cache; /* Current kmem cache */
3988 kmowner_t *kmu_hash; /* Hash table of owners */
3989 int kmu_nelems; /* Number of entries in use */
3990 int kmu_size; /* Total number of entries */
3991 } kmusers_t;
3993 static void
3994 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3995 size_t size, size_t data_size)
3997 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3998 size_t bucket, signature = data_size;
3999 kmowner_t *kmo, *kmoend;
4002 * If the hash table is full, double its size and rehash everything.
4004 if (kmu->kmu_nelems >= kmu->kmu_size) {
4005 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4007 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4008 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4009 kmu->kmu_hash = kmo;
4010 kmu->kmu_size = s;
4012 kmoend = kmu->kmu_hash + kmu->kmu_size;
4013 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4014 kmo->kmo_head = NULL;
4016 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4017 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4018 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4019 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4020 kmu->kmu_hash[bucket].kmo_head = kmo;
4025 * Finish computing the hash signature from the stack trace, and then
4026 * see if the owner is in the hash table. If so, update our stats.
4028 for (i = 0; i < depth; i++)
4029 signature += bcp->bc_stack[i];
4031 bucket = signature & (kmu->kmu_size - 1);
4033 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4034 if (kmo->kmo_signature == signature) {
4035 size_t difference = 0;
4037 difference |= kmo->kmo_data_size - data_size;
4038 difference |= kmo->kmo_depth - depth;
4040 for (i = 0; i < depth; i++) {
4041 difference |= kmo->kmo_stack[i] -
4042 bcp->bc_stack[i];
4045 if (difference == 0) {
4046 kmo->kmo_total_size += size;
4047 kmo->kmo_num++;
4048 return;
4054 * If the owner is not yet hashed, grab the next element and fill it
4055 * in based on the allocation information.
4057 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4058 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4059 kmu->kmu_hash[bucket].kmo_head = kmo;
4061 kmo->kmo_signature = signature;
4062 kmo->kmo_num = 1;
4063 kmo->kmo_data_size = data_size;
4064 kmo->kmo_total_size = size;
4065 kmo->kmo_depth = depth;
4067 for (i = 0; i < depth; i++)
4068 kmo->kmo_stack[i] = bcp->bc_stack[i];
4072 * When ::kmausers is invoked without the -f flag, we simply update our hash
4073 * table with the information from each allocated bufctl.
4075 /*ARGSUSED*/
4076 static int
4077 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4079 const kmem_cache_t *cp = kmu->kmu_cache;
4081 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4082 return (WALK_NEXT);
4086 * When ::kmausers is invoked with the -f flag, we print out the information
4087 * for each bufctl as well as updating the hash table.
4089 static int
4090 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4092 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4093 const kmem_cache_t *cp = kmu->kmu_cache;
4094 kmem_bufctl_t bufctl;
4096 if (kmu->kmu_addr) {
4097 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4098 mdb_warn("couldn't read bufctl at %p", addr);
4099 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4100 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4101 cp->cache_bufsize)
4102 return (WALK_NEXT);
4105 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4106 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4108 for (i = 0; i < depth; i++)
4109 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4111 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4112 return (WALK_NEXT);
4116 * We sort our results by allocation size before printing them.
4118 static int
4119 kmownercmp(const void *lp, const void *rp)
4121 const kmowner_t *lhs = lp;
4122 const kmowner_t *rhs = rp;
4124 return (rhs->kmo_total_size - lhs->kmo_total_size);
4128 * The main engine of ::kmausers is relatively straightforward: First we
4129 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4130 * iterate over the allocated bufctls of each cache in the list. Finally,
4131 * we sort and print our results.
4133 /*ARGSUSED*/
4135 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4137 int mem_threshold = 8192; /* Minimum # bytes for printing */
4138 int cnt_threshold = 100; /* Minimum # blocks for printing */
4139 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4140 int do_all_caches = 1; /* Do all caches (no arguments) */
4141 int opt_e = FALSE; /* Include "small" users */
4142 int opt_f = FALSE; /* Print stack traces */
4144 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4145 kmowner_t *kmo, *kmoend;
4146 int i, oelems;
4148 kmclist_t kmc;
4149 kmusers_t kmu;
4151 bzero(&kmc, sizeof (kmc));
4152 bzero(&kmu, sizeof (kmu));
4154 while ((i = mdb_getopts(argc, argv,
4155 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4156 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4158 argv += i; /* skip past options we just processed */
4159 argc -= i; /* adjust argc */
4161 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4162 return (DCMD_USAGE);
4164 oelems = kmc.kmc_nelems;
4165 kmc.kmc_name = argv->a_un.a_str;
4166 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4168 if (kmc.kmc_nelems == oelems) {
4169 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4170 return (DCMD_ERR);
4173 do_all_caches = 0;
4174 argv++;
4175 argc--;
4178 if (flags & DCMD_ADDRSPEC) {
4179 opt_f = TRUE;
4180 kmu.kmu_addr = addr;
4181 } else {
4182 kmu.kmu_addr = (uintptr_t)NULL;
4185 if (opt_e)
4186 mem_threshold = cnt_threshold = 0;
4188 if (opt_f)
4189 callback = (mdb_walk_cb_t)kmause2;
4191 if (do_all_caches) {
4192 kmc.kmc_name = NULL; /* match all cache names */
4193 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4196 for (i = 0; i < kmc.kmc_nelems; i++) {
4197 uintptr_t cp = kmc.kmc_caches[i];
4198 kmem_cache_t c;
4200 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4201 mdb_warn("failed to read cache at %p", cp);
4202 continue;
4205 if (!(c.cache_flags & KMF_AUDIT)) {
4206 if (!do_all_caches) {
4207 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4208 c.cache_name);
4210 continue;
4213 kmu.kmu_cache = &c;
4214 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4215 audited_caches++;
4218 if (audited_caches == 0 && do_all_caches) {
4219 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4220 return (DCMD_ERR);
4223 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4224 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4226 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4227 if (kmo->kmo_total_size < mem_threshold &&
4228 kmo->kmo_num < cnt_threshold)
4229 continue;
4230 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4231 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4232 for (i = 0; i < kmo->kmo_depth; i++)
4233 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4236 return (DCMD_OK);
4239 void
4240 kmausers_help(void)
4242 mdb_printf(
4243 "Displays the largest users of the kmem allocator, sorted by \n"
4244 "trace. If one or more caches is specified, only those caches\n"
4245 "will be searched. By default, all caches are searched. If an\n"
4246 "address is specified, then only those allocations which include\n"
4247 "the given address are displayed. Specifying an address implies\n"
4248 "-f.\n"
4249 "\n"
4250 "\t-e\tInclude all users, not just the largest\n"
4251 "\t-f\tDisplay individual allocations. By default, users are\n"
4252 "\t\tgrouped by stack\n");
4255 static int
4256 kmem_ready_check(void)
4258 int ready;
4260 if (mdb_readvar(&ready, "kmem_ready") < 0)
4261 return (-1); /* errno is set for us */
4263 return (ready);
4266 void
4267 kmem_statechange(void)
4269 static int been_ready = 0;
4271 if (been_ready)
4272 return;
4274 if (kmem_ready_check() <= 0)
4275 return;
4277 been_ready = 1;
4278 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4281 void
4282 kmem_init(void)
4284 mdb_walker_t w = {
4285 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4286 list_walk_step, list_walk_fini
4290 * If kmem is ready, we'll need to invoke the kmem_cache walker
4291 * immediately. Walkers in the linkage structure won't be ready until
4292 * _mdb_init returns, so we'll need to add this one manually. If kmem
4293 * is ready, we'll use the walker to initialize the caches. If kmem
4294 * isn't ready, we'll register a callback that will allow us to defer
4295 * cache walking until it is.
4297 if (mdb_add_walker(&w) != 0) {
4298 mdb_warn("failed to add kmem_cache walker");
4299 return;
4302 kmem_statechange();
4304 /* register our ::whatis handlers */
4305 mdb_whatis_register("modules", whatis_run_modules, NULL,
4306 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4307 mdb_whatis_register("threads", whatis_run_threads, NULL,
4308 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4309 mdb_whatis_register("pages", whatis_run_pages, NULL,
4310 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4311 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4312 WHATIS_PRIO_ALLOCATOR, 0);
4313 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4314 WHATIS_PRIO_ALLOCATOR, 0);
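/*
 * Example usage (illustrative; the address is hypothetical):
 *
 *	> ffffff01d2f63a80::whatis
 *
 * With the handlers registered above, ::whatis reports whether the address
 * falls within a module's segments, a thread structure or stack, a page
 * structure, a kmem cache buffer, or a vmem arena segment.
 */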
4317 typedef struct whatthread {
4318 uintptr_t wt_target;
4319 int wt_verbose;
4320 } whatthread_t;
4322 static int
4323 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4325 uintptr_t current, data;
4327 if (t->t_stkbase == NULL)
4328 return (WALK_NEXT);
4331 * Search the thread's stack for the given pointer. Note that it would
4332 * be more efficient to follow ::kgrep's lead and read in page-sized
4333 * chunks, but this routine is already fast and simple.
4335 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4336 current += sizeof (uintptr_t)) {
4337 if (mdb_vread(&data, sizeof (data), current) == -1) {
4338 mdb_warn("couldn't read thread %p's stack at %p",
4339 addr, current);
4340 return (WALK_ERR);
4343 if (data == w->wt_target) {
4344 if (w->wt_verbose) {
4345 mdb_printf("%p in thread %p's stack%s\n",
4346 current, addr, stack_active(t, current));
4347 } else {
4348 mdb_printf("%#lr\n", addr);
4349 return (WALK_NEXT);
4354 return (WALK_NEXT);
4358 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4360 whatthread_t w;
4362 if (!(flags & DCMD_ADDRSPEC))
4363 return (DCMD_USAGE);
4365 w.wt_verbose = FALSE;
4366 w.wt_target = addr;
4368 if (mdb_getopts(argc, argv,
4369 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4370 return (DCMD_USAGE);
4372 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4373 == -1) {
4374 mdb_warn("couldn't walk threads");
4375 return (DCMD_ERR);
4378 return (DCMD_OK);
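/*
 * Example usage (illustrative; the address is hypothetical):
 *
 *	> ffffff01d2f63a80::whatthread -v
 *
 * This prints each thread whose stack contains a pointer to the given
 * address; -v adds the stack location of each match and whether that part
 * of the stack is active.
 */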