 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Joyent, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */
#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	mdb_printf x; \
}
#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10
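
/*
 * These walk-type bits are OR'd together by the walker entry points defined
 * later in this file (for example, freectl_constructed_walk_init() passes
 * KM_FREE | KM_BUFCTL | KM_CONSTRUCTED) to tell kmem_walk_init_common() and
 * kmem_walk_step() which buffers to visit and whether to report buffer
 * addresses or bufctls.
 */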
static int mdb_debug_level = 0;
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
		mdb_warn("couldn't find kmem_caches");

	wsp->walk_addr = (uintptr_t)sym.st_value;

	return (list_walk_init_named(wsp, "cache list", "cache"));
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == (uintptr_t)NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");

	wsp->walk_data = (void *)wsp->walk_addr;
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;

	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
kmem_slab_check(void *p, uintptr_t saddr, void *arg)
{
	uintptr_t caddr = (uintptr_t)arg;

	if ((uintptr_t)sp->slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    saddr, caddr, sp->slab_cache);
kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
{
	int rc = kmem_slab_check(p, saddr, arg);

	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
		mdb_warn("slab %p is not a partial slab\n", saddr);
kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
{
	int rc = kmem_slab_check(p, saddr, arg);

	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
		mdb_warn("slab %p is not completely allocated\n", saddr);
	uintptr_t kns_cache_addr;

kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_nth_slab_t *chkp = arg;

	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);

	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_complete_slab_check, (void *)caddr));
kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_partial_slabs));

	return (avl_walk_init_checked(wsp, "slab list", "slab",
	    kmem_partial_slab_check, (void *)caddr));
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	if (caddr == (uintptr_t)NULL) {
		mdb_warn("kmem_slab doesn't support global walks\n");

	combined_walk_init(wsp);
	combined_walk_add(wsp,
	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
	combined_walk_add(wsp,
	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_nth_slab_t *chk;

	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
	    UM_SLEEP | UM_GC);
	chk->kns_cache_addr = caddr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_nth_slab_check, chk));
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	if (caddr == (uintptr_t)NULL) {
		mdb_warn("kmem_slab_partial doesn't support global walks\n");

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);

	combined_walk_init(wsp);

	/*
	 * Some consumers (umem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache. So
	 * if there are *no* partial slabs, report the first full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if (c.cache_partial_slabs.avl_numnodes == 0) {
		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
		    list_walk_step, list_walk_fini);
	} else {
		combined_walk_add(wsp, kmem_partial_slab_walk_init,
		    avl_walk_step, avl_walk_fini);
	}
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	const char *filter = NULL;

	if (mdb_getopts(ac, argv,
	    'n', MDB_OPT_STR, &filter,
	    NULL) != ac) {
		return (DCMD_USAGE);
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);

	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
		return (DCMD_OK);

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_printf("%<b>OPTIONS%</b>\n");
	    "  name of kmem cache (or matching partial name)\n"
	    "Column\tDescription\n"
	    "ADDR\t\taddress of kmem cache\n"
	    "NAME\t\tname of kmem cache\n"
	    "FLAG\t\tvarious cache state flags\n"
	    "CFLAG\t\tcache creation flags\n"
	    "BUFSIZE\tobject size in bytes\n"
	    "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
#define	LABEL_WIDTH	11

kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
	const int *distarray;

	buckets = buffers_per_slab;

	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		    minbucketsize));
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			minbucketsize = (buffers_per_slab - 1);

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output. Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)

kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
	/*
	 * The "kmem_partial_slab" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at least
	 * one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;

typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
	kmem_slab_usage_t *ksu;

	ks->ks_bucket[sp->slab_refcnt]++;

	unused = (sp->slab_chunks - sp->slab_refcnt);

	ks->ks_partial_slabs++;
	ks->ks_unused_buffers += unused;

	if (ks->ks_partial_slabs > ks->ks_usage_len) {
		kmem_slab_usage_t *usage;
		int len = ks->ks_usage_len;

		len = (len == 0 ? 16 : len * 2);
		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
		if (ks->ks_usage != NULL) {
			bcopy(ks->ks_usage, usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
			mdb_free(ks->ks_usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));

		ks->ks_usage = usage;
		ks->ks_usage_len = len;

	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
	ksu->ksu_refcnt = sp->slab_refcnt;
	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
517 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
518 "", "", "Partial", "", "Unused", "");
519 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
520 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
521 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
522 "-------------------------", "--------", "--------", "---------",
523 "---------", "------");
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_slab_stats_t stats;
	size_t maxbuckets = 1;
	size_t minbucketsize = 0;
	const char *filter = NULL;
	const char *name = NULL;
	uint_t opt_v = FALSE;
	boolean_t buckets = B_FALSE;
	boolean_t skip = B_FALSE;

	if (mdb_getopts(argc, argv,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'n', MDB_OPT_STR, &filter,
	    'N', MDB_OPT_STR, &name,
	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
	    NULL) != argc) {
		return (DCMD_USAGE);
	}

	if ((maxbuckets != 1) || (minbucketsize != 0)) {

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
		    argv) == -1) {
			mdb_warn("can't walk kmem_cache");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);

	if (name == NULL) {
		skip = ((filter != NULL) &&
		    (strstr(c.cache_name, filter) == NULL));
	} else if (filter == NULL) {
		skip = (strcmp(c.cache_name, name) != 0);
	} else {
		/* match either -n or -N */
		skip = ((strcmp(c.cache_name, name) != 0) &&
		    (strstr(c.cache_name, filter) == NULL));
	}

	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
	} else if ((opt_v || buckets) && !skip) {
		if (DCMD_HDRSPEC(flags)) {

		boolean_t is_slab = B_FALSE;
		const char *walker_name;

		cb = (mdb_walk_cb_t)kmem_first_partial_slab;
		walker_name = "kmem_slab_partial";

		cb = (mdb_walk_cb_t)kmem_first_slab;
		walker_name = "kmem_slab";

		(void) mdb_pwalk(walker_name, cb, &is_slab, addr);

	bzero(&stats, sizeof (kmem_slab_stats_t));

	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
	/* +1 to include a zero bucket */
	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket), UM_SLEEP);
	cb = (mdb_walk_cb_t)kmem_slablist_stat;
	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);

	if (c.cache_buftotal == 0) {

	uint64_t n = stats.ks_unused_buffers * 10000;
	pct = (int)(n / c.cache_buftotal);
	tenths_pct = pct - ((pct / 100) * 100);
	tenths_pct = (tenths_pct + 5) / 10;	/* round nearest tenth */
	if (tenths_pct == 10) {

	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
	    stats.ks_unused_buffers, pct, tenths_pct);
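
	/*
	 * The Waste column above is computed in hundredths of a percent.
	 * With illustrative numbers, 123 unused buffers out of 1000 total
	 * give n = 1230000 and pct = 1230 hundredths of a percent;
	 * tenths_pct rounds the remainder to 3, and the cache is reported
	 * as wasting 12.3% of its space.
	 */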
	if (maxbuckets == 0) {
		maxbuckets = stats.ks_max_buffers_per_slab;

	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
	    (stats.ks_slabs > 0)) {
		kmem_slabs_print_dist(stats.ks_bucket,
		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);

	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket));

	if (opt_v && (stats.ks_partial_slabs > 0)) {
		kmem_slab_usage_t *ksu;

		mdb_printf(" %d complete (%d), %d partial:",
		    (stats.ks_slabs - stats.ks_partial_slabs),
		    stats.ks_max_buffers_per_slab,
		    stats.ks_partial_slabs);

		for (i = 0; i < stats.ks_partial_slabs; i++) {
			ksu = &stats.ks_usage[i];
			mdb_printf(" %d%s", ksu->ksu_refcnt,
			    (ksu->ksu_nomove ? "*" : ""));

	if (stats.ks_usage_len > 0) {
		mdb_free(stats.ks_usage,
		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
kmem_slabs_help(void)
{
	    "Display slab usage per kmem cache.\n\n");
	mdb_printf("%<b>OPTIONS%</b>\n");
	    " name of kmem cache (or matching partial name)\n"
	    " exact name of kmem cache\n"
	    " Print a distribution of allocated buffers per slab using at\n"
	    " most maxbins bins. The first bin is reserved for completely\n"
	    " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
	    " effect as specifying the maximum allocated buffers per slab\n"
	    " or setting minbinsize to 1 (-B 1).\n"
	    " Print a distribution of allocated buffers per slab, making\n"
	    " all bins (except the first, reserved for completely allocated\n"
	    " slabs) at least minbinsize buffers apart.\n"
	    "-v	verbose output: List the allocated buffer count of each partial\n"
	    "	slab on the free list in order from front to back to show how\n"
	    "	closely the slabs are ordered by usage. For example\n"
	    "	  10 complete, 3 partial (8): 7 3 1\n"
	    "	means there are thirteen slabs with eight buffers each, including\n"
	    "	three partially allocated slabs with less than all eight buffers\n"
	    "	Buffer allocations are always from the front of the partial slab\n"
	    "	list. When a buffer is freed from a completely used slab, that\n"
	    "	slab is added to the front of the partial slab list. Assuming\n"
	    "	that all buffers are equally likely to be freed soon, the\n"
	    "	desired order of partial slabs is most-used at the front of the\n"
	    "	list and least-used at the back (as in the example above).\n"
	    "	However, if a slab contains an allocated buffer that will not\n"
	    "	soon be freed, it would be better for that slab to be at the\n"
	    "	front where all of its buffers can be allocated. Taking a slab\n"
	    "	off the partial slab list (either with all buffers freed or all\n"
	    "	buffers allocated) reduces cache fragmentation.\n"
	    "	A slab's allocated buffer count representing a partial slab (9 in\n"
	    "	the example below) may be marked as follows:\n"
	    "	9* An asterisk indicates that kmem has marked the slab non-\n"
	    "	reclaimable because the kmem client refused to move one of the\n"
	    "	slab's buffers. Since kmem does not expect to completely free the\n"
	    "	slab, it moves it to the front of the list in the hope of\n"
	    "	completely allocating it instead. A slab marked with an asterisk\n"
	    "	stays marked for as long as it remains on the partial slab list.\n"
	    "Column\t\tDescription\n"
	    "Cache Name\t\tname of kmem cache\n"
	    "Slabs\t\t\ttotal slab count\n"
	    "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
	    "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
	    "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
	    "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
	    "\t\t\t for accounting structures (debug mode), slab\n"
	    "\t\t\t coloring (incremental small offsets to stagger\n"
	    "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
774 typedef struct kmem_hash_walk
{
775 uintptr_t *kmhw_table
;
778 kmem_bufctl_t kmhw_cur
;
782 kmem_hash_walk_init(mdb_walk_state_t
*wsp
)
784 kmem_hash_walk_t
*kmhw
;
787 uintptr_t haddr
, addr
= wsp
->walk_addr
;
791 if (addr
== (uintptr_t)NULL
) {
792 mdb_warn("kmem_hash doesn't support global walks\n");
796 if (mdb_vread(&c
, sizeof (c
), addr
) == -1) {
797 mdb_warn("couldn't read cache at addr %p", addr
);
801 if (!(c
.cache_flags
& KMF_HASH
)) {
802 mdb_warn("cache %p doesn't have a hash table\n", addr
);
803 return (WALK_DONE
); /* nothing to do */
806 kmhw
= mdb_zalloc(sizeof (kmem_hash_walk_t
), UM_SLEEP
);
807 kmhw
->kmhw_cur
.bc_next
= NULL
;
810 kmhw
->kmhw_nelems
= nelems
= c
.cache_hash_mask
+ 1;
811 hsize
= nelems
* sizeof (uintptr_t);
812 haddr
= (uintptr_t)c
.cache_hash_table
;
814 kmhw
->kmhw_table
= hash
= mdb_alloc(hsize
, UM_SLEEP
);
815 if (mdb_vread(hash
, hsize
, haddr
) == -1) {
816 mdb_warn("failed to read hash table at %p", haddr
);
817 mdb_free(hash
, hsize
);
818 mdb_free(kmhw
, sizeof (kmem_hash_walk_t
));
822 wsp
->walk_data
= kmhw
;
828 kmem_hash_walk_step(mdb_walk_state_t
*wsp
)
830 kmem_hash_walk_t
*kmhw
= wsp
->walk_data
;
831 uintptr_t addr
= (uintptr_t)NULL
;
833 if ((addr
= (uintptr_t)kmhw
->kmhw_cur
.bc_next
) == (uintptr_t)NULL
) {
834 while (kmhw
->kmhw_pos
< kmhw
->kmhw_nelems
) {
835 if ((addr
= kmhw
->kmhw_table
[kmhw
->kmhw_pos
++]) !=
840 if (addr
== (uintptr_t)NULL
)
843 if (mdb_vread(&kmhw
->kmhw_cur
, sizeof (kmem_bufctl_t
), addr
) == -1) {
844 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr
);
848 return (wsp
->walk_callback(addr
, &kmhw
->kmhw_cur
, wsp
->walk_cbdata
));
852 kmem_hash_walk_fini(mdb_walk_state_t
*wsp
)
854 kmem_hash_walk_t
*kmhw
= wsp
->walk_data
;
859 mdb_free(kmhw
->kmhw_table
, kmhw
->kmhw_nelems
* sizeof (uintptr_t));
860 mdb_free(kmhw
, sizeof (kmem_hash_walk_t
));
864 * Find the address of the bufctl structure for the address 'buf' in cache
865 * 'cp', which is at address caddr, and place it in *out.
868 kmem_hash_lookup(kmem_cache_t
*cp
, uintptr_t caddr
, void *buf
, uintptr_t *out
)
870 uintptr_t bucket
= (uintptr_t)KMEM_HASH(cp
, buf
);
874 if (mdb_vread(&bcp
, sizeof (kmem_bufctl_t
*), bucket
) == -1) {
875 mdb_warn("unable to read hash bucket for %p in cache %p",
880 while (bcp
!= NULL
) {
881 if (mdb_vread(&bc
, sizeof (kmem_bufctl_t
),
882 (uintptr_t)bcp
) == -1) {
883 mdb_warn("unable to read bufctl at %p", bcp
);
886 if (bc
.bc_addr
== buf
) {
887 *out
= (uintptr_t)bcp
;
893 mdb_warn("unable to find bufctl for %p in cache %p\n", buf
, caddr
);
898 kmem_get_magsize(const kmem_cache_t
*cp
)
900 uintptr_t addr
= (uintptr_t)cp
->cache_magtype
;
906 * if cpu 0 has a non-zero magsize, it must be correct. caches
907 * with KMF_NOMAGAZINE have disabled their magazine layers, so
908 * it is okay to return 0 for them.
910 if ((res
= cp
->cache_cpu
[0].cc_magsize
) != 0 ||
911 (cp
->cache_flags
& KMF_NOMAGAZINE
))
914 if (mdb_lookup_by_name("kmem_magtype", &mt_sym
) == -1) {
915 mdb_warn("unable to read 'kmem_magtype'");
916 } else if (addr
< mt_sym
.st_value
||
917 addr
+ sizeof (mt
) - 1 > mt_sym
.st_value
+ mt_sym
.st_size
- 1 ||
918 ((addr
- mt_sym
.st_value
) % sizeof (mt
)) != 0) {
919 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
920 cp
->cache_name
, addr
);
923 if (mdb_vread(&mt
, sizeof (mt
), addr
) == -1) {
924 mdb_warn("unable to read magtype at %a", addr
);
927 return (mt
.mt_magsize
);
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
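
/*
 * Illustrative arithmetic for the estimate above (hypothetical numbers):
 * if cache_buftotal is 1024, the kmem_slab_partial walk subtracts 100
 * unallocated chunks found on partial slabs, and the depot holds 20 full
 * 15-round magazines (mag_est = 300), the resulting upper bound is
 * 1024 - 100 - 300 = 624 allocated buffers.
 */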
#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
983 kmem_read_magazines(kmem_cache_t
*cp
, uintptr_t addr
, int ncpus
,
984 void ***maglistp
, size_t *magcntp
, size_t *magmaxp
, int alloc_flags
)
986 kmem_magazine_t
*kmp
, *mp
;
987 void **maglist
= NULL
;
989 size_t magsize
, magmax
, magbsize
;
993 * Read the magtype out of the cache, after verifying the pointer's
996 magsize
= kmem_get_magsize(cp
);
1005 * There are several places where we need to go buffer hunting:
1006 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1007 * and the full magazine list in the depot.
1009 * For an upper bound on the number of buffers in the magazine
1010 * layer, we have the number of magazines on the cache_full
1011 * list plus at most two magazines per CPU (the loaded and the
1012 * spare). Toss in 100 magazines as a fudge factor in case this
1013 * is live (the number "100" comes from the same fudge factor in
1016 magmax
= (cp
->cache_full
.ml_total
+ 2 * ncpus
+ 100) * magsize
;
1017 magbsize
= offsetof(kmem_magazine_t
, mag_round
[magsize
]);
1019 if (magbsize
>= PAGESIZE
/ 2) {
1020 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1025 maglist
= mdb_alloc(magmax
* sizeof (void *), alloc_flags
);
1026 mp
= mdb_alloc(magbsize
, alloc_flags
);
1027 if (mp
== NULL
|| maglist
== NULL
)
1031 * First up: the magazines in the depot (i.e. on the cache_full list).
1033 for (kmp
= cp
->cache_full
.ml_list
; kmp
!= NULL
; ) {
1034 READMAG_ROUNDS(magsize
);
1037 if (kmp
== cp
->cache_full
.ml_list
)
1038 break; /* cache_full list loop detected */
1041 dprintf(("cache_full list done\n"));
1044 * Now whip through the CPUs, snagging the loaded magazines
1047 * In order to prevent inconsistent dumps, rounds and prounds
1048 * are copied aside before dumping begins.
1050 for (cpu
= 0; cpu
< ncpus
; cpu
++) {
1051 kmem_cpu_cache_t
*ccp
= &cp
->cache_cpu
[cpu
];
1052 short rounds
, prounds
;
1054 if (KMEM_DUMPCC(ccp
)) {
1055 rounds
= ccp
->cc_dump_rounds
;
1056 prounds
= ccp
->cc_dump_prounds
;
1058 rounds
= ccp
->cc_rounds
;
1059 prounds
= ccp
->cc_prounds
;
1062 dprintf(("reading cpu cache %p\n",
1063 (uintptr_t)ccp
- (uintptr_t)cp
+ addr
));
1066 (kmp
= ccp
->cc_loaded
) != NULL
) {
1067 dprintf(("reading %d loaded rounds\n", rounds
));
1068 READMAG_ROUNDS(rounds
);
1072 (kmp
= ccp
->cc_ploaded
) != NULL
) {
1073 dprintf(("reading %d previously loaded rounds\n",
1075 READMAG_ROUNDS(prounds
);
1079 dprintf(("magazine layer: %d buffers\n", magcnt
));
1081 if (!(alloc_flags
& UM_GC
))
1082 mdb_free(mp
, magbsize
);
1084 *maglistp
= maglist
;
1091 if (!(alloc_flags
& UM_GC
)) {
1093 mdb_free(mp
, magbsize
);
1095 mdb_free(maglist
, magmax
* sizeof (void *));
1101 kmem_walk_callback(mdb_walk_state_t
*wsp
, uintptr_t buf
)
1103 return (wsp
->walk_callback(buf
, NULL
, wsp
->walk_cbdata
));
1107 bufctl_walk_callback(kmem_cache_t
*cp
, mdb_walk_state_t
*wsp
, uintptr_t buf
)
1109 kmem_bufctl_audit_t b
;
1112 * if KMF_AUDIT is not set, we know that we're looking at a
1115 if (!(cp
->cache_flags
& KMF_AUDIT
) ||
1116 mdb_vread(&b
, sizeof (kmem_bufctl_audit_t
), buf
) == -1) {
1117 (void) memset(&b
, 0, sizeof (b
));
1118 if (mdb_vread(&b
, sizeof (kmem_bufctl_t
), buf
) == -1) {
1119 mdb_warn("unable to read bufctl at %p", buf
);
1124 return (wsp
->walk_callback(buf
, &b
, wsp
->walk_cbdata
));
1127 typedef struct kmem_walk
{
1130 uintptr_t kmw_addr
; /* cache address */
1131 kmem_cache_t
*kmw_cp
;
1145 char *kmw_valid
; /* to keep track of freed buffers */
1146 char *kmw_ubase
; /* buffer for slab data */
1150 kmem_walk_init_common(mdb_walk_state_t
*wsp
, int type
)
1157 size_t magmax
, magcnt
;
1158 void **maglist
= NULL
;
1159 uint_t chunksize
, slabsize
;
1160 int status
= WALK_ERR
;
1161 uintptr_t addr
= wsp
->walk_addr
;
1162 const char *layered
;
1166 if (addr
== (uintptr_t)NULL
) {
1167 mdb_warn("kmem walk doesn't support global walks\n");
1171 dprintf(("walking %p\n", addr
));
1174 * First we need to figure out how many CPUs are configured in the
1175 * system to know how much to slurp out.
1177 mdb_readvar(&ncpus
, "max_ncpus");
1179 csize
= KMEM_CACHE_SIZE(ncpus
);
1180 cp
= mdb_alloc(csize
, UM_SLEEP
);
1182 if (mdb_vread(cp
, csize
, addr
) == -1) {
1183 mdb_warn("couldn't read cache at addr %p", addr
);
1188 * It's easy for someone to hand us an invalid cache address.
1189 * Unfortunately, it is hard for this walker to survive an
1190 * invalid cache cleanly. So we make sure that:
1192 * 1. the vmem arena for the cache is readable,
1193 * 2. the vmem arena's quantum is a power of 2,
1194 * 3. our slabsize is a multiple of the quantum, and
1195 * 4. our chunksize is >0 and less than our slabsize.
1197 if (mdb_vread(&vm_quantum
, sizeof (vm_quantum
),
1198 (uintptr_t)&cp
->cache_arena
->vm_quantum
) == -1 ||
1200 (vm_quantum
& (vm_quantum
- 1)) != 0 ||
1201 cp
->cache_slabsize
< vm_quantum
||
1202 P2PHASE(cp
->cache_slabsize
, vm_quantum
) != 0 ||
1203 cp
->cache_chunksize
== 0 ||
1204 cp
->cache_chunksize
> cp
->cache_slabsize
) {
1205 mdb_warn("%p is not a valid kmem_cache_t\n", addr
);
1209 dprintf(("buf total is %d\n", cp
->cache_buftotal
));
1211 if (cp
->cache_buftotal
== 0) {
1212 mdb_free(cp
, csize
);
1217 * If they ask for bufctls, but it's a small-slab cache,
1218 * there is nothing to report.
1220 if ((type
& KM_BUFCTL
) && !(cp
->cache_flags
& KMF_HASH
)) {
1221 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1223 mdb_free(cp
, csize
);
1228 * If they want constructed buffers, but there's no constructor or
1229 * the cache has DEADBEEF checking enabled, there is nothing to report.
1231 if ((type
& KM_CONSTRUCTED
) && (!(type
& KM_FREE
) ||
1232 cp
->cache_constructor
== NULL
||
1233 (cp
->cache_flags
& (KMF_DEADBEEF
| KMF_LITE
)) == KMF_DEADBEEF
)) {
1234 mdb_free(cp
, csize
);
1239 * Read in the contents of the magazine layer
1241 if (kmem_read_magazines(cp
, addr
, ncpus
, &maglist
, &magcnt
,
1242 &magmax
, UM_SLEEP
) == WALK_ERR
)
1246 * We have all of the buffers from the magazines; if we are walking
1247 * allocated buffers, sort them so we can bsearch them later.
1249 if (type
& KM_ALLOCATED
)
1250 qsort(maglist
, magcnt
, sizeof (void *), addrcmp
);
1252 wsp
->walk_data
= kmw
= mdb_zalloc(sizeof (kmem_walk_t
), UM_SLEEP
);
1254 kmw
->kmw_type
= type
;
1255 kmw
->kmw_addr
= addr
;
1257 kmw
->kmw_csize
= csize
;
1258 kmw
->kmw_maglist
= maglist
;
1259 kmw
->kmw_max
= magmax
;
1260 kmw
->kmw_count
= magcnt
;
1264 * When walking allocated buffers in a KMF_HASH cache, we walk the
1265 * hash table instead of the slab layer.
1267 if ((cp
->cache_flags
& KMF_HASH
) && (type
& KM_ALLOCATED
)) {
1268 layered
= "kmem_hash";
1270 kmw
->kmw_type
|= KM_HASH
;
1273 * If we are walking freed buffers, we only need the
1274 * magazine layer plus the partially allocated slabs.
1275 * To walk allocated buffers, we need all of the slabs.
1277 if (type
& KM_ALLOCATED
)
1278 layered
= "kmem_slab";
1280 layered
= "kmem_slab_partial";
1283 * for small-slab caches, we read in the entire slab. For
1284 * freed buffers, we can just walk the freelist. For
1285 * allocated buffers, we use a 'valid' array to track
1286 * the freed buffers.
1288 if (!(cp
->cache_flags
& KMF_HASH
)) {
1289 chunksize
= cp
->cache_chunksize
;
1290 slabsize
= cp
->cache_slabsize
;
1292 kmw
->kmw_ubase
= mdb_alloc(slabsize
+
1293 sizeof (kmem_bufctl_t
), UM_SLEEP
);
1295 if (type
& KM_ALLOCATED
)
1297 mdb_alloc(slabsize
/ chunksize
, UM_SLEEP
);
1303 if (mdb_layered_walk(layered
, wsp
) == -1) {
1304 mdb_warn("unable to start layered '%s' walk", layered
);
1309 if (status
== WALK_ERR
) {
1311 mdb_free(kmw
->kmw_valid
, slabsize
/ chunksize
);
1314 mdb_free(kmw
->kmw_ubase
, slabsize
+
1315 sizeof (kmem_bufctl_t
));
1317 if (kmw
->kmw_maglist
)
1318 mdb_free(kmw
->kmw_maglist
,
1319 kmw
->kmw_max
* sizeof (uintptr_t));
1321 mdb_free(kmw
, sizeof (kmem_walk_t
));
1322 wsp
->walk_data
= NULL
;
1326 if (status
== WALK_ERR
)
1327 mdb_free(cp
, csize
);
1333 kmem_walk_step(mdb_walk_state_t
*wsp
)
1335 kmem_walk_t
*kmw
= wsp
->walk_data
;
1336 int type
= kmw
->kmw_type
;
1337 kmem_cache_t
*cp
= kmw
->kmw_cp
;
1339 void **maglist
= kmw
->kmw_maglist
;
1340 int magcnt
= kmw
->kmw_count
;
1342 uintptr_t chunksize
, slabsize
;
1344 const kmem_slab_t
*sp
;
1345 const kmem_bufctl_t
*bcp
;
1353 char *valid
, *ubase
;
1356 * first, handle the 'kmem_hash' layered walk case
1358 if (type
& KM_HASH
) {
1360 * We have a buffer which has been allocated out of the
1361 * global layer. We need to make sure that it's not
1362 * actually sitting in a magazine before we report it as
1363 * an allocated buffer.
1365 buf
= ((const kmem_bufctl_t
*)wsp
->walk_layer
)->bc_addr
;
1368 bsearch(&buf
, maglist
, magcnt
, sizeof (void *),
1372 if (type
& KM_BUFCTL
)
1373 return (bufctl_walk_callback(cp
, wsp
, wsp
->walk_addr
));
1375 return (kmem_walk_callback(wsp
, (uintptr_t)buf
));
1380 addr
= kmw
->kmw_addr
;
1383 * If we're walking freed buffers, report everything in the
1384 * magazine layer before processing the first slab.
1386 if ((type
& KM_FREE
) && magcnt
!= 0) {
1387 kmw
->kmw_count
= 0; /* only do this once */
1388 for (i
= 0; i
< magcnt
; i
++) {
1391 if (type
& KM_BUFCTL
) {
1394 if (cp
->cache_flags
& KMF_BUFTAG
) {
1398 /* LINTED - alignment */
1399 btp
= KMEM_BUFTAG(cp
, buf
);
1400 if (mdb_vread(&tag
, sizeof (tag
),
1401 (uintptr_t)btp
) == -1) {
1402 mdb_warn("reading buftag for "
1403 "%p at %p", buf
, btp
);
1406 out
= (uintptr_t)tag
.bt_bufctl
;
1408 if (kmem_hash_lookup(cp
, addr
, buf
,
1412 ret
= bufctl_walk_callback(cp
, wsp
, out
);
1414 ret
= kmem_walk_callback(wsp
, (uintptr_t)buf
);
1417 if (ret
!= WALK_NEXT
)
1423 * If they want constructed buffers, we're finished, since the
1424 * magazine layer holds them all.
1426 if (type
& KM_CONSTRUCTED
)
1430 * Handle the buffers in the current slab
1432 chunksize
= cp
->cache_chunksize
;
1433 slabsize
= cp
->cache_slabsize
;
1435 sp
= wsp
->walk_layer
;
1436 chunks
= sp
->slab_chunks
;
1437 kbase
= sp
->slab_base
;
1439 dprintf(("kbase is %p\n", kbase
));
1441 if (!(cp
->cache_flags
& KMF_HASH
)) {
1442 valid
= kmw
->kmw_valid
;
1443 ubase
= kmw
->kmw_ubase
;
1445 if (mdb_vread(ubase
, chunks
* chunksize
,
1446 (uintptr_t)kbase
) == -1) {
1447 mdb_warn("failed to read slab contents at %p", kbase
);
1452 * Set up the valid map as fully allocated -- we'll punch
1455 if (type
& KM_ALLOCATED
)
1456 (void) memset(valid
, 1, chunks
);
1463 * walk the slab's freelist
1465 bcp
= sp
->slab_head
;
1467 dprintf(("refcnt is %d; chunks is %d\n", sp
->slab_refcnt
, chunks
));
1470 * since we could be in the middle of allocating a buffer,
1471 * our refcnt could be one higher than it aught. So we
1472 * check one further on the freelist than the count allows.
1474 for (i
= sp
->slab_refcnt
; i
<= chunks
; i
++) {
1477 dprintf(("bcp is %p\n", bcp
));
1483 "slab %p in cache %p freelist too short by %d\n",
1484 sp
, addr
, chunks
- i
);
1488 if (cp
->cache_flags
& KMF_HASH
) {
1489 if (mdb_vread(&bc
, sizeof (bc
), (uintptr_t)bcp
) == -1) {
1490 mdb_warn("failed to read bufctl ptr at %p",
1497 * Otherwise the buffer is (or should be) in the slab
1498 * that we've read in; determine its offset in the
1499 * slab, validate that it's not corrupt, and add to
1500 * our base address to find the umem_bufctl_t. (Note
1501 * that we don't need to add the size of the bufctl
1502 * to our offset calculation because of the slop that's
1503 * allocated for the buffer at ubase.)
1505 uintptr_t offs
= (uintptr_t)bcp
- (uintptr_t)kbase
;
1507 if (offs
> chunks
* chunksize
) {
1508 mdb_warn("found corrupt bufctl ptr %p"
1509 " in slab %p in cache %p\n", bcp
,
1510 wsp
->walk_addr
, addr
);
1514 bc
= *((kmem_bufctl_t
*)((uintptr_t)ubase
+ offs
));
1515 buf
= KMEM_BUF(cp
, bcp
);
1518 ndx
= ((uintptr_t)buf
- (uintptr_t)kbase
) / chunksize
;
1520 if (ndx
> slabsize
/ cp
->cache_bufsize
) {
1522 * This is very wrong; we have managed to find
1523 * a buffer in the slab which shouldn't
1524 * actually be here. Emit a warning, and
1527 mdb_warn("buf %p is out of range for "
1528 "slab %p, cache %p\n", buf
, sp
, addr
);
1529 } else if (type
& KM_ALLOCATED
) {
1531 * we have found a buffer on the slab's freelist;
1537 * Report this freed buffer
1539 if (type
& KM_BUFCTL
) {
1540 ret
= bufctl_walk_callback(cp
, wsp
,
1543 ret
= kmem_walk_callback(wsp
, (uintptr_t)buf
);
1545 if (ret
!= WALK_NEXT
)
1553 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1558 * If we are walking freed buffers, the loop above handled reporting
1564 if (type
& KM_BUFCTL
) {
1565 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1566 "cache %p\n", addr
);
1571 * Report allocated buffers, skipping buffers in the magazine layer.
1572 * We only get this far for small-slab caches.
1574 for (i
= 0; ret
== WALK_NEXT
&& i
< chunks
; i
++) {
1575 buf
= (char *)kbase
+ i
* chunksize
;
1578 continue; /* on slab freelist */
1581 bsearch(&buf
, maglist
, magcnt
, sizeof (void *),
1583 continue; /* in magazine layer */
1585 ret
= kmem_walk_callback(wsp
, (uintptr_t)buf
);
1591 kmem_walk_fini(mdb_walk_state_t
*wsp
)
1593 kmem_walk_t
*kmw
= wsp
->walk_data
;
1594 uintptr_t chunksize
;
1600 if (kmw
->kmw_maglist
!= NULL
)
1601 mdb_free(kmw
->kmw_maglist
, kmw
->kmw_max
* sizeof (void *));
1603 chunksize
= kmw
->kmw_cp
->cache_chunksize
;
1604 slabsize
= kmw
->kmw_cp
->cache_slabsize
;
1606 if (kmw
->kmw_valid
!= NULL
)
1607 mdb_free(kmw
->kmw_valid
, slabsize
/ chunksize
);
1608 if (kmw
->kmw_ubase
!= NULL
)
1609 mdb_free(kmw
->kmw_ubase
, slabsize
+ sizeof (kmem_bufctl_t
));
1611 mdb_free(kmw
->kmw_cp
, kmw
->kmw_csize
);
1612 mdb_free(kmw
, sizeof (kmem_walk_t
));
1617 kmem_walk_all(uintptr_t addr
, const kmem_cache_t
*c
, mdb_walk_state_t
*wsp
)
1620 * Buffers allocated from NOTOUCH caches can also show up as freed
1621 * memory in other caches. This can be a little confusing, so we
1622 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1623 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1625 if (c
->cache_cflags
& KMC_NOTOUCH
)
1628 if (mdb_pwalk(wsp
->walk_data
, wsp
->walk_callback
,
1629 wsp
->walk_cbdata
, addr
) == -1)
1635 #define KMEM_WALK_ALL(name, wsp) { \
1636 wsp->walk_data = (name); \
1637 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1638 return (WALK_ERR); \
1639 return (WALK_DONE); \
1643 kmem_walk_init(mdb_walk_state_t
*wsp
)
1645 if (wsp
->walk_arg
!= NULL
)
1646 wsp
->walk_addr
= (uintptr_t)wsp
->walk_arg
;
1648 if (wsp
->walk_addr
== (uintptr_t)NULL
)
1649 KMEM_WALK_ALL("kmem", wsp
);
1650 return (kmem_walk_init_common(wsp
, KM_ALLOCATED
));
1654 bufctl_walk_init(mdb_walk_state_t
*wsp
)
1656 if (wsp
->walk_addr
== (uintptr_t)NULL
)
1657 KMEM_WALK_ALL("bufctl", wsp
);
1658 return (kmem_walk_init_common(wsp
, KM_ALLOCATED
| KM_BUFCTL
));
1662 freemem_walk_init(mdb_walk_state_t
*wsp
)
1664 if (wsp
->walk_addr
== (uintptr_t)NULL
)
1665 KMEM_WALK_ALL("freemem", wsp
);
1666 return (kmem_walk_init_common(wsp
, KM_FREE
));
1670 freemem_constructed_walk_init(mdb_walk_state_t
*wsp
)
1672 if (wsp
->walk_addr
== (uintptr_t)NULL
)
1673 KMEM_WALK_ALL("freemem_constructed", wsp
);
1674 return (kmem_walk_init_common(wsp
, KM_FREE
| KM_CONSTRUCTED
));
1678 freectl_walk_init(mdb_walk_state_t
*wsp
)
1680 if (wsp
->walk_addr
== (uintptr_t)NULL
)
1681 KMEM_WALK_ALL("freectl", wsp
);
1682 return (kmem_walk_init_common(wsp
, KM_FREE
| KM_BUFCTL
));
1686 freectl_constructed_walk_init(mdb_walk_state_t
*wsp
)
1688 if (wsp
->walk_addr
== (uintptr_t)NULL
)
1689 KMEM_WALK_ALL("freectl_constructed", wsp
);
1690 return (kmem_walk_init_common(wsp
,
1691 KM_FREE
| KM_BUFCTL
| KM_CONSTRUCTED
));
1694 typedef struct bufctl_history_walk
{
1696 kmem_cache_t
*bhw_cache
;
1697 kmem_slab_t
*bhw_slab
;
1698 hrtime_t bhw_timestamp
;
1699 } bufctl_history_walk_t
;
1702 bufctl_history_walk_init(mdb_walk_state_t
*wsp
)
1704 bufctl_history_walk_t
*bhw
;
1705 kmem_bufctl_audit_t bc
;
1706 kmem_bufctl_audit_t bcn
;
1708 if (wsp
->walk_addr
== (uintptr_t)NULL
) {
1709 mdb_warn("bufctl_history walk doesn't support global walks\n");
1713 if (mdb_vread(&bc
, sizeof (bc
), wsp
->walk_addr
) == -1) {
1714 mdb_warn("unable to read bufctl at %p", wsp
->walk_addr
);
1718 bhw
= mdb_zalloc(sizeof (*bhw
), UM_SLEEP
);
1719 bhw
->bhw_timestamp
= 0;
1720 bhw
->bhw_cache
= bc
.bc_cache
;
1721 bhw
->bhw_slab
= bc
.bc_slab
;
1724 * sometimes the first log entry matches the base bufctl; in that
1725 * case, skip the base bufctl.
1727 if (bc
.bc_lastlog
!= NULL
&&
1728 mdb_vread(&bcn
, sizeof (bcn
), (uintptr_t)bc
.bc_lastlog
) != -1 &&
1729 bc
.bc_addr
== bcn
.bc_addr
&&
1730 bc
.bc_cache
== bcn
.bc_cache
&&
1731 bc
.bc_slab
== bcn
.bc_slab
&&
1732 bc
.bc_timestamp
== bcn
.bc_timestamp
&&
1733 bc
.bc_thread
== bcn
.bc_thread
)
1734 bhw
->bhw_next
= bc
.bc_lastlog
;
1736 bhw
->bhw_next
= (void *)wsp
->walk_addr
;
1738 wsp
->walk_addr
= (uintptr_t)bc
.bc_addr
;
1739 wsp
->walk_data
= bhw
;
1745 bufctl_history_walk_step(mdb_walk_state_t
*wsp
)
1747 bufctl_history_walk_t
*bhw
= wsp
->walk_data
;
1748 uintptr_t addr
= (uintptr_t)bhw
->bhw_next
;
1749 uintptr_t baseaddr
= wsp
->walk_addr
;
1750 kmem_bufctl_audit_t bc
;
1752 if (addr
== (uintptr_t)NULL
)
1755 if (mdb_vread(&bc
, sizeof (bc
), addr
) == -1) {
1756 mdb_warn("unable to read bufctl at %p", bhw
->bhw_next
);
1761 * The bufctl is only valid if the address, cache, and slab are
1762 * correct. We also check that the timestamp is decreasing, to
1763 * prevent infinite loops.
1765 if ((uintptr_t)bc
.bc_addr
!= baseaddr
||
1766 bc
.bc_cache
!= bhw
->bhw_cache
||
1767 bc
.bc_slab
!= bhw
->bhw_slab
||
1768 (bhw
->bhw_timestamp
!= 0 && bc
.bc_timestamp
>= bhw
->bhw_timestamp
))
1771 bhw
->bhw_next
= bc
.bc_lastlog
;
1772 bhw
->bhw_timestamp
= bc
.bc_timestamp
;
1774 return (wsp
->walk_callback(addr
, &bc
, wsp
->walk_cbdata
));
1778 bufctl_history_walk_fini(mdb_walk_state_t
*wsp
)
1780 bufctl_history_walk_t
*bhw
= wsp
->walk_data
;
1782 mdb_free(bhw
, sizeof (*bhw
));
1785 typedef struct kmem_log_walk
{
1786 kmem_bufctl_audit_t
*klw_base
;
1787 kmem_bufctl_audit_t
**klw_sorted
;
1788 kmem_log_header_t klw_lh
;
1795 kmem_log_walk_init(mdb_walk_state_t
*wsp
)
1797 uintptr_t lp
= wsp
->walk_addr
;
1798 kmem_log_walk_t
*klw
;
1799 kmem_log_header_t
*lhp
;
1800 int maxndx
, i
, j
, k
;
1803 * By default (global walk), walk the kmem_transaction_log. Otherwise
1804 * read the log whose kmem_log_header_t is stored at walk_addr.
1806 if (lp
== (uintptr_t)NULL
&&
1807 mdb_readvar(&lp
, "kmem_transaction_log") == -1) {
1808 mdb_warn("failed to read 'kmem_transaction_log'");
1812 if (lp
== (uintptr_t)NULL
) {
1813 mdb_warn("log is disabled\n");
1817 klw
= mdb_zalloc(sizeof (kmem_log_walk_t
), UM_SLEEP
);
1820 if (mdb_vread(lhp
, sizeof (kmem_log_header_t
), lp
) == -1) {
1821 mdb_warn("failed to read log header at %p", lp
);
1822 mdb_free(klw
, sizeof (kmem_log_walk_t
));
1826 klw
->klw_size
= lhp
->lh_chunksize
* lhp
->lh_nchunks
;
1827 klw
->klw_base
= mdb_alloc(klw
->klw_size
, UM_SLEEP
);
1828 maxndx
= lhp
->lh_chunksize
/ sizeof (kmem_bufctl_audit_t
) - 1;
1830 if (mdb_vread(klw
->klw_base
, klw
->klw_size
,
1831 (uintptr_t)lhp
->lh_base
) == -1) {
1832 mdb_warn("failed to read log at base %p", lhp
->lh_base
);
1833 mdb_free(klw
->klw_base
, klw
->klw_size
);
1834 mdb_free(klw
, sizeof (kmem_log_walk_t
));
1838 klw
->klw_sorted
= mdb_alloc(maxndx
* lhp
->lh_nchunks
*
1839 sizeof (kmem_bufctl_audit_t
*), UM_SLEEP
);
1841 for (i
= 0, k
= 0; i
< lhp
->lh_nchunks
; i
++) {
1842 kmem_bufctl_audit_t
*chunk
= (kmem_bufctl_audit_t
*)
1843 ((uintptr_t)klw
->klw_base
+ i
* lhp
->lh_chunksize
);
1845 for (j
= 0; j
< maxndx
; j
++)
1846 klw
->klw_sorted
[k
++] = &chunk
[j
];
1849 qsort(klw
->klw_sorted
, k
, sizeof (kmem_bufctl_audit_t
*),
1850 (int(*)(const void *, const void *))bufctlcmp
);
1852 klw
->klw_maxndx
= k
;
1853 wsp
->walk_data
= klw
;
1859 kmem_log_walk_step(mdb_walk_state_t
*wsp
)
1861 kmem_log_walk_t
*klw
= wsp
->walk_data
;
1862 kmem_bufctl_audit_t
*bcp
;
1864 if (klw
->klw_ndx
== klw
->klw_maxndx
)
1867 bcp
= klw
->klw_sorted
[klw
->klw_ndx
++];
1869 return (wsp
->walk_callback((uintptr_t)bcp
- (uintptr_t)klw
->klw_base
+
1870 (uintptr_t)klw
->klw_lh
.lh_base
, bcp
, wsp
->walk_cbdata
));
1874 kmem_log_walk_fini(mdb_walk_state_t
*wsp
)
1876 kmem_log_walk_t
*klw
= wsp
->walk_data
;
1878 mdb_free(klw
->klw_base
, klw
->klw_size
);
1879 mdb_free(klw
->klw_sorted
, klw
->klw_maxndx
*
1880 sizeof (kmem_bufctl_audit_t
*));
1881 mdb_free(klw
, sizeof (kmem_log_walk_t
));
1884 typedef struct allocdby_bufctl
{
1887 } allocdby_bufctl_t
;
1889 typedef struct allocdby_walk
{
1890 const char *abw_walk
;
1891 uintptr_t abw_thread
;
1894 allocdby_bufctl_t
*abw_buf
;
1899 allocdby_walk_bufctl(uintptr_t addr
, const kmem_bufctl_audit_t
*bcp
,
1900 allocdby_walk_t
*abw
)
1902 if ((uintptr_t)bcp
->bc_thread
!= abw
->abw_thread
)
1905 if (abw
->abw_nbufs
== abw
->abw_size
) {
1906 allocdby_bufctl_t
*buf
;
1907 size_t oldsize
= sizeof (allocdby_bufctl_t
) * abw
->abw_size
;
1909 buf
= mdb_zalloc(oldsize
<< 1, UM_SLEEP
);
1911 bcopy(abw
->abw_buf
, buf
, oldsize
);
1912 mdb_free(abw
->abw_buf
, oldsize
);
1914 abw
->abw_size
<<= 1;
1918 abw
->abw_buf
[abw
->abw_nbufs
].abb_addr
= addr
;
1919 abw
->abw_buf
[abw
->abw_nbufs
].abb_ts
= bcp
->bc_timestamp
;
1927 allocdby_walk_cache(uintptr_t addr
, const kmem_cache_t
*c
, allocdby_walk_t
*abw
)
1929 if (mdb_pwalk(abw
->abw_walk
, (mdb_walk_cb_t
)allocdby_walk_bufctl
,
1931 mdb_warn("couldn't walk bufctl for cache %p", addr
);
1939 allocdby_cmp(const allocdby_bufctl_t
*lhs
, const allocdby_bufctl_t
*rhs
)
1941 if (lhs
->abb_ts
< rhs
->abb_ts
)
1943 if (lhs
->abb_ts
> rhs
->abb_ts
)
1949 allocdby_walk_init_common(mdb_walk_state_t
*wsp
, const char *walk
)
1951 allocdby_walk_t
*abw
;
1953 if (wsp
->walk_addr
== (uintptr_t)NULL
) {
1954 mdb_warn("allocdby walk doesn't support global walks\n");
1958 abw
= mdb_zalloc(sizeof (allocdby_walk_t
), UM_SLEEP
);
1960 abw
->abw_thread
= wsp
->walk_addr
;
1961 abw
->abw_walk
= walk
;
1962 abw
->abw_size
= 128; /* something reasonable */
1964 mdb_zalloc(abw
->abw_size
* sizeof (allocdby_bufctl_t
), UM_SLEEP
);
1966 wsp
->walk_data
= abw
;
1968 if (mdb_walk("kmem_cache",
1969 (mdb_walk_cb_t
)allocdby_walk_cache
, abw
) == -1) {
1970 mdb_warn("couldn't walk kmem_cache");
1971 allocdby_walk_fini(wsp
);
1975 qsort(abw
->abw_buf
, abw
->abw_nbufs
, sizeof (allocdby_bufctl_t
),
1976 (int(*)(const void *, const void *))allocdby_cmp
);
1982 allocdby_walk_init(mdb_walk_state_t
*wsp
)
1984 return (allocdby_walk_init_common(wsp
, "bufctl"));
1988 freedby_walk_init(mdb_walk_state_t
*wsp
)
1990 return (allocdby_walk_init_common(wsp
, "freectl"));
1994 allocdby_walk_step(mdb_walk_state_t
*wsp
)
1996 allocdby_walk_t
*abw
= wsp
->walk_data
;
1997 kmem_bufctl_audit_t bc
;
2000 if (abw
->abw_ndx
== abw
->abw_nbufs
)
2003 addr
= abw
->abw_buf
[abw
->abw_ndx
++].abb_addr
;
2005 if (mdb_vread(&bc
, sizeof (bc
), addr
) == -1) {
2006 mdb_warn("couldn't read bufctl at %p", addr
);
2010 return (wsp
->walk_callback(addr
, &bc
, wsp
->walk_cbdata
));
2014 allocdby_walk_fini(mdb_walk_state_t
*wsp
)
2016 allocdby_walk_t
*abw
= wsp
->walk_data
;
2018 mdb_free(abw
->abw_buf
, sizeof (allocdby_bufctl_t
) * abw
->abw_size
);
2019 mdb_free(abw
, sizeof (allocdby_walk_t
));
2024 allocdby_walk(uintptr_t addr
, const kmem_bufctl_audit_t
*bcp
, void *ignored
)
2026 char c
[MDB_SYM_NAMLEN
];
2030 mdb_printf("%0?p %12llx ", addr
, bcp
->bc_timestamp
);
2031 for (i
= 0; i
< bcp
->bc_depth
; i
++) {
2032 if (mdb_lookup_by_addr(bcp
->bc_stack
[i
],
2033 MDB_SYM_FUZZY
, c
, sizeof (c
), &sym
) == -1)
2035 if (strncmp(c
, "kmem_", 5) == 0)
2037 mdb_printf("%s+0x%lx",
2038 c
, bcp
->bc_stack
[i
] - (uintptr_t)sym
.st_value
);
2047 allocdby_common(uintptr_t addr
, uint_t flags
, const char *w
)
2049 if (!(flags
& DCMD_ADDRSPEC
))
2050 return (DCMD_USAGE
);
2052 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2054 if (mdb_pwalk(w
, (mdb_walk_cb_t
)allocdby_walk
, NULL
, addr
) == -1) {
2055 mdb_warn("can't walk '%s' for %p", w
, addr
);
2064 allocdby(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
2066 return (allocdby_common(addr
, flags
, "allocdby"));
2071 freedby(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
2073 return (allocdby_common(addr
, flags
, "freedby"));
2077 * Return a string describing the address in relation to the given thread's
2080 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2082 * - If the address is above the stack pointer, return an empty string
2083 * signifying that the address is active.
2085 * - If the address is below the stack pointer, and the thread is not on proc,
2086 * return " (below sp)".
2088 * - If the address is below the stack pointer, and the thread is on proc,
2089 * return " (possibly below sp)". Depending on context, we may or may not
2090 * have an accurate t_sp.
2093 stack_active(const kthread_t
*t
, uintptr_t addr
)
2098 if (t
->t_state
== TS_FREE
)
2099 return (" (inactive interrupt thread)");
2102 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2103 * no longer relates to the thread's real stack.
2105 if (mdb_lookup_by_name("panic_stack", &sym
) == 0) {
2106 panicstk
= (uintptr_t)sym
.st_value
;
2108 if (t
->t_sp
>= panicstk
&& t
->t_sp
< panicstk
+ PANICSTKSIZE
)
2112 if (addr
>= t
->t_sp
+ STACK_BIAS
)
2115 if (t
->t_state
== TS_ONPROC
)
2116 return (" (possibly below sp)");
2118 return (" (below sp)");
2122 * Additional state for the kmem and vmem ::whatis handlers
2124 typedef struct whatis_info
{
2126 const kmem_cache_t
*wi_cache
;
2127 const vmem_t
*wi_vmem
;
2128 vmem_t
*wi_msb_arena
;
2129 size_t wi_slab_size
;
2130 uint_t wi_slab_found
;
2131 uint_t wi_kmem_lite_count
;
2135 /* call one of our dcmd functions with "-v" and the provided address */
2137 whatis_call_printer(mdb_dcmd_f
*dcmd
, uintptr_t addr
)
2140 a
.a_type
= MDB_TYPE_STRING
;
2141 a
.a_un
.a_str
= "-v";
2144 (void) (*dcmd
)(addr
, DCMD_ADDRSPEC
, 1, &a
);
2148 whatis_print_kmf_lite(uintptr_t btaddr
, size_t count
)
2150 #define KMEM_LITE_MAX 16
2151 pc_t callers
[KMEM_LITE_MAX
];
2152 pc_t uninit
= (pc_t
)KMEM_UNINITIALIZED_PATTERN
;
2156 const char *plural
= "";
2159 /* validate our arguments and read in the buftag */
2160 if (count
== 0 || count
> KMEM_LITE_MAX
||
2161 mdb_vread(&bt
, sizeof (bt
), btaddr
) == -1)
2164 /* validate the buffer state and read in the callers */
2165 stat
= (intptr_t)bt
.bt_bufctl
^ bt
.bt_bxstat
;
2167 if (stat
!= KMEM_BUFTAG_ALLOC
&& stat
!= KMEM_BUFTAG_FREE
)
2170 if (mdb_vread(callers
, count
* sizeof (pc_t
),
2171 btaddr
+ offsetof(kmem_buftag_lite_t
, bt_history
)) == -1)
2174 /* If there aren't any filled in callers, bail */
2175 if (callers
[0] == uninit
)
2178 plural
= (callers
[1] == uninit
) ? "" : "s";
2180 /* Everything's done and checked; print them out */
2184 mdb_printf("recent caller%s: %a", plural
, callers
[0]);
2185 for (i
= 1; i
< count
; i
++) {
2186 if (callers
[i
] == uninit
)
2188 mdb_printf(", %a", callers
[i
]);
2194 whatis_print_kmem(whatis_info_t
*wi
, uintptr_t maddr
, uintptr_t addr
,
2197 mdb_whatis_t
*w
= wi
->wi_w
;
2199 const kmem_cache_t
*cp
= wi
->wi_cache
;
2200 /* LINTED pointer cast may result in improper alignment */
2201 uintptr_t btaddr
= (uintptr_t)KMEM_BUFTAG(cp
, addr
);
2202 int quiet
= (mdb_whatis_flags(w
) & WHATIS_QUIET
);
2203 int call_printer
= (!quiet
&& (cp
->cache_flags
& KMF_AUDIT
));
2205 mdb_whatis_report_object(w
, maddr
, addr
, "");
2207 if (baddr
!= 0 && !call_printer
)
2208 mdb_printf("bufctl %p ", baddr
);
2210 mdb_printf("%s from %s",
2211 (wi
->wi_freemem
== FALSE
) ? "allocated" : "freed", cp
->cache_name
);
2213 if (baddr
!= 0 && call_printer
) {
2214 whatis_call_printer(bufctl
, baddr
);
2218 /* for KMF_LITE caches, try to print out the previous callers */
2219 if (!quiet
&& (cp
->cache_flags
& KMF_LITE
))
2220 whatis_print_kmf_lite(btaddr
, wi
->wi_kmem_lite_count
);
2227 whatis_walk_kmem(uintptr_t addr
, void *ignored
, whatis_info_t
*wi
)
2229 mdb_whatis_t
*w
= wi
->wi_w
;
2232 size_t size
= wi
->wi_cache
->cache_bufsize
;
2234 while (mdb_whatis_match(w
, addr
, size
, &cur
))
2235 whatis_print_kmem(wi
, cur
, addr
, (uintptr_t)NULL
);
2237 return (WHATIS_WALKRET(w
));
2242 whatis_walk_bufctl(uintptr_t baddr
, const kmem_bufctl_t
*bcp
, whatis_info_t
*wi
)
2244 mdb_whatis_t
*w
= wi
->wi_w
;
2247 uintptr_t addr
= (uintptr_t)bcp
->bc_addr
;
2248 size_t size
= wi
->wi_cache
->cache_bufsize
;
2250 while (mdb_whatis_match(w
, addr
, size
, &cur
))
2251 whatis_print_kmem(wi
, cur
, addr
, baddr
);
2253 return (WHATIS_WALKRET(w
));
2257 whatis_walk_seg(uintptr_t addr
, const vmem_seg_t
*vs
, whatis_info_t
*wi
)
2259 mdb_whatis_t
*w
= wi
->wi_w
;
2261 size_t size
= vs
->vs_end
- vs
->vs_start
;
2264 /* We're not interested in anything but alloc and free segments */
2265 if (vs
->vs_type
!= VMEM_ALLOC
&& vs
->vs_type
!= VMEM_FREE
)
2268 while (mdb_whatis_match(w
, vs
->vs_start
, size
, &cur
)) {
2269 mdb_whatis_report_object(w
, cur
, vs
->vs_start
, "");
2272 * If we're not printing it seperately, provide the vmem_seg
2273 * pointer if it has a stack trace.
2275 if ((mdb_whatis_flags(w
) & WHATIS_QUIET
) &&
2276 (!(mdb_whatis_flags(w
) & WHATIS_BUFCTL
) ||
2277 (vs
->vs_type
== VMEM_ALLOC
&& vs
->vs_depth
!= 0))) {
2278 mdb_printf("vmem_seg %p ", addr
);
2281 mdb_printf("%s from the %s vmem arena",
2282 (vs
->vs_type
== VMEM_ALLOC
) ? "allocated" : "freed",
2283 wi
->wi_vmem
->vm_name
);
2285 if (!(mdb_whatis_flags(w
) & WHATIS_QUIET
))
2286 whatis_call_printer(vmem_seg
, addr
);
2291 return (WHATIS_WALKRET(w
));
2295 whatis_walk_vmem(uintptr_t addr
, const vmem_t
*vmem
, whatis_info_t
*wi
)
2297 mdb_whatis_t
*w
= wi
->wi_w
;
2298 const char *nm
= vmem
->vm_name
;
2300 int identifier
= ((vmem
->vm_cflags
& VMC_IDENTIFIER
) != 0);
2301 int idspace
= ((mdb_whatis_flags(w
) & WHATIS_IDSPACE
) != 0);
2303 if (identifier
!= idspace
)
2308 if (mdb_whatis_flags(w
) & WHATIS_VERBOSE
)
2309 mdb_printf("Searching vmem arena %s...\n", nm
);
2311 if (mdb_pwalk("vmem_seg",
2312 (mdb_walk_cb_t
)whatis_walk_seg
, wi
, addr
) == -1) {
2313 mdb_warn("can't walk vmem_seg for %p", addr
);
2317 return (WHATIS_WALKRET(w
));
2322 whatis_walk_slab(uintptr_t saddr
, const kmem_slab_t
*sp
, whatis_info_t
*wi
)
2324 mdb_whatis_t
*w
= wi
->wi_w
;
2326 /* It must overlap with the slab data, or it's not interesting */
2327 if (mdb_whatis_overlaps(w
,
2328 (uintptr_t)sp
->slab_base
, wi
->wi_slab_size
)) {
2329 wi
->wi_slab_found
++;
2336 whatis_walk_cache(uintptr_t addr
, const kmem_cache_t
*c
, whatis_info_t
*wi
)
2338 mdb_whatis_t
*w
= wi
->wi_w
;
2340 char *walk
, *freewalk
;
2344 int identifier
= ((c
->cache_flags
& KMC_IDENTIFIER
) != 0);
2345 int idspace
= ((mdb_whatis_flags(w
) & WHATIS_IDSPACE
) != 0);
2347 if (identifier
!= idspace
)
2350 /* Override the '-b' flag as necessary */
2351 if (!(c
->cache_flags
& KMF_HASH
))
2352 do_bufctl
= FALSE
; /* no bufctls to walk */
2353 else if (c
->cache_flags
& KMF_AUDIT
)
2354 do_bufctl
= TRUE
; /* we always want debugging info */
2356 do_bufctl
= ((mdb_whatis_flags(w
) & WHATIS_BUFCTL
) != 0);
2360 freewalk
= "freectl";
2361 func
= (mdb_walk_cb_t
)whatis_walk_bufctl
;
2364 freewalk
= "freemem";
2365 func
= (mdb_walk_cb_t
)whatis_walk_kmem
;
2370 if (mdb_whatis_flags(w
) & WHATIS_VERBOSE
)
2371 mdb_printf("Searching %s...\n", c
->cache_name
);
2374 * If more then two buffers live on each slab, figure out if we're
2375 * interested in anything in any slab before doing the more expensive
2376 * kmem/freemem (bufctl/freectl) walkers.
2378 wi
->wi_slab_size
= c
->cache_slabsize
- c
->cache_maxcolor
;
2379 if (!(c
->cache_flags
& KMF_HASH
))
2380 wi
->wi_slab_size
-= sizeof (kmem_slab_t
);
2382 if ((wi
->wi_slab_size
/ c
->cache_chunksize
) > 2) {
2383 wi
->wi_slab_found
= 0;
2384 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t
)whatis_walk_slab
, wi
,
2386 mdb_warn("can't find kmem_slab walker");
2389 if (wi
->wi_slab_found
== 0)
2393 wi
->wi_freemem
= FALSE
;
2394 if (mdb_pwalk(walk
, func
, wi
, addr
) == -1) {
2395 mdb_warn("can't find %s walker", walk
);
2399 if (mdb_whatis_done(w
))
2403 * We have searched for allocated memory; now search for freed memory.
2405 if (mdb_whatis_flags(w
) & WHATIS_VERBOSE
)
2406 mdb_printf("Searching %s for free memory...\n", c
->cache_name
);
2408 wi
->wi_freemem
= TRUE
;
2409 if (mdb_pwalk(freewalk
, func
, wi
, addr
) == -1) {
2410 mdb_warn("can't find %s walker", freewalk
);
2414 return (WHATIS_WALKRET(w
));
2418 whatis_walk_touch(uintptr_t addr
, const kmem_cache_t
*c
, whatis_info_t
*wi
)
2420 if (c
->cache_arena
== wi
->wi_msb_arena
||
2421 (c
->cache_cflags
& KMC_NOTOUCH
))
2424 return (whatis_walk_cache(addr
, c
, wi
));
2428 whatis_walk_metadata(uintptr_t addr
, const kmem_cache_t
*c
, whatis_info_t
*wi
)
2430 if (c
->cache_arena
!= wi
->wi_msb_arena
)
2433 return (whatis_walk_cache(addr
, c
, wi
));
2437 whatis_walk_notouch(uintptr_t addr
, const kmem_cache_t
*c
, whatis_info_t
*wi
)
2439 if (c
->cache_arena
== wi
->wi_msb_arena
||
2440 !(c
->cache_cflags
& KMC_NOTOUCH
))
2443 return (whatis_walk_cache(addr
, c
, wi
));
static int
whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
{
	uintptr_t cur;
	uintptr_t saddr;
	size_t size;

	/*
	 * Often, one calls ::whatis on an address from a thread structure.
	 * We use this opportunity to short circuit this case...
	 */
	while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
		mdb_whatis_report_object(w, cur, addr,
		    "allocated as a thread structure\n");

	/*
	 * Now check the stack
	 */
	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	/*
	 * This assumes that t_stk is the end of the stack, but it's really
	 * only the initial stack pointer for the thread.  Arguments to the
	 * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
	 * that 't->t_stk::whatis' reports "part of t's stack", we include
	 * t_stk in the range (the "+ 1", below), but the kernel should
	 * really include the full stack bounds where we can find it.
	 */
	saddr = (uintptr_t)t->t_stkbase;
	size = (uintptr_t)t->t_stk - saddr + 1;
	while (mdb_whatis_match(w, saddr, size, &cur))
		mdb_whatis_report_object(w, cur, cur,
		    "in thread %p's stack%s\n", addr, stack_active(t, cur));

	return (WHATIS_WALKRET(w));
}
static void
whatis_modctl_match(mdb_whatis_t *w, const char *name,
    uintptr_t base, size_t size, const char *where)
{
	uintptr_t cur;

	/*
	 * Since we're searching for addresses inside a module, we report
	 * each match as an address within the named section.
	 */
	while (mdb_whatis_match(w, base, size, &cur))
		mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
}

static int
whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
{
	char name[MODMAXNAMELEN];
	struct module mod;
	Shdr shdr;

	if (m->mod_mp == NULL)
		return (WALK_NEXT);

	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
		mdb_warn("couldn't read modctl %p's module", addr);
		return (WALK_NEXT);
	}

	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);

	whatis_modctl_match(w, name,
	    (uintptr_t)mod.text, mod.text_size, "text segment");
	whatis_modctl_match(w, name,
	    (uintptr_t)mod.data, mod.data_size, "data segment");
	whatis_modctl_match(w, name,
	    (uintptr_t)mod.bss, mod.bss_size, "bss segment");

	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
		mdb_warn("couldn't read symbol header for %p's module", addr);
		return (WALK_NEXT);
	}

	whatis_modctl_match(w, name,
	    (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
	whatis_modctl_match(w, name,
	    (uintptr_t)mod.symspace, mod.symsize, "symtab");

	return (WHATIS_WALKRET(w));
}
static int
whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
{
	uintptr_t cur;
	uintptr_t base = (uintptr_t)seg->pages;
	size_t size = (uintptr_t)seg->epages - base;

	while (mdb_whatis_match(w, base, size, &cur)) {
		/* round our found pointer down to the page_t base. */
		size_t offset = (cur - base) % sizeof (page_t);

		mdb_whatis_report_object(w, cur, cur - offset,
		    "allocated as a page structure\n");
	}

	return (WHATIS_WALKRET(w));
}
static int
whatis_run_modules(mdb_whatis_t *w, void *arg)
{
	if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
		mdb_warn("couldn't find modctl walker");
		return (1);
	}
	return (0);
}

static int
whatis_run_threads(mdb_whatis_t *w, void *ignored)
{
	/*
	 * Now search all thread stacks.  Yes, this is a little weak; we
	 * can save a lot of work by first checking to see if the
	 * address is in segkp vs. segkmem.  But hey, computers are fast.
	 */
	if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
		mdb_warn("couldn't find thread walker");
		return (1);
	}
	return (0);
}

static int
whatis_run_pages(mdb_whatis_t *w, void *ignored)
{
	if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
		mdb_warn("couldn't find memseg walker");
		return (1);
	}
	return (0);
}
static int
whatis_run_kmem(mdb_whatis_t *w, void *ignored)
{
	whatis_info_t wi;

	bzero(&wi, sizeof (wi));
	wi.wi_w = w;

	if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
		mdb_warn("unable to readvar \"kmem_msb_arena\"");

	if (mdb_readvar(&wi.wi_kmem_lite_count,
	    "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
		wi.wi_kmem_lite_count = 0;

	/*
	 * We process kmem caches in the following order:
	 *
	 *	non-KMC_NOTOUCH, non-metadata	(typically the most interesting)
	 *	metadata			(can be huge with KMF_AUDIT)
	 *	KMC_NOTOUCH, non-metadata	(see kmem_walk_all())
	 */
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
	    &wi) == -1 ||
	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
	    &wi) == -1 ||
	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
	    &wi) == -1) {
		mdb_warn("couldn't find kmem_cache walker");
		return (1);
	}
	return (0);
}

static int
whatis_run_vmem(mdb_whatis_t *w, void *ignored)
{
	whatis_info_t wi;

	bzero(&wi, sizeof (wi));
	wi.wi_w = w;

	if (mdb_walk("vmem_postfix",
	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
		mdb_warn("couldn't find vmem_postfix walker");
		return (1);
	}
	return (0);
}
typedef struct kmem_log_cpu {
	uintptr_t kmc_low;
	uintptr_t kmc_high;
} kmem_log_cpu_t;

typedef struct kmem_log_data {
	uintptr_t kmd_addr;
	kmem_log_cpu_t *kmd_cpu;
} kmem_log_data_t;

static int
kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
    kmem_log_data_t *kmd)
{
	int i;
	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
	size_t bufsize;

	for (i = 0; i < NCPU; i++) {
		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
			break;
	}

	if (kmd->kmd_addr) {
		if (b->bc_cache == NULL)
			return (WALK_NEXT);

		if (mdb_vread(&bufsize, sizeof (bufsize),
		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
			mdb_warn(
			    "failed to read cache_bufsize for cache at %p",
			    b->bc_cache);
			return (WALK_ERR);
		}

		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
			return (WALK_NEXT);
	}

	mdb_printf("%3d", i);

	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
	    b->bc_timestamp, b->bc_thread);

	return (WALK_NEXT);
}
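/*
 * ::kmem_log: display the kmem transaction log.  With no address, every
 * record in each per-CPU log chunk is printed; with an address, only records
 * referring to that buffer are shown, and -b makes the address itself be
 * interpreted as a bufctl.
 */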
2700 kmem_log(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
2702 kmem_log_header_t lh
;
2703 kmem_cpu_log_header_t clh
;
2704 uintptr_t lhp
, clhp
;
2708 kmem_log_cpu_t
*kmc
;
2710 kmem_log_data_t kmd
;
2711 uint_t opt_b
= FALSE
;
2713 if (mdb_getopts(argc
, argv
,
2714 'b', MDB_OPT_SETBITS
, TRUE
, &opt_b
, NULL
) != argc
)
2715 return (DCMD_USAGE
);
2717 if (mdb_readvar(&lhp
, "kmem_transaction_log") == -1) {
2718 mdb_warn("failed to read 'kmem_transaction_log'");
2722 if (lhp
== (uintptr_t)NULL
) {
2723 mdb_warn("no kmem transaction log\n");
2727 mdb_readvar(&ncpus
, "ncpus");
2729 if (mdb_vread(&lh
, sizeof (kmem_log_header_t
), lhp
) == -1) {
2730 mdb_warn("failed to read log header at %p", lhp
);
2734 clhp
= lhp
+ ((uintptr_t)&lh
.lh_cpu
[0] - (uintptr_t)&lh
);
2736 cpu
= mdb_alloc(sizeof (uintptr_t) * NCPU
, UM_SLEEP
| UM_GC
);
2738 if (mdb_lookup_by_name("cpu", &sym
) == -1) {
2739 mdb_warn("couldn't find 'cpu' array");
2743 if (sym
.st_size
!= NCPU
* sizeof (uintptr_t)) {
2744 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2745 NCPU
* sizeof (uintptr_t), sym
.st_size
);
2749 if (mdb_vread(cpu
, sym
.st_size
, (uintptr_t)sym
.st_value
) == -1) {
2750 mdb_warn("failed to read cpu array at %p", sym
.st_value
);
2754 kmc
= mdb_zalloc(sizeof (kmem_log_cpu_t
) * NCPU
, UM_SLEEP
| UM_GC
);
2755 kmd
.kmd_addr
= (uintptr_t)NULL
;
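	/*
	 * For each CPU, record the [kmc_low, kmc_high) address range of its
	 * current log chunk so that kmem_log_walk() can attribute each
	 * bufctl record to the CPU that logged it.
	 */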
2758 for (i
= 0; i
< NCPU
; i
++) {
2760 if (cpu
[i
] == (uintptr_t)NULL
)
2763 if (mdb_vread(&clh
, sizeof (clh
), clhp
) == -1) {
2764 mdb_warn("cannot read cpu %d's log header at %p",
2769 kmc
[i
].kmc_low
= clh
.clh_chunk
* lh
.lh_chunksize
+
2770 (uintptr_t)lh
.lh_base
;
2771 kmc
[i
].kmc_high
= (uintptr_t)clh
.clh_current
;
2773 clhp
+= sizeof (kmem_cpu_log_header_t
);
2776 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2777 "TIMESTAMP", "THREAD");
2780 * If we have been passed an address, print out only log entries
2781 * corresponding to that address. If opt_b is specified, then interpret
2782 * the address as a bufctl.
2784 if (flags
& DCMD_ADDRSPEC
) {
2785 kmem_bufctl_audit_t b
;
2788 kmd
.kmd_addr
= addr
;
2791 sizeof (kmem_bufctl_audit_t
), addr
) == -1) {
2792 mdb_warn("failed to read bufctl at %p", addr
);
2796 (void) kmem_log_walk(addr
, &b
, &kmd
);
2802 if (mdb_walk("kmem_log", (mdb_walk_cb_t
)kmem_log_walk
, &kmd
) == -1) {
2803 mdb_warn("can't find kmem log walker");
2810 typedef struct bufctl_history_cb
{
2813 const mdb_arg_t
*bhc_argv
;
2815 } bufctl_history_cb_t
;
2819 bufctl_history_callback(uintptr_t addr
, const void *ign
, void *arg
)
2821 bufctl_history_cb_t
*bhc
= arg
;
2824 bufctl(addr
, bhc
->bhc_flags
, bhc
->bhc_argc
, bhc
->bhc_argv
);
2826 bhc
->bhc_flags
&= ~DCMD_LOOPFIRST
;
2828 return ((bhc
->bhc_ret
== DCMD_OK
)? WALK_NEXT
: WALK_DONE
);
2835 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2837 mdb_printf("%<b>OPTIONS%</b>\n");
2840 " -v Display the full content of the bufctl, including its stack trace\n"
2841 " -h retrieve the bufctl's transaction history, if available\n"
2843 " filter out bufctls not involving the buffer at addr\n"
2845 " filter out bufctls without the function/PC in their stack trace\n"
2847 " filter out bufctls timestamped before earliest\n"
2849 " filter out bufctls timestamped after latest\n"
2851 " filter out bufctls not involving thread\n");
2855 bufctl(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
2857 kmem_bufctl_audit_t bc
;
2858 uint_t verbose
= FALSE
;
2859 uint_t history
= FALSE
;
2860 uint_t in_history
= FALSE
;
2861 uintptr_t caller
= (uintptr_t)NULL
, thread
= (uintptr_t)NULL
;
2862 uintptr_t laddr
, haddr
, baddr
= (uintptr_t)NULL
;
2863 hrtime_t earliest
= 0, latest
= 0;
2865 char c
[MDB_SYM_NAMLEN
];
2868 if (mdb_getopts(argc
, argv
,
2869 'v', MDB_OPT_SETBITS
, TRUE
, &verbose
,
2870 'h', MDB_OPT_SETBITS
, TRUE
, &history
,
2871 'H', MDB_OPT_SETBITS
, TRUE
, &in_history
, /* internal */
2872 'c', MDB_OPT_UINTPTR
, &caller
,
2873 't', MDB_OPT_UINTPTR
, &thread
,
2874 'e', MDB_OPT_UINT64
, &earliest
,
2875 'l', MDB_OPT_UINT64
, &latest
,
2876 'a', MDB_OPT_UINTPTR
, &baddr
, NULL
) != argc
)
2877 return (DCMD_USAGE
);
2879 if (!(flags
& DCMD_ADDRSPEC
))
2880 return (DCMD_USAGE
);
2882 if (in_history
&& !history
)
2883 return (DCMD_USAGE
);
2885 if (history
&& !in_history
) {
2886 mdb_arg_t
*nargv
= mdb_zalloc(sizeof (*nargv
) * (argc
+ 1),
2888 bufctl_history_cb_t bhc
;
2890 nargv
[0].a_type
= MDB_TYPE_STRING
;
2891 nargv
[0].a_un
.a_str
= "-H"; /* prevent recursion */
2893 for (i
= 0; i
< argc
; i
++)
2894 nargv
[i
+ 1] = argv
[i
];
2897 * When in history mode, we treat each element as if it
 * were in a separate loop, so that the headers group
2899 * bufctls with similar histories.
2901 bhc
.bhc_flags
= flags
| DCMD_LOOP
| DCMD_LOOPFIRST
;
2902 bhc
.bhc_argc
= argc
+ 1;
2903 bhc
.bhc_argv
= nargv
;
2904 bhc
.bhc_ret
= DCMD_OK
;
2906 if (mdb_pwalk("bufctl_history", bufctl_history_callback
, &bhc
,
2908 mdb_warn("unable to walk bufctl_history");
2912 if (bhc
.bhc_ret
== DCMD_OK
&& !(flags
& DCMD_PIPE_OUT
))
2915 return (bhc
.bhc_ret
);
2918 if (DCMD_HDRSPEC(flags
) && !(flags
& DCMD_PIPE_OUT
)) {
2920 mdb_printf("%16s %16s %16s %16s\n"
2921 "%<u>%16s %16s %16s %16s%</u>\n",
2922 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2923 "", "CACHE", "LASTLOG", "CONTENTS");
2925 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2926 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2930 if (mdb_vread(&bc
, sizeof (bc
), addr
) == -1) {
2931 mdb_warn("couldn't read bufctl at %p", addr
);
2936 * Guard against bogus bc_depth in case the bufctl is corrupt or
2937 * the address does not really refer to a bufctl.
2939 depth
= MIN(bc
.bc_depth
, KMEM_STACK_DEPTH
);
2941 if (caller
!= (uintptr_t)NULL
) {
2943 haddr
= caller
+ sizeof (caller
);
2945 if (mdb_lookup_by_addr(caller
, MDB_SYM_FUZZY
, c
, sizeof (c
),
2946 &sym
) != -1 && caller
== (uintptr_t)sym
.st_value
) {
2948 * We were provided an exact symbol value; any
2949 * address in the function is valid.
2951 laddr
= (uintptr_t)sym
.st_value
;
2952 haddr
= (uintptr_t)sym
.st_value
+ sym
.st_size
;
2955 for (i
= 0; i
< depth
; i
++)
2956 if (bc
.bc_stack
[i
] >= laddr
&& bc
.bc_stack
[i
] < haddr
)
2963 if (thread
!= 0 && (uintptr_t)bc
.bc_thread
!= thread
)
2966 if (earliest
!= 0 && bc
.bc_timestamp
< earliest
)
2969 if (latest
!= 0 && bc
.bc_timestamp
> latest
)
2972 if (baddr
!= 0 && (uintptr_t)bc
.bc_addr
!= baddr
)
2975 if (flags
& DCMD_PIPE_OUT
) {
2976 mdb_printf("%#lr\n", addr
);
2982 "%<b>%16p%</b> %16p %16llx %16p\n"
2983 "%16s %16p %16p %16p\n",
2984 addr
, bc
.bc_addr
, bc
.bc_timestamp
, bc
.bc_thread
,
2985 "", bc
.bc_cache
, bc
.bc_lastlog
, bc
.bc_contents
);
2988 for (i
= 0; i
< depth
; i
++)
2989 mdb_printf("%a\n", bc
.bc_stack
[i
]);
2993 mdb_printf("%0?p %0?p %12llx %0?p", addr
, bc
.bc_addr
,
2994 bc
.bc_timestamp
, bc
.bc_thread
);
2996 for (i
= 0; i
< depth
; i
++) {
2997 if (mdb_lookup_by_addr(bc
.bc_stack
[i
],
2998 MDB_SYM_FUZZY
, c
, sizeof (c
), &sym
) == -1)
3000 if (strncmp(c
, "kmem_", 5) == 0)
3002 mdb_printf(" %a\n", bc
.bc_stack
[i
]);
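/*
 * ::kmem_verify machinery.  Given a cache address, we read every allocated
 * and free buffer in the cache and check its redzone, free pattern, and
 * buftag; with no address we walk every cache and print a one-line
 * clean/corrupt summary for each.
 */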
typedef struct kmem_verify {
	uint64_t *kmv_buf;		/* buffer to read cache contents into */
	size_t kmv_size;		/* number of bytes in kmv_buf */
	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* report actual corruption sites */
	struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;

/*
 * verify that buf is filled with the pattern pat.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
	uint64_t *buf;

	for (buf = buf_arg; buf < bufend; buf++)
		if (*buf != pat)
			return ((uintptr_t)buf - (uintptr_t)buf_arg);

	return (-1);
}

/*
 * verify that btp->bt_bxstat == (bcp ^ pat)
 */
static int
verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
{
	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
}
3050 * verify the integrity of a free block of memory by checking
3051 * that it is filled with 0xdeadbeef and that its buftag is sane.
3055 verify_free(uintptr_t addr
, const void *data
, void *private)
3057 kmem_verify_t
*kmv
= (kmem_verify_t
*)private;
3058 uint64_t *buf
= kmv
->kmv_buf
; /* buf to validate */
3059 int64_t corrupt
; /* corruption offset */
3060 kmem_buftag_t
*buftagp
; /* ptr to buftag */
3061 kmem_cache_t
*cp
= &kmv
->kmv_cache
;
3062 int besilent
= kmv
->kmv_besilent
;
3065 buftagp
= KMEM_BUFTAG(cp
, buf
);
3068 * Read the buffer to check.
3070 if (mdb_vread(buf
, kmv
->kmv_size
, addr
) == -1) {
3072 mdb_warn("couldn't read %p", addr
);
3076 if ((corrupt
= verify_pattern(buf
, cp
->cache_verify
,
3077 KMEM_FREE_PATTERN
)) >= 0) {
3079 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3080 addr
, (uintptr_t)addr
+ corrupt
);
3084 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3085 * the first bytes of the buffer, hence we cannot check for red
3088 if ((cp
->cache_flags
& (KMF_HASH
| KMF_LITE
)) == KMF_HASH
&&
3089 buftagp
->bt_redzone
!= KMEM_REDZONE_PATTERN
) {
3091 mdb_printf("buffer %p (free) seems to "
3092 "have a corrupt redzone pattern\n", addr
);
3097 * confirm bufctl pointer integrity.
3099 if (verify_buftag(buftagp
, KMEM_BUFTAG_FREE
) == -1) {
3101 mdb_printf("buffer %p (free) has a corrupt "
3108 kmv
->kmv_corruption
++;
3114 * Verify that the buftag of an allocated buffer makes sense with respect
3119 verify_alloc(uintptr_t addr
, const void *data
, void *private)
3121 kmem_verify_t
*kmv
= (kmem_verify_t
*)private;
3122 kmem_cache_t
*cp
= &kmv
->kmv_cache
;
3123 uint64_t *buf
= kmv
->kmv_buf
; /* buf to validate */
3125 kmem_buftag_t
*buftagp
= KMEM_BUFTAG(cp
, buf
);
3126 uint32_t *ip
= (uint32_t *)buftagp
;
3127 uint8_t *bp
= (uint8_t *)buf
;
3128 int looks_ok
= 0, size_ok
= 1; /* flags for finding corruption */
3129 int besilent
= kmv
->kmv_besilent
;
3132 * Read the buffer to check.
3134 if (mdb_vread(buf
, kmv
->kmv_size
, addr
) == -1) {
3136 mdb_warn("couldn't read %p", addr
);
3141 * There are two cases to handle:
3142 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3143 * 0xfeedfacefeedface at the end of it
3144 * 2. If the buf was alloc'd using kmem_alloc, it will have
3145 * 0xbb just past the end of the region in use. At the buftag,
3146 * it will have 0xfeedface (or, if the whole buffer is in use,
3147 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3148 * endianness), followed by 32 bits containing the offset of the
3149 * 0xbb byte in the buffer.
3151 * Finally, the two 32-bit words that comprise the second half of the
3152 * buftag should xor to KMEM_BUFTAG_ALLOC
3155 if (buftagp
->bt_redzone
== KMEM_REDZONE_PATTERN
)
3157 else if (!KMEM_SIZE_VALID(ip
[1]))
3159 else if (bp
[KMEM_SIZE_DECODE(ip
[1])] == KMEM_REDZONE_BYTE
)
3166 mdb_printf("buffer %p (allocated) has a corrupt "
3167 "redzone size encoding\n", addr
);
3173 mdb_printf("buffer %p (allocated) has a corrupt "
3174 "redzone signature\n", addr
);
3178 if (verify_buftag(buftagp
, KMEM_BUFTAG_ALLOC
) == -1) {
3180 mdb_printf("buffer %p (allocated) has a "
3181 "corrupt buftag\n", addr
);
3187 kmv
->kmv_corruption
++;
3193 kmem_verify(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
3195 if (flags
& DCMD_ADDRSPEC
) {
3196 int check_alloc
= 0, check_free
= 0;
3199 if (mdb_vread(&kmv
.kmv_cache
, sizeof (kmv
.kmv_cache
),
3201 mdb_warn("couldn't read kmem_cache %p", addr
);
3205 kmv
.kmv_size
= kmv
.kmv_cache
.cache_buftag
+
3206 sizeof (kmem_buftag_t
);
3207 kmv
.kmv_buf
= mdb_alloc(kmv
.kmv_size
, UM_SLEEP
| UM_GC
);
3208 kmv
.kmv_corruption
= 0;
3210 if ((kmv
.kmv_cache
.cache_flags
& KMF_REDZONE
)) {
3212 if (kmv
.kmv_cache
.cache_flags
& KMF_DEADBEEF
)
3215 if (!(flags
& DCMD_LOOP
)) {
3216 mdb_warn("cache %p (%s) does not have "
3217 "redzone checking enabled\n", addr
,
3218 kmv
.kmv_cache
.cache_name
);
3223 if (flags
& DCMD_LOOP
) {
3225 * table mode, don't print out every corrupt buffer
3227 kmv
.kmv_besilent
= 1;
3229 mdb_printf("Summary for cache '%s'\n",
3230 kmv
.kmv_cache
.cache_name
);
3232 kmv
.kmv_besilent
= 0;
3236 (void) mdb_pwalk("kmem", verify_alloc
, &kmv
, addr
);
3238 (void) mdb_pwalk("freemem", verify_free
, &kmv
, addr
);
3240 if (flags
& DCMD_LOOP
) {
3241 if (kmv
.kmv_corruption
== 0) {
3242 mdb_printf("%-*s %?p clean\n",
3244 kmv
.kmv_cache
.cache_name
, addr
);
3246 char *s
= ""; /* optional s in "buffer[s]" */
3247 if (kmv
.kmv_corruption
> 1)
3250 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3252 kmv
.kmv_cache
.cache_name
, addr
,
3253 kmv
.kmv_corruption
, s
);
3257 * This is the more verbose mode, when the user has
3258 * type addr::kmem_verify. If the cache was clean,
3259 * nothing will have yet been printed. So say something.
3261 if (kmv
.kmv_corruption
== 0)
3262 mdb_printf("clean\n");
3268 * If the user didn't specify a cache to verify, we'll walk all
3269 * kmem_cache's, specifying ourself as a callback for each...
3270 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3272 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN
,
3273 "Cache Name", "Addr", "Cache Integrity");
3274 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL
));
3280 typedef struct vmem_node
{
3281 struct vmem_node
*vn_next
;
3282 struct vmem_node
*vn_parent
;
3283 struct vmem_node
*vn_sibling
;
3284 struct vmem_node
*vn_children
;
3290 typedef struct vmem_walk
{
3291 vmem_node_t
*vw_root
;
3292 vmem_node_t
*vw_current
;
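/*
 * The "vmem" walker builds an in-core tree of every vmem arena (linked
 * through vm_source) and then traverses it: vmem_walk_step() visits a node
 * before its children, while vmem_postfix_walk_step() visits all children
 * before their parent (the ordering ::whatis relies on via "vmem_postfix").
 */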
3296 vmem_walk_init(mdb_walk_state_t
*wsp
)
3298 uintptr_t vaddr
, paddr
;
3299 vmem_node_t
*head
= NULL
, *root
= NULL
, *current
= NULL
, *parent
, *vp
;
3302 if (mdb_readvar(&vaddr
, "vmem_list") == -1) {
3303 mdb_warn("couldn't read 'vmem_list'");
3307 while (vaddr
!= (uintptr_t)NULL
) {
3308 vp
= mdb_zalloc(sizeof (vmem_node_t
), UM_SLEEP
);
3309 vp
->vn_addr
= vaddr
;
3313 if (vaddr
== wsp
->walk_addr
)
3316 if (mdb_vread(&vp
->vn_vmem
, sizeof (vmem_t
), vaddr
) == -1) {
3317 mdb_warn("couldn't read vmem_t at %p", vaddr
);
3321 vaddr
= (uintptr_t)vp
->vn_vmem
.vm_next
;
3324 for (vp
= head
; vp
!= NULL
; vp
= vp
->vn_next
) {
3326 if ((paddr
= (uintptr_t)vp
->vn_vmem
.vm_source
) ==
3328 vp
->vn_sibling
= root
;
3333 for (parent
= head
; parent
!= NULL
; parent
= parent
->vn_next
) {
3334 if (parent
->vn_addr
!= paddr
)
3336 vp
->vn_sibling
= parent
->vn_children
;
3337 parent
->vn_children
= vp
;
3338 vp
->vn_parent
= parent
;
3342 if (parent
== NULL
) {
3343 mdb_warn("couldn't find %p's parent (%p)\n",
3344 vp
->vn_addr
, paddr
);
3349 vw
= mdb_zalloc(sizeof (vmem_walk_t
), UM_SLEEP
);
3352 if (current
!= NULL
)
3353 vw
->vw_current
= current
;
3355 vw
->vw_current
= root
;
3357 wsp
->walk_data
= vw
;
3360 for (vp
= head
; head
!= NULL
; vp
= head
) {
3362 mdb_free(vp
, sizeof (vmem_node_t
));
3369 vmem_walk_step(mdb_walk_state_t
*wsp
)
3371 vmem_walk_t
*vw
= wsp
->walk_data
;
3375 if ((vp
= vw
->vw_current
) == NULL
)
3378 rval
= wsp
->walk_callback(vp
->vn_addr
, &vp
->vn_vmem
, wsp
->walk_cbdata
);
3380 if (vp
->vn_children
!= NULL
) {
3381 vw
->vw_current
= vp
->vn_children
;
3386 vw
->vw_current
= vp
->vn_sibling
;
3388 } while (vw
->vw_current
== NULL
&& vp
!= NULL
);
3394 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3395 * children are visited before their parent. We perform the postfix walk
3396 * iteratively (rather than recursively) to allow mdb to regain control
3397 * after each callback.
3400 vmem_postfix_walk_step(mdb_walk_state_t
*wsp
)
3402 vmem_walk_t
*vw
= wsp
->walk_data
;
3403 vmem_node_t
*vp
= vw
->vw_current
;
3407 * If this node is marked, then we know that we have already visited
3408 * all of its children. If the node has any siblings, they need to
3409 * be visited next; otherwise, we need to visit the parent. Note
3410 * that vp->vn_marked will only be zero on the first invocation of
3411 * the step function.
3413 if (vp
->vn_marked
) {
3414 if (vp
->vn_sibling
!= NULL
)
3415 vp
= vp
->vn_sibling
;
3416 else if (vp
->vn_parent
!= NULL
)
3420 * We have neither a parent, nor a sibling, and we
3421 * have already been visited; we're done.
3428 * Before we visit this node, visit its children.
3430 while (vp
->vn_children
!= NULL
&& !vp
->vn_children
->vn_marked
)
3431 vp
= vp
->vn_children
;
3434 vw
->vw_current
= vp
;
3435 rval
= wsp
->walk_callback(vp
->vn_addr
, &vp
->vn_vmem
, wsp
->walk_cbdata
);
3441 vmem_walk_fini(mdb_walk_state_t
*wsp
)
3443 vmem_walk_t
*vw
= wsp
->walk_data
;
3444 vmem_node_t
*root
= vw
->vw_root
;
3450 if ((vw
->vw_root
= root
->vn_children
) != NULL
)
3451 vmem_walk_fini(wsp
);
3453 vw
->vw_root
= root
->vn_sibling
;
3454 done
= (root
->vn_sibling
== NULL
&& root
->vn_parent
== NULL
);
3455 mdb_free(root
, sizeof (vmem_node_t
));
3458 mdb_free(vw
, sizeof (vmem_walk_t
));
3460 vmem_walk_fini(wsp
);
3464 typedef struct vmem_seg_walk
{
3466 uintptr_t vsw_start
;
3467 uintptr_t vsw_current
;
3472 vmem_seg_walk_common_init(mdb_walk_state_t
*wsp
, uint8_t type
, char *name
)
3474 vmem_seg_walk_t
*vsw
;
3476 if (wsp
->walk_addr
== (uintptr_t)NULL
) {
3477 mdb_warn("vmem_%s does not support global walks\n", name
);
3481 wsp
->walk_data
= vsw
= mdb_alloc(sizeof (vmem_seg_walk_t
), UM_SLEEP
);
3483 vsw
->vsw_type
= type
;
3484 vsw
->vsw_start
= wsp
->walk_addr
+ offsetof(vmem_t
, vm_seg0
);
3485 vsw
->vsw_current
= vsw
->vsw_start
;
3491 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3496 vmem_alloc_walk_init(mdb_walk_state_t
*wsp
)
3498 return (vmem_seg_walk_common_init(wsp
, VMEM_ALLOC
, "alloc"));
3502 vmem_free_walk_init(mdb_walk_state_t
*wsp
)
3504 return (vmem_seg_walk_common_init(wsp
, VMEM_FREE
, "free"));
3508 vmem_span_walk_init(mdb_walk_state_t
*wsp
)
3510 return (vmem_seg_walk_common_init(wsp
, VMEM_SPAN
, "span"));
3514 vmem_seg_walk_init(mdb_walk_state_t
*wsp
)
3516 return (vmem_seg_walk_common_init(wsp
, VMEM_NONE
, "seg"));
3520 vmem_seg_walk_step(mdb_walk_state_t
*wsp
)
3523 vmem_seg_walk_t
*vsw
= wsp
->walk_data
;
3524 uintptr_t addr
= vsw
->vsw_current
;
3525 static size_t seg_size
= 0;
3529 if (mdb_readvar(&seg_size
, "vmem_seg_size") == -1) {
3530 mdb_warn("failed to read 'vmem_seg_size'");
3531 seg_size
= sizeof (vmem_seg_t
);
3535 if (seg_size
< sizeof (seg
))
3536 bzero((caddr_t
)&seg
+ seg_size
, sizeof (seg
) - seg_size
);
3538 if (mdb_vread(&seg
, seg_size
, addr
) == -1) {
3539 mdb_warn("couldn't read vmem_seg at %p", addr
);
3543 vsw
->vsw_current
= (uintptr_t)seg
.vs_anext
;
3544 if (vsw
->vsw_type
!= VMEM_NONE
&& seg
.vs_type
!= vsw
->vsw_type
) {
3547 rval
= wsp
->walk_callback(addr
, &seg
, wsp
->walk_cbdata
);
3550 if (vsw
->vsw_current
== vsw
->vsw_start
)
3557 vmem_seg_walk_fini(mdb_walk_state_t
*wsp
)
3559 vmem_seg_walk_t
*vsw
= wsp
->walk_data
;
3561 mdb_free(vsw
, sizeof (vmem_seg_walk_t
));
3564 #define VMEM_NAMEWIDTH 22
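/*
 * ::vmem: print a one-line summary for an arena (or, with no address, for
 * every arena), indenting the arena name by its depth in the source
 * hierarchy and reporting in-use bytes, total bytes, and the allocation
 * success and failure counts from its kstats.
 */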
3567 vmem(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
3570 vmem_kstat_t
*vkp
= &v
.vm_kstat
;
3573 char c
[VMEM_NAMEWIDTH
];
3575 if (!(flags
& DCMD_ADDRSPEC
)) {
3576 if (mdb_walk_dcmd("vmem", "vmem", argc
, argv
) == -1) {
3577 mdb_warn("can't walk vmem");
3583 if (DCMD_HDRSPEC(flags
))
3584 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3585 "ADDR", VMEM_NAMEWIDTH
, "NAME", "INUSE",
3586 "TOTAL", "SUCCEED", "FAIL");
3588 if (mdb_vread(&v
, sizeof (v
), addr
) == -1) {
3589 mdb_warn("couldn't read vmem at %p", addr
);
3593 for (paddr
= (uintptr_t)v
.vm_source
; paddr
!= (uintptr_t)NULL
;
3595 if (mdb_vread(&parent
, sizeof (parent
), paddr
) == -1) {
3596 mdb_warn("couldn't trace %p's ancestry", addr
);
3600 paddr
= (uintptr_t)parent
.vm_source
;
3603 (void) mdb_snprintf(c
, VMEM_NAMEWIDTH
, "%*s%s", ident
, "", v
.vm_name
);
3605 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3606 addr
, VMEM_NAMEWIDTH
, c
,
3607 vkp
->vk_mem_inuse
.value
.ui64
, vkp
->vk_mem_total
.value
.ui64
,
3608 vkp
->vk_alloc
.value
.ui64
, vkp
->vk_fail
.value
.ui64
);
3617 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3619 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3620 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3623 mdb_printf("%<b>OPTIONS%</b>\n");
3626 " -v Display the full content of the vmem_seg, including its stack trace\n"
3627 " -s report the size of the segment, instead of the end address\n"
3629 " filter out segments without the function/PC in their stack trace\n"
3631 " filter out segments timestamped before earliest\n"
3633 " filter out segments timestamped after latest\n"
3635 " filer out segments smaller than minsize\n"
3637 " filer out segments larger than maxsize\n"
3639 " filter out segments not involving thread\n"
3641 " filter out segments not of type 'type'\n"
3642 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3647 vmem_seg(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
3650 pc_t
*stk
= vs
.vs_stack
;
3653 const char *type
= NULL
;
3655 char c
[MDB_SYM_NAMLEN
];
3659 uintptr_t laddr
, haddr
;
3661 uintptr_t caller
= (uintptr_t)NULL
, thread
= (uintptr_t)NULL
;
3662 uintptr_t minsize
= 0, maxsize
= 0;
3664 hrtime_t earliest
= 0, latest
= 0;
3669 if (!(flags
& DCMD_ADDRSPEC
))
3670 return (DCMD_USAGE
);
3672 if (mdb_getopts(argc
, argv
,
3673 'c', MDB_OPT_UINTPTR
, &caller
,
3674 'e', MDB_OPT_UINT64
, &earliest
,
3675 'l', MDB_OPT_UINT64
, &latest
,
3676 's', MDB_OPT_SETBITS
, TRUE
, &size
,
3677 'm', MDB_OPT_UINTPTR
, &minsize
,
3678 'M', MDB_OPT_UINTPTR
, &maxsize
,
3679 't', MDB_OPT_UINTPTR
, &thread
,
3680 'T', MDB_OPT_STR
, &type
,
3681 'v', MDB_OPT_SETBITS
, TRUE
, &verbose
,
3683 return (DCMD_USAGE
);
3685 if (DCMD_HDRSPEC(flags
) && !(flags
& DCMD_PIPE_OUT
)) {
3687 mdb_printf("%16s %4s %16s %16s %16s\n"
3688 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3689 "ADDR", "TYPE", "START", "END", "SIZE",
3690 "", "", "THREAD", "TIMESTAMP", "");
3692 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3693 "START", size
? "SIZE" : "END", "WHO");
3697 if (mdb_vread(&vs
, sizeof (vs
), addr
) == -1) {
3698 mdb_warn("couldn't read vmem_seg at %p", addr
);
3703 if (strcmp(type
, "ALLC") == 0 || strcmp(type
, "ALLOC") == 0)
3705 else if (strcmp(type
, "FREE") == 0)
3707 else if (strcmp(type
, "SPAN") == 0)
3709 else if (strcmp(type
, "ROTR") == 0 ||
3710 strcmp(type
, "ROTOR") == 0)
3712 else if (strcmp(type
, "WLKR") == 0 ||
3713 strcmp(type
, "WALKER") == 0)
3716 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3721 if (vs
.vs_type
!= t
)
3725 sz
= vs
.vs_end
- vs
.vs_start
;
3727 if (minsize
!= 0 && sz
< minsize
)
3730 if (maxsize
!= 0 && sz
> maxsize
)
3734 depth
= vs
.vs_depth
;
3737 * debug info, when present, is only accurate for VMEM_ALLOC segments
3739 no_debug
= (t
!= VMEM_ALLOC
) ||
3740 (depth
== 0 || depth
> VMEM_STACK_DEPTH
);
3743 if (caller
!= (uintptr_t)NULL
|| thread
!= (uintptr_t)NULL
||
3746 return (DCMD_OK
); /* not enough info */
3748 if (caller
!= (uintptr_t)NULL
) {
3750 haddr
= caller
+ sizeof (caller
);
3752 if (mdb_lookup_by_addr(caller
, MDB_SYM_FUZZY
, c
,
3753 sizeof (c
), &sym
) != -1 &&
3754 caller
== (uintptr_t)sym
.st_value
) {
3756 * We were provided an exact symbol value; any
3757 * address in the function is valid.
3759 laddr
= (uintptr_t)sym
.st_value
;
3760 haddr
= (uintptr_t)sym
.st_value
+ sym
.st_size
;
3763 for (i
= 0; i
< depth
; i
++)
3764 if (vs
.vs_stack
[i
] >= laddr
&&
3765 vs
.vs_stack
[i
] < haddr
)
3772 if (thread
!= (uintptr_t)NULL
&&
3773 (uintptr_t)vs
.vs_thread
!= thread
)
3776 if (earliest
!= 0 && vs
.vs_timestamp
< earliest
)
3779 if (latest
!= 0 && vs
.vs_timestamp
> latest
)
3783 type
= (t
== VMEM_ALLOC
? "ALLC" :
3784 t
== VMEM_FREE
? "FREE" :
3785 t
== VMEM_SPAN
? "SPAN" :
3786 t
== VMEM_ROTOR
? "ROTR" :
3787 t
== VMEM_WALKER
? "WLKR" :
3790 if (flags
& DCMD_PIPE_OUT
) {
3791 mdb_printf("%#lr\n", addr
);
3796 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3797 addr
, type
, vs
.vs_start
, vs
.vs_end
, sz
);
3802 mdb_printf("%16s %4s %16p %16llx\n",
3803 "", "", vs
.vs_thread
, vs
.vs_timestamp
);
3806 for (i
= 0; i
< depth
; i
++) {
3807 mdb_printf("%a\n", stk
[i
]);
3812 mdb_printf("%0?p %4s %0?p %0?p", addr
, type
,
3813 vs
.vs_start
, size
? sz
: vs
.vs_end
);
3820 for (i
= 0; i
< depth
; i
++) {
3821 if (mdb_lookup_by_addr(stk
[i
], MDB_SYM_FUZZY
,
3822 c
, sizeof (c
), &sym
) == -1)
3824 if (strncmp(c
, "vmem_", 5) == 0)
3828 mdb_printf(" %a\n", stk
[i
]);
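/*
 * ::kmalog support: showbc() prints one transaction-log record (its age
 * relative to the newest record seen, the buffer address, the cache name,
 * and the stack trace), and kmalog() picks which log to walk: the
 * transaction log by default, or the failure or slab log when "fail" or
 * "slab" is passed as an argument.
 */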
3833 typedef struct kmalog_data
{
3835 hrtime_t kma_newest
;
3840 showbc(uintptr_t addr
, const kmem_bufctl_audit_t
*bcp
, kmalog_data_t
*kma
)
3842 char name
[KMEM_CACHE_NAMELEN
+ 1];
3847 if (bcp
->bc_timestamp
== 0)
3850 if (kma
->kma_newest
== 0)
3851 kma
->kma_newest
= bcp
->bc_timestamp
;
3853 if (kma
->kma_addr
) {
3854 if (mdb_vread(&bufsize
, sizeof (bufsize
),
3855 (uintptr_t)&bcp
->bc_cache
->cache_bufsize
) == -1) {
3857 "failed to read cache_bufsize for cache at %p",
3862 if (kma
->kma_addr
< (uintptr_t)bcp
->bc_addr
||
3863 kma
->kma_addr
>= (uintptr_t)bcp
->bc_addr
+ bufsize
)
3867 delta
= kma
->kma_newest
- bcp
->bc_timestamp
;
3868 depth
= MIN(bcp
->bc_depth
, KMEM_STACK_DEPTH
);
3870 if (mdb_readstr(name
, sizeof (name
), (uintptr_t)
3871 &bcp
->bc_cache
->cache_name
) <= 0)
3872 (void) mdb_snprintf(name
, sizeof (name
), "%a", bcp
->bc_cache
);
3874 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3875 delta
/ NANOSEC
, delta
% NANOSEC
, bcp
->bc_addr
, name
);
3877 for (i
= 0; i
< depth
; i
++)
3878 mdb_printf("\t %a\n", bcp
->bc_stack
[i
]);
3884 kmalog(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
3886 const char *logname
= "kmem_transaction_log";
3890 return (DCMD_USAGE
);
3893 if (flags
& DCMD_ADDRSPEC
)
3894 kma
.kma_addr
= addr
;
3896 kma
.kma_addr
= (uintptr_t)NULL
;
3899 if (argv
->a_type
!= MDB_TYPE_STRING
)
3900 return (DCMD_USAGE
);
3901 if (strcmp(argv
->a_un
.a_str
, "fail") == 0)
3902 logname
= "kmem_failure_log";
3903 else if (strcmp(argv
->a_un
.a_str
, "slab") == 0)
3904 logname
= "kmem_slab_log";
3906 return (DCMD_USAGE
);
3909 if (mdb_readvar(&addr
, logname
) == -1) {
3910 mdb_warn("failed to read %s log header pointer");
3914 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t
)showbc
, &kma
, addr
) == -1) {
3915 mdb_warn("failed to walk kmem log");
3923 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3924 * The first piece is a structure which we use to accumulate kmem_cache_t
3925 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3926 * walker; we either add all caches, or ones named explicitly as arguments.
typedef struct kmclist {
	const char *kmc_name;			/* Name to match (or NULL) */
	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
	int kmc_nelems;				/* Num entries in kmc_caches */
	int kmc_size;				/* Size of kmc_caches array */
} kmclist_t;
3937 kmc_add(uintptr_t addr
, const kmem_cache_t
*cp
, kmclist_t
*kmc
)
3942 if (kmc
->kmc_name
== NULL
||
3943 strcmp(cp
->cache_name
, kmc
->kmc_name
) == 0) {
3945 * If we have a match, grow our array (if necessary), and then
3946 * add the virtual address of the matching cache to our list.
3948 if (kmc
->kmc_nelems
>= kmc
->kmc_size
) {
3949 s
= kmc
->kmc_size
? kmc
->kmc_size
* 2 : 256;
3950 p
= mdb_alloc(sizeof (uintptr_t) * s
, UM_SLEEP
| UM_GC
);
3952 bcopy(kmc
->kmc_caches
, p
,
3953 sizeof (uintptr_t) * kmc
->kmc_size
);
3955 kmc
->kmc_caches
= p
;
3959 kmc
->kmc_caches
[kmc
->kmc_nelems
++] = addr
;
3960 return (kmc
->kmc_name
? WALK_DONE
: WALK_NEXT
);
3967 * The second piece of ::kmausers is a hash table of allocations. Each
3968 * allocation owner is identified by its stack trace and data_size. We then
3969 * track the total bytes of all such allocations, and the number of allocations
3970 * to report at the end. Once we have a list of caches, we walk through the
3971 * allocated bufctls of each, and update our hash table accordingly.
typedef struct kmowner {
	struct kmowner *kmo_head;		/* First hash elt in bucket */
	struct kmowner *kmo_next;		/* Next hash elt in chain */
	size_t kmo_signature;			/* Hash table signature */
	uint_t kmo_num;				/* Number of allocations */
	size_t kmo_data_size;			/* Size of each allocation */
	size_t kmo_total_size;			/* Total bytes of allocation */
	int kmo_depth;				/* Depth of stack trace */
	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
} kmowner_t;

typedef struct kmusers {
	uintptr_t kmu_addr;			/* address of interest */
	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
	kmowner_t *kmu_hash;			/* Hash table of owners */
	int kmu_nelems;				/* Number of entries in use */
	int kmu_size;				/* Total number of entries */
} kmusers_t;
3994 kmu_add(kmusers_t
*kmu
, const kmem_bufctl_audit_t
*bcp
,
3995 size_t size
, size_t data_size
)
3997 int i
, depth
= MIN(bcp
->bc_depth
, KMEM_STACK_DEPTH
);
3998 size_t bucket
, signature
= data_size
;
3999 kmowner_t
*kmo
, *kmoend
;
4002 * If the hash table is full, double its size and rehash everything.
4004 if (kmu
->kmu_nelems
>= kmu
->kmu_size
) {
4005 int s
= kmu
->kmu_size
? kmu
->kmu_size
* 2 : 1024;
4007 kmo
= mdb_alloc(sizeof (kmowner_t
) * s
, UM_SLEEP
| UM_GC
);
4008 bcopy(kmu
->kmu_hash
, kmo
, sizeof (kmowner_t
) * kmu
->kmu_size
);
4009 kmu
->kmu_hash
= kmo
;
4012 kmoend
= kmu
->kmu_hash
+ kmu
->kmu_size
;
4013 for (kmo
= kmu
->kmu_hash
; kmo
< kmoend
; kmo
++)
4014 kmo
->kmo_head
= NULL
;
4016 kmoend
= kmu
->kmu_hash
+ kmu
->kmu_nelems
;
4017 for (kmo
= kmu
->kmu_hash
; kmo
< kmoend
; kmo
++) {
4018 bucket
= kmo
->kmo_signature
& (kmu
->kmu_size
- 1);
4019 kmo
->kmo_next
= kmu
->kmu_hash
[bucket
].kmo_head
;
4020 kmu
->kmu_hash
[bucket
].kmo_head
= kmo
;
4025 * Finish computing the hash signature from the stack trace, and then
4026 * see if the owner is in the hash table. If so, update our stats.
4028 for (i
= 0; i
< depth
; i
++)
4029 signature
+= bcp
->bc_stack
[i
];
4031 bucket
= signature
& (kmu
->kmu_size
- 1);
4033 for (kmo
= kmu
->kmu_hash
[bucket
].kmo_head
; kmo
; kmo
= kmo
->kmo_next
) {
4034 if (kmo
->kmo_signature
== signature
) {
4035 size_t difference
= 0;
4037 difference
|= kmo
->kmo_data_size
- data_size
;
4038 difference
|= kmo
->kmo_depth
- depth
;
4040 for (i
= 0; i
< depth
; i
++) {
4041 difference
|= kmo
->kmo_stack
[i
] -
4045 if (difference
== 0) {
4046 kmo
->kmo_total_size
+= size
;
4054 * If the owner is not yet hashed, grab the next element and fill it
4055 * in based on the allocation information.
4057 kmo
= &kmu
->kmu_hash
[kmu
->kmu_nelems
++];
4058 kmo
->kmo_next
= kmu
->kmu_hash
[bucket
].kmo_head
;
4059 kmu
->kmu_hash
[bucket
].kmo_head
= kmo
;
4061 kmo
->kmo_signature
= signature
;
4063 kmo
->kmo_data_size
= data_size
;
4064 kmo
->kmo_total_size
= size
;
4065 kmo
->kmo_depth
= depth
;
4067 for (i
= 0; i
< depth
; i
++)
4068 kmo
->kmo_stack
[i
] = bcp
->bc_stack
[i
];
4072 * When ::kmausers is invoked without the -f flag, we simply update our hash
4073 * table with the information from each allocated bufctl.
4077 kmause1(uintptr_t addr
, const kmem_bufctl_audit_t
*bcp
, kmusers_t
*kmu
)
4079 const kmem_cache_t
*cp
= kmu
->kmu_cache
;
4081 kmu_add(kmu
, bcp
, cp
->cache_bufsize
, cp
->cache_bufsize
);
4086 * When ::kmausers is invoked with the -f flag, we print out the information
4087 * for each bufctl as well as updating the hash table.
4090 kmause2(uintptr_t addr
, const kmem_bufctl_audit_t
*bcp
, kmusers_t
*kmu
)
4092 int i
, depth
= MIN(bcp
->bc_depth
, KMEM_STACK_DEPTH
);
4093 const kmem_cache_t
*cp
= kmu
->kmu_cache
;
4094 kmem_bufctl_t bufctl
;
4096 if (kmu
->kmu_addr
) {
4097 if (mdb_vread(&bufctl
, sizeof (bufctl
), addr
) == -1)
4098 mdb_warn("couldn't read bufctl at %p", addr
);
4099 else if (kmu
->kmu_addr
< (uintptr_t)bufctl
.bc_addr
||
4100 kmu
->kmu_addr
>= (uintptr_t)bufctl
.bc_addr
+
4105 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4106 cp
->cache_bufsize
, addr
, bcp
->bc_thread
, cp
->cache_name
);
4108 for (i
= 0; i
< depth
; i
++)
4109 mdb_printf("\t %a\n", bcp
->bc_stack
[i
]);
4111 kmu_add(kmu
, bcp
, cp
->cache_bufsize
, cp
->cache_bufsize
);
/*
 * We sort our results by allocation size before printing them.
 */
static int
kmownercmp(const void *lp, const void *rp)
{
	const kmowner_t *lhs = lp;
	const kmowner_t *rhs = rp;

	return (rhs->kmo_total_size - lhs->kmo_total_size);
}
4128 * The main engine of ::kmausers is relatively straightforward: First we
4129 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4130 * iterate over the allocated bufctls of each cache in the list. Finally,
4131 * we sort and print our results.
4135 kmausers(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
4137 int mem_threshold
= 8192; /* Minimum # bytes for printing */
4138 int cnt_threshold
= 100; /* Minimum # blocks for printing */
4139 int audited_caches
= 0; /* Number of KMF_AUDIT caches found */
4140 int do_all_caches
= 1; /* Do all caches (no arguments) */
4141 int opt_e
= FALSE
; /* Include "small" users */
4142 int opt_f
= FALSE
; /* Print stack traces */
4144 mdb_walk_cb_t callback
= (mdb_walk_cb_t
)kmause1
;
4145 kmowner_t
*kmo
, *kmoend
;
4151 bzero(&kmc
, sizeof (kmc
));
4152 bzero(&kmu
, sizeof (kmu
));
4154 while ((i
= mdb_getopts(argc
, argv
,
4155 'e', MDB_OPT_SETBITS
, TRUE
, &opt_e
,
4156 'f', MDB_OPT_SETBITS
, TRUE
, &opt_f
, NULL
)) != argc
) {
4158 argv
+= i
; /* skip past options we just processed */
4159 argc
-= i
; /* adjust argc */
4161 if (argv
->a_type
!= MDB_TYPE_STRING
|| *argv
->a_un
.a_str
== '-')
4162 return (DCMD_USAGE
);
4164 oelems
= kmc
.kmc_nelems
;
4165 kmc
.kmc_name
= argv
->a_un
.a_str
;
4166 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t
)kmc_add
, &kmc
);
4168 if (kmc
.kmc_nelems
== oelems
) {
4169 mdb_warn("unknown kmem cache: %s\n", kmc
.kmc_name
);
4178 if (flags
& DCMD_ADDRSPEC
) {
4180 kmu
.kmu_addr
= addr
;
4182 kmu
.kmu_addr
= (uintptr_t)NULL
;
4186 mem_threshold
= cnt_threshold
= 0;
4189 callback
= (mdb_walk_cb_t
)kmause2
;
4191 if (do_all_caches
) {
4192 kmc
.kmc_name
= NULL
; /* match all cache names */
4193 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t
)kmc_add
, &kmc
);
4196 for (i
= 0; i
< kmc
.kmc_nelems
; i
++) {
4197 uintptr_t cp
= kmc
.kmc_caches
[i
];
4200 if (mdb_vread(&c
, sizeof (c
), cp
) == -1) {
4201 mdb_warn("failed to read cache at %p", cp
);
4205 if (!(c
.cache_flags
& KMF_AUDIT
)) {
4206 if (!do_all_caches
) {
4207 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4214 (void) mdb_pwalk("bufctl", callback
, &kmu
, cp
);
4218 if (audited_caches
== 0 && do_all_caches
) {
4219 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4223 qsort(kmu
.kmu_hash
, kmu
.kmu_nelems
, sizeof (kmowner_t
), kmownercmp
);
4224 kmoend
= kmu
.kmu_hash
+ kmu
.kmu_nelems
;
4226 for (kmo
= kmu
.kmu_hash
; kmo
< kmoend
; kmo
++) {
4227 if (kmo
->kmo_total_size
< mem_threshold
&&
4228 kmo
->kmo_num
< cnt_threshold
)
4230 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4231 kmo
->kmo_total_size
, kmo
->kmo_num
, kmo
->kmo_data_size
);
4232 for (i
= 0; i
< kmo
->kmo_depth
; i
++)
4233 mdb_printf("\t %a\n", kmo
->kmo_stack
[i
]);
4243 "Displays the largest users of the kmem allocator, sorted by \n"
4244 "trace. If one or more caches is specified, only those caches\n"
4245 "will be searched. By default, all caches are searched. If an\n"
4246 "address is specified, then only those allocations which include\n"
4247 "the given address are displayed. Specifying an address implies\n"
4250 "\t-e\tInclude all users, not just the largest\n"
4251 "\t-f\tDisplay individual allocations. By default, users are\n"
4252 "\t\tgrouped by stack\n");
static int
kmem_ready_check(void)
{
	int ready;

	if (mdb_readvar(&ready, "kmem_ready") < 0)
		return (-1); /* errno is set for us */

	return (ready);
}

void
kmem_statechange(void)
{
	static int been_ready = 0;

	if (been_ready)
		return;

	if (kmem_ready_check() <= 0)
		return;

	been_ready = 1;

	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
}
4285 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init
,
4286 list_walk_step
, list_walk_fini
4290 * If kmem is ready, we'll need to invoke the kmem_cache walker
4291 * immediately. Walkers in the linkage structure won't be ready until
4292 * _mdb_init returns, so we'll need to add this one manually. If kmem
4293 * is ready, we'll use the walker to initialize the caches. If kmem
4294 * isn't ready, we'll register a callback that will allow us to defer
4295 * cache walking until it is.
4297 if (mdb_add_walker(&w
) != 0) {
4298 mdb_warn("failed to add kmem_cache walker");
4304 /* register our ::whatis handlers */
4305 mdb_whatis_register("modules", whatis_run_modules
, NULL
,
4306 WHATIS_PRIO_EARLY
, WHATIS_REG_NO_ID
);
4307 mdb_whatis_register("threads", whatis_run_threads
, NULL
,
4308 WHATIS_PRIO_EARLY
, WHATIS_REG_NO_ID
);
4309 mdb_whatis_register("pages", whatis_run_pages
, NULL
,
4310 WHATIS_PRIO_EARLY
, WHATIS_REG_NO_ID
);
4311 mdb_whatis_register("kmem", whatis_run_kmem
, NULL
,
4312 WHATIS_PRIO_ALLOCATOR
, 0);
4313 mdb_whatis_register("vmem", whatis_run_vmem
, NULL
,
4314 WHATIS_PRIO_ALLOCATOR
, 0);
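/*
 * ::whatthread: search every thread's kernel stack for a given pointer value
 * ("addr::whatthread").  By default matching threads are printed as pipeable
 * addresses; -v prints the stack location of each hit and whether it lies in
 * the active portion of the stack.
 */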
typedef struct whatthread {
	uintptr_t wt_target;
	int wt_verbose;
} whatthread_t;

static int
whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
{
	uintptr_t current, data;

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	/*
	 * Search the thread's stack for the given pointer.  Note that it would
	 * be more efficient to follow ::kgrep's lead and read in page-sized
	 * chunks, but this routine is already fast and simple.
	 */
	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
	    current += sizeof (uintptr_t)) {
		if (mdb_vread(&data, sizeof (data), current) == -1) {
			mdb_warn("couldn't read thread %p's stack at %p",
			    addr, current);
			return (WALK_ERR);
		}

		if (data == w->wt_target) {
			if (w->wt_verbose) {
				mdb_printf("%p in thread %p's stack%s\n",
				    current, addr, stack_active(t, current));
				continue;
			}

			mdb_printf("%#lr\n", addr);
			return (WALK_NEXT);
		}
	}

	return (WALK_NEXT);
}

int
whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	whatthread_t w;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	w.wt_verbose = FALSE;
	w.wt_target = addr;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
	    == -1) {
		mdb_warn("couldn't walk threads");