4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014, Joyent, Inc. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28 * Kernel statistics framework
31 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/t_lock.h>
36 #include <sys/param.h>
37 #include <sys/errno.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/kstat.h>
42 #include <sys/sysinfo.h>
43 #include <sys/cpuvar.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
50 #include <sys/debug.h>
53 #include <sys/pool_pset.h>
54 #include <sys/cpupart.h>
56 #include <sys/loadavg.h>
59 #include <vm/seg_kmem.h>
/*
 * Global lock to protect the AVL trees and kstat_chain_id.
 */
64 static kmutex_t kstat_chain_lock
;
/*
 * Every install/delete kstat bumps kstat_chain_id.  This is used by:
 *
 * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
 *
 * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
 *	/dev/kstat uses the KID as a cookie for kstat lookups.
 *
 * We reserve the first two IDs because some kstats are created before
 * the well-known ones (kstat_headers = 0, kstat_types = 1).
 *
 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
 * into a particular kstat, which is logically equivalent to a kstat being
 * installed/deleted.
 */
82 kid_t kstat_chain_id
= 2;
/*
 * As far as zones are concerned, there are 3 types of kstat:
 *
 * 1) Those which have a well-known name, and which should return per-zone
 *    data depending on which zone is doing the kstat_read().
 *    sockfs:0:sock_unix_list is an example of this type of kstat.
 *
 * 2) Those which should only be exported to a particular list of zones.
 *    For example, in the case of nfs:*:mntinfo, we don't want zone A to be
 *    able to see NFS mounts associated with zone B, while we want the
 *    global zone to be able to see all mounts on the system.
 *
 * 3) Those that can be exported to all zones.  Most system-related
 *    kstats fall within this category.
 *
 * An ekstat_t thus contains a list of the zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
 *
 * Writing to kstats is currently disallowed from within a non-global
 * zone, although this restriction could be removed in the future.
 */
109 typedef struct kstat_zone
{
111 struct kstat_zone
*next
;
115 * Extended kstat structure -- for internal use only.
117 typedef struct ekstat
{
118 kstat_t e_ks
; /* the kstat itself */
119 size_t e_size
; /* total allocation size */
120 kthread_t
*e_owner
; /* thread holding this kstat */
121 kcondvar_t e_cv
; /* wait for owner == NULL */
122 avl_node_t e_avl_bykid
; /* AVL tree to sort by KID */
123 avl_node_t e_avl_byname
; /* AVL tree to sort by name */
124 kstat_zone_t e_zone
; /* zone to export stats to */
127 static uint64_t kstat_initial
[8192];
128 static void *kstat_initial_ptr
= kstat_initial
;
129 static size_t kstat_initial_avail
= sizeof (kstat_initial
);
130 static vmem_t
*kstat_arena
;
132 #define KSTAT_ALIGN (sizeof (uint64_t))
134 static avl_tree_t kstat_avl_bykid
;
135 static avl_tree_t kstat_avl_byname
;
138 * Various pointers we need to create kstats at boot time in kstat_init()
140 extern kstat_named_t
*segmapcnt_ptr
;
141 extern uint_t segmapcnt_ndata
;
142 extern int segmap_kstat_update(kstat_t
*, int);
143 extern kstat_named_t
*biostats_ptr
;
144 extern uint_t biostats_ndata
;
145 extern kstat_named_t
*pollstats_ptr
;
146 extern uint_t pollstats_ndata
;
150 extern time_t boot_time
;
151 extern sysinfo_t sysinfo
;
152 extern vminfo_t vminfo
;
157 kstat_named_t deficit
;
158 kstat_named_t clk_intr
;
161 kstat_named_t avenrun_1min
;
162 kstat_named_t avenrun_5min
;
163 kstat_named_t avenrun_15min
;
164 kstat_named_t boot_time
;
165 kstat_named_t nsec_per_tick
;
166 } system_misc_kstat
= {
167 { "ncpus", KSTAT_DATA_UINT32
},
168 { "lbolt", KSTAT_DATA_UINT32
},
169 { "deficit", KSTAT_DATA_UINT32
},
170 { "clk_intr", KSTAT_DATA_UINT32
},
171 { "vac", KSTAT_DATA_UINT32
},
172 { "nproc", KSTAT_DATA_UINT32
},
173 { "avenrun_1min", KSTAT_DATA_UINT32
},
174 { "avenrun_5min", KSTAT_DATA_UINT32
},
175 { "avenrun_15min", KSTAT_DATA_UINT32
},
176 { "boot_time", KSTAT_DATA_UINT32
},
177 { "nsec_per_tick", KSTAT_DATA_UINT32
},
181 kstat_named_t physmem
;
182 kstat_named_t nalloc
;
184 kstat_named_t nalloc_calls
;
185 kstat_named_t nfree_calls
;
186 kstat_named_t kernelbase
;
187 kstat_named_t econtig
;
188 kstat_named_t freemem
;
189 kstat_named_t availrmem
;
190 kstat_named_t lotsfree
;
191 kstat_named_t desfree
;
192 kstat_named_t minfree
;
193 kstat_named_t fastscan
;
194 kstat_named_t slowscan
;
196 kstat_named_t desscan
;
197 kstat_named_t pp_kernel
;
198 kstat_named_t pagesfree
;
199 kstat_named_t pageslocked
;
200 kstat_named_t pagestotal
;
201 } system_pages_kstat
= {
202 { "physmem", KSTAT_DATA_ULONG
},
203 { "nalloc", KSTAT_DATA_ULONG
},
204 { "nfree", KSTAT_DATA_ULONG
},
205 { "nalloc_calls", KSTAT_DATA_ULONG
},
206 { "nfree_calls", KSTAT_DATA_ULONG
},
207 { "kernelbase", KSTAT_DATA_ULONG
},
208 { "econtig", KSTAT_DATA_ULONG
},
209 { "freemem", KSTAT_DATA_ULONG
},
210 { "availrmem", KSTAT_DATA_ULONG
},
211 { "lotsfree", KSTAT_DATA_ULONG
},
212 { "desfree", KSTAT_DATA_ULONG
},
213 { "minfree", KSTAT_DATA_ULONG
},
214 { "fastscan", KSTAT_DATA_ULONG
},
215 { "slowscan", KSTAT_DATA_ULONG
},
216 { "nscan", KSTAT_DATA_ULONG
},
217 { "desscan", KSTAT_DATA_ULONG
},
218 { "pp_kernel", KSTAT_DATA_ULONG
},
219 { "pagesfree", KSTAT_DATA_ULONG
},
220 { "pageslocked", KSTAT_DATA_ULONG
},
221 { "pagestotal", KSTAT_DATA_ULONG
},
224 static int header_kstat_update(kstat_t
*, int);
225 static int header_kstat_snapshot(kstat_t
*, void *, int);
226 static int system_misc_kstat_update(kstat_t
*, int);
227 static int system_pages_kstat_update(kstat_t
*, int);
230 char name
[KSTAT_STRLEN
];
234 } kstat_data_type
[KSTAT_NUM_TYPES
] = {
235 { "raw", 1, 0, INT_MAX
},
236 { "name=value", sizeof (kstat_named_t
), 0, INT_MAX
},
237 { "interrupt", sizeof (kstat_intr_t
), 1, 1 },
238 { "i/o", sizeof (kstat_io_t
), 1, 1 },
239 { "event_timer", sizeof (kstat_timer_t
), 0, INT_MAX
},
243 kstat_zone_find(kstat_t
*k
, zoneid_t zoneid
)
245 ekstat_t
*e
= (ekstat_t
*)k
;
248 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
249 for (kz
= &e
->e_zone
; kz
!= NULL
; kz
= kz
->next
) {
250 if (zoneid
== ALL_ZONES
|| kz
->zoneid
== ALL_ZONES
)
252 if (zoneid
== kz
->zoneid
)
259 kstat_zone_remove(kstat_t
*k
, zoneid_t zoneid
)
261 ekstat_t
*e
= (ekstat_t
*)k
;
262 kstat_zone_t
*kz
, *t
= NULL
;
264 mutex_enter(&kstat_chain_lock
);
265 if (zoneid
== e
->e_zone
.zoneid
) {
268 e
->e_zone
.zoneid
= kz
->zoneid
;
269 e
->e_zone
.next
= kz
->next
;
272 for (kz
= &e
->e_zone
; kz
->next
!= NULL
; kz
= kz
->next
) {
273 if (kz
->next
->zoneid
== zoneid
) {
279 ASSERT(t
!= NULL
); /* we removed something */
283 mutex_exit(&kstat_chain_lock
);
284 kmem_free(kz
, sizeof (*kz
));
288 kstat_zone_add(kstat_t
*k
, zoneid_t zoneid
)
290 ekstat_t
*e
= (ekstat_t
*)k
;
293 kz
= kmem_alloc(sizeof (*kz
), KM_NOSLEEP
);
296 mutex_enter(&kstat_chain_lock
);
298 kz
->next
= e
->e_zone
.next
;
301 mutex_exit(&kstat_chain_lock
);
305 * Compare the list of zones for the given kstats, returning 0 if they match
306 * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
307 * In practice, this is called indirectly by kstat_hold_byname(), so one of the
308 * two lists always has one element, and this is an O(n) operation rather than
312 kstat_zone_compare(ekstat_t
*e1
, ekstat_t
*e2
)
314 kstat_zone_t
*kz1
, *kz2
;
316 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
317 for (kz1
= &e1
->e_zone
; kz1
!= NULL
; kz1
= kz1
->next
) {
318 for (kz2
= &e2
->e_zone
; kz2
!= NULL
; kz2
= kz2
->next
) {
319 if (kz1
->zoneid
== ALL_ZONES
||
320 kz2
->zoneid
== ALL_ZONES
)
322 if (kz1
->zoneid
== kz2
->zoneid
)
326 return (e1
->e_zone
.zoneid
< e2
->e_zone
.zoneid
? -1 : 1);
330 * Support for keeping kstats sorted in AVL trees for fast lookups.
333 kstat_compare_bykid(const void *a1
, const void *a2
)
335 const kstat_t
*k1
= a1
;
336 const kstat_t
*k2
= a2
;
338 if (k1
->ks_kid
< k2
->ks_kid
)
340 if (k1
->ks_kid
> k2
->ks_kid
)
342 return (kstat_zone_compare((ekstat_t
*)k1
, (ekstat_t
*)k2
));
346 kstat_compare_byname(const void *a1
, const void *a2
)
348 const kstat_t
*k1
= a1
;
349 const kstat_t
*k2
= a2
;
352 s
= strcmp(k1
->ks_module
, k2
->ks_module
);
358 if (k1
->ks_instance
< k2
->ks_instance
)
360 if (k1
->ks_instance
> k2
->ks_instance
)
363 s
= strcmp(k1
->ks_name
, k2
->ks_name
);
369 return (kstat_zone_compare((ekstat_t
*)k1
, (ekstat_t
*)k2
));
373 kstat_hold(avl_tree_t
*t
, ekstat_t
*template)
378 mutex_enter(&kstat_chain_lock
);
380 ksp
= avl_find(t
, template, NULL
);
384 if (e
->e_owner
== NULL
) {
385 e
->e_owner
= curthread
;
388 cv_wait(&e
->e_cv
, &kstat_chain_lock
);
390 mutex_exit(&kstat_chain_lock
);
395 kstat_rele(kstat_t
*ksp
)
397 ekstat_t
*e
= (ekstat_t
*)ksp
;
399 mutex_enter(&kstat_chain_lock
);
400 ASSERT(e
->e_owner
== curthread
);
402 cv_broadcast(&e
->e_cv
);
403 mutex_exit(&kstat_chain_lock
);
407 kstat_hold_bykid(kid_t kid
, zoneid_t zoneid
)
412 e
.e_zone
.zoneid
= zoneid
;
413 e
.e_zone
.next
= NULL
;
415 return (kstat_hold(&kstat_avl_bykid
, &e
));
419 kstat_hold_byname(const char *ks_module
, int ks_instance
, const char *ks_name
,
424 kstat_set_string(e
.e_ks
.ks_module
, ks_module
);
425 e
.e_ks
.ks_instance
= ks_instance
;
426 kstat_set_string(e
.e_ks
.ks_name
, ks_name
);
427 e
.e_zone
.zoneid
= ks_zoneid
;
428 e
.e_zone
.next
= NULL
;
429 return (kstat_hold(&kstat_avl_byname
, &e
));
433 kstat_alloc(size_t size
)
437 size
= P2ROUNDUP(sizeof (ekstat_t
) + size
, KSTAT_ALIGN
);
439 if (kstat_arena
== NULL
) {
440 if (size
<= kstat_initial_avail
) {
441 e
= kstat_initial_ptr
;
442 kstat_initial_ptr
= (char *)kstat_initial_ptr
+ size
;
443 kstat_initial_avail
-= size
;
446 e
= vmem_alloc(kstat_arena
, size
, VM_NOSLEEP
);
452 cv_init(&e
->e_cv
, NULL
, CV_DEFAULT
, NULL
);
459 kstat_free(ekstat_t
*e
)
461 cv_destroy(&e
->e_cv
);
462 vmem_free(kstat_arena
, e
, e
->e_size
);
466 * Create various system kstats.
473 avl_tree_t
*t
= &kstat_avl_bykid
;
476 * Set up the kstat vmem arena.
478 kstat_arena
= vmem_create("kstat",
479 kstat_initial
, sizeof (kstat_initial
), KSTAT_ALIGN
,
480 segkmem_alloc
, segkmem_free
, heap_arena
, 0, VM_SLEEP
);
483 * Make initial kstats appear as though they were allocated.
485 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
))
486 (void) vmem_xalloc(kstat_arena
, e
->e_size
, KSTAT_ALIGN
,
487 0, 0, e
, (char *)e
+ e
->e_size
,
488 VM_NOSLEEP
| VM_BESTFIT
| VM_PANIC
);
491 * The mother of all kstats. The first kstat in the system, which
492 * always has KID 0, has the headers for all kstats (including itself)
493 * as its data. Thus, the kstat driver does not need any special
494 * interface to extract the kstat chain.
497 ksp
= kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW
,
498 0, KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_VAR_SIZE
);
500 ksp
->ks_lock
= &kstat_chain_lock
;
501 ksp
->ks_update
= header_kstat_update
;
502 ksp
->ks_snapshot
= header_kstat_snapshot
;
505 panic("cannot create kstat 'kstat_headers'");
508 ksp
= kstat_create("unix", 0, "kstat_types", "kstat",
509 KSTAT_TYPE_NAMED
, KSTAT_NUM_TYPES
, 0);
512 kstat_named_t
*kn
= KSTAT_NAMED_PTR(ksp
);
514 for (i
= 0; i
< KSTAT_NUM_TYPES
; i
++) {
515 kstat_named_init(&kn
[i
], kstat_data_type
[i
].name
,
522 ksp
= kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW
,
523 sizeof (sysinfo_t
), KSTAT_FLAG_VIRTUAL
);
525 ksp
->ks_data
= (void *) &sysinfo
;
529 ksp
= kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW
,
530 sizeof (vminfo_t
), KSTAT_FLAG_VIRTUAL
);
532 ksp
->ks_data
= (void *) &vminfo
;
536 ksp
= kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED
,
537 segmapcnt_ndata
, KSTAT_FLAG_VIRTUAL
);
539 ksp
->ks_data
= (void *) segmapcnt_ptr
;
540 ksp
->ks_update
= segmap_kstat_update
;
544 ksp
= kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED
,
545 biostats_ndata
, KSTAT_FLAG_VIRTUAL
);
547 ksp
->ks_data
= (void *) biostats_ptr
;
551 ksp
= kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW
,
552 sizeof (struct var
), KSTAT_FLAG_VIRTUAL
);
554 ksp
->ks_data
= (void *) &v
;
558 ksp
= kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED
,
559 sizeof (system_misc_kstat
) / sizeof (kstat_named_t
),
562 ksp
->ks_data
= (void *) &system_misc_kstat
;
563 ksp
->ks_update
= system_misc_kstat_update
;
567 ksp
= kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED
,
568 sizeof (system_pages_kstat
) / sizeof (kstat_named_t
),
571 ksp
->ks_data
= (void *) &system_pages_kstat
;
572 ksp
->ks_update
= system_pages_kstat_update
;
576 ksp
= kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED
,
577 pollstats_ndata
, KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
);
580 ksp
->ks_data
= pollstats_ptr
;
586 * Caller of this should ensure that the string pointed by src
587 * doesn't change while kstat's lock is held. Not doing so defeats
588 * kstat's snapshot strategy as explained in <sys/kstat.h>
591 kstat_named_setstr(kstat_named_t
*knp
, const char *src
)
593 if (knp
->data_type
!= KSTAT_DATA_STRING
)
594 panic("kstat_named_setstr('%p', '%p'): "
595 "named kstat is not of type KSTAT_DATA_STRING",
596 (void *)knp
, (void *)src
);
598 KSTAT_NAMED_STR_PTR(knp
) = (char *)src
;
600 KSTAT_NAMED_STR_BUFLEN(knp
) = strlen(src
) + 1;
602 KSTAT_NAMED_STR_BUFLEN(knp
) = 0;
606 kstat_set_string(char *dst
, const char *src
)
608 bzero(dst
, KSTAT_STRLEN
);
609 (void) strncpy(dst
, src
, KSTAT_STRLEN
- 1);
613 kstat_named_init(kstat_named_t
*knp
, const char *name
, uchar_t data_type
)
615 kstat_set_string(knp
->name
, name
);
616 knp
->data_type
= data_type
;
618 if (data_type
== KSTAT_DATA_STRING
)
619 kstat_named_setstr(knp
, NULL
);
623 kstat_timer_init(kstat_timer_t
*ktp
, const char *name
)
625 kstat_set_string(ktp
->name
, name
);
630 default_kstat_update(kstat_t
*ksp
, int rw
)
637 * Named kstats with variable-length long strings have a standard
638 * way of determining how much space is needed to hold the snapshot:
640 if (ksp
->ks_data
!= NULL
&& ksp
->ks_type
== KSTAT_TYPE_NAMED
&&
641 (ksp
->ks_flags
& (KSTAT_FLAG_VAR_SIZE
| KSTAT_FLAG_LONGSTRINGS
))) {
644 * Add in the space required for the strings
646 knp
= KSTAT_NAMED_PTR(ksp
);
647 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
648 if (knp
->data_type
== KSTAT_DATA_STRING
)
649 len
+= KSTAT_NAMED_STR_BUFLEN(knp
);
652 ksp
->ks_ndata
* sizeof (kstat_named_t
) + len
;
658 default_kstat_snapshot(kstat_t
*ksp
, void *buf
, int rw
)
664 ksp
->ks_snaptime
= cur_time
= gethrtime();
666 if (rw
== KSTAT_WRITE
) {
667 if (!(ksp
->ks_flags
& KSTAT_FLAG_WRITABLE
))
669 bcopy(buf
, ksp
->ks_data
, ksp
->ks_data_size
);
674 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
675 * number of kstat_named_t structures, followed by an optional
676 * string segment. The ks_data generally holds only the
677 * kstat_named_t structures. So we copy it first. The strings,
678 * if any, are copied below. For other kstat types, ks_data holds the
682 namedsz
= sizeof (kstat_named_t
) * ksp
->ks_ndata
;
683 if (ksp
->ks_type
== KSTAT_TYPE_NAMED
&& ksp
->ks_data_size
> namedsz
)
684 bcopy(ksp
->ks_data
, buf
, namedsz
);
686 bcopy(ksp
->ks_data
, buf
, ksp
->ks_data_size
);
689 * Apply kstat type-specific data massaging
691 switch (ksp
->ks_type
) {
695 * Normalize time units and deal with incomplete transactions
697 kiop
= (kstat_io_t
*)buf
;
699 scalehrtime(&kiop
->wtime
);
700 scalehrtime(&kiop
->wlentime
);
701 scalehrtime(&kiop
->wlastupdate
);
702 scalehrtime(&kiop
->rtime
);
703 scalehrtime(&kiop
->rlentime
);
704 scalehrtime(&kiop
->rlastupdate
);
706 if (kiop
->wcnt
!= 0) {
707 /* like kstat_waitq_exit */
708 hrtime_t wfix
= cur_time
- kiop
->wlastupdate
;
709 kiop
->wlastupdate
= cur_time
;
710 kiop
->wlentime
+= kiop
->wcnt
* wfix
;
714 if (kiop
->rcnt
!= 0) {
715 /* like kstat_runq_exit */
716 hrtime_t rfix
= cur_time
- kiop
->rlastupdate
;
717 kiop
->rlastupdate
= cur_time
;
718 kiop
->rlentime
+= kiop
->rcnt
* rfix
;
723 case KSTAT_TYPE_NAMED
:
725 * Massage any long strings in at the end of the buffer
727 if (ksp
->ks_data_size
> namedsz
) {
729 kstat_named_t
*knp
= buf
;
730 char *dst
= (char *)(knp
+ ksp
->ks_ndata
);
732 * Copy strings and update pointers
734 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
735 if (knp
->data_type
== KSTAT_DATA_STRING
&&
736 KSTAT_NAMED_STR_PTR(knp
) != NULL
) {
737 bcopy(KSTAT_NAMED_STR_PTR(knp
), dst
,
738 KSTAT_NAMED_STR_BUFLEN(knp
));
739 KSTAT_NAMED_STR_PTR(knp
) = dst
;
740 dst
+= KSTAT_NAMED_STR_BUFLEN(knp
);
743 ASSERT(dst
<= ((char *)buf
+ ksp
->ks_data_size
));
751 header_kstat_update(kstat_t
*header_ksp
, int rw
)
755 avl_tree_t
*t
= &kstat_avl_bykid
;
758 if (rw
== KSTAT_WRITE
)
761 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
763 zoneid
= getzoneid();
764 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
)) {
765 if (kstat_zone_find((kstat_t
*)e
, zoneid
) &&
766 (e
->e_ks
.ks_flags
& KSTAT_FLAG_INVALID
) == 0) {
770 header_ksp
->ks_ndata
= nkstats
;
771 header_ksp
->ks_data_size
= nkstats
* sizeof (kstat_t
);
776 * Copy out the data section of kstat 0, which consists of the list
777 * of all kstat headers. By specification, these headers must be
778 * copied out in order of increasing KID.
781 header_kstat_snapshot(kstat_t
*header_ksp
, void *buf
, int rw
)
784 avl_tree_t
*t
= &kstat_avl_bykid
;
787 header_ksp
->ks_snaptime
= gethrtime();
789 if (rw
== KSTAT_WRITE
)
792 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
794 zoneid
= getzoneid();
795 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
)) {
796 if (kstat_zone_find((kstat_t
*)e
, zoneid
) &&
797 (e
->e_ks
.ks_flags
& KSTAT_FLAG_INVALID
) == 0) {
798 bcopy(&e
->e_ks
, buf
, sizeof (kstat_t
));
799 buf
= (char *)buf
+ sizeof (kstat_t
);
808 system_misc_kstat_update(kstat_t
*ksp
, int rw
)
811 int *loadavgp
= &avenrun
[0];
812 time_t zone_boot_time
;
814 hrtime_t zone_hrtime
;
817 if (rw
== KSTAT_WRITE
)
820 if (!INGLOBALZONE(curproc
)) {
822 * Here we grab cpu_lock which is OK as long as no-one in the
823 * future attempts to lookup this particular kstat
824 * (unix:0:system_misc) while holding cpu_lock.
826 mutex_enter(&cpu_lock
);
827 if (pool_pset_enabled()) {
828 myncpus
= zone_ncpus_get(curproc
->p_zone
);
831 mutex_exit(&cpu_lock
);
832 loadavgp
= &curproc
->p_zone
->zone_avenrun
[0];
835 if (INGLOBALZONE(curproc
)) {
836 zone_boot_time
= boot_time
;
837 zone_lbolt
= ddi_get_lbolt();
840 zone_boot_time
= curproc
->p_zone
->zone_boot_time
;
842 zone_hrtime
= gethrtime();
843 zone_lbolt
= (clock_t)(NSEC_TO_TICK(zone_hrtime
) -
844 NSEC_TO_TICK(curproc
->p_zone
->zone_zsched
->p_mstart
));
845 mutex_enter(&curproc
->p_zone
->zone_nlwps_lock
);
846 zone_nproc
= curproc
->p_zone
->zone_nprocs
;
847 mutex_exit(&curproc
->p_zone
->zone_nlwps_lock
);
850 system_misc_kstat
.ncpus
.value
.ui32
= (uint32_t)myncpus
;
851 system_misc_kstat
.lbolt
.value
.ui32
= (uint32_t)zone_lbolt
;
852 system_misc_kstat
.deficit
.value
.ui32
= (uint32_t)deficit
;
853 system_misc_kstat
.clk_intr
.value
.ui32
= (uint32_t)zone_lbolt
;
854 system_misc_kstat
.vac
.value
.ui32
= (uint32_t)vac
;
855 system_misc_kstat
.nproc
.value
.ui32
= (uint32_t)zone_nproc
;
856 system_misc_kstat
.avenrun_1min
.value
.ui32
= (uint32_t)loadavgp
[0];
857 system_misc_kstat
.avenrun_5min
.value
.ui32
= (uint32_t)loadavgp
[1];
858 system_misc_kstat
.avenrun_15min
.value
.ui32
= (uint32_t)loadavgp
[2];
859 system_misc_kstat
.boot_time
.value
.ui32
= (uint32_t)
861 system_misc_kstat
.nsec_per_tick
.value
.ui32
= (uint32_t)
866 extern caddr_t econtig
;
870 system_pages_kstat_update(kstat_t
*ksp
, int rw
)
872 kobj_stat_t kobj_stat
;
874 if (rw
== KSTAT_WRITE
) {
878 kobj_stat_get(&kobj_stat
);
879 system_pages_kstat
.physmem
.value
.ul
= (ulong_t
)physmem
;
880 system_pages_kstat
.nalloc
.value
.ul
= kobj_stat
.nalloc
;
881 system_pages_kstat
.nfree
.value
.ul
= kobj_stat
.nfree
;
882 system_pages_kstat
.nalloc_calls
.value
.ul
= kobj_stat
.nalloc_calls
;
883 system_pages_kstat
.nfree_calls
.value
.ul
= kobj_stat
.nfree_calls
;
884 system_pages_kstat
.kernelbase
.value
.ul
= (ulong_t
)KERNELBASE
;
886 system_pages_kstat
.econtig
.value
.ul
= (ulong_t
)econtig
;
888 system_pages_kstat
.freemem
.value
.ul
= (ulong_t
)freemem
;
889 system_pages_kstat
.availrmem
.value
.ul
= (ulong_t
)availrmem
;
890 system_pages_kstat
.lotsfree
.value
.ul
= (ulong_t
)lotsfree
;
891 system_pages_kstat
.desfree
.value
.ul
= (ulong_t
)desfree
;
892 system_pages_kstat
.minfree
.value
.ul
= (ulong_t
)minfree
;
893 system_pages_kstat
.fastscan
.value
.ul
= (ulong_t
)fastscan
;
894 system_pages_kstat
.slowscan
.value
.ul
= (ulong_t
)slowscan
;
895 system_pages_kstat
.nscan
.value
.ul
= (ulong_t
)nscan
;
896 system_pages_kstat
.desscan
.value
.ul
= (ulong_t
)desscan
;
897 system_pages_kstat
.pagesfree
.value
.ul
= (ulong_t
)freemem
;
898 system_pages_kstat
.pageslocked
.value
.ul
= (ulong_t
)(availrmem_initial
-
900 system_pages_kstat
.pagestotal
.value
.ul
= (ulong_t
)total_pages
;
902 * pp_kernel represents total pages used by the kernel since the
903 * startup. This formula takes into account the boottime kernel
904 * footprint and also considers the availrmem changes because of
905 * user explicit page locking.
907 system_pages_kstat
.pp_kernel
.value
.ul
= (ulong_t
)(physinstalled
-
908 obp_pages
- availrmem
- k_anoninfo
.ani_mem_resv
-
909 anon_segkp_pages_locked
- pages_locked
-
910 pages_claimed
- pages_useclaim
);
916 kstat_create(const char *ks_module
, int ks_instance
, const char *ks_name
,
917 const char *ks_class
, uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
)
919 return (kstat_create_zone(ks_module
, ks_instance
, ks_name
, ks_class
,
920 ks_type
, ks_ndata
, ks_flags
, ALL_ZONES
));
/*
 * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
 * the specified name exists, reactivate it.  Returns a pointer to the kstat
 * on success, NULL on failure.  The kstat will not be visible to the
 * kstat driver until kstat_install().
 */
930 kstat_create_zone(const char *ks_module
, int ks_instance
, const char *ks_name
,
931 const char *ks_class
, uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
,
938 char namebuf
[KSTAT_STRLEN
+ 16];
940 if (avl_numnodes(&kstat_avl_bykid
) == 0) {
941 avl_create(&kstat_avl_bykid
, kstat_compare_bykid
,
942 sizeof (ekstat_t
), offsetof(struct ekstat
, e_avl_bykid
));
944 avl_create(&kstat_avl_byname
, kstat_compare_byname
,
945 sizeof (ekstat_t
), offsetof(struct ekstat
, e_avl_byname
));
949 * If ks_name == NULL, set the ks_name to <module><instance>.
951 if (ks_name
== NULL
) {
952 char buf
[KSTAT_STRLEN
];
953 kstat_set_string(buf
, ks_module
);
954 (void) sprintf(namebuf
, "%s%d", buf
, ks_instance
);
959 * Make sure it's a valid kstat data type
961 if (ks_type
>= KSTAT_NUM_TYPES
) {
962 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
963 "invalid kstat type %d",
964 ks_module
, ks_instance
, ks_name
, ks_type
);
969 * Don't allow persistent virtual kstats -- it makes no sense.
970 * ks_data points to garbage when the client goes away.
972 if ((ks_flags
& KSTAT_FLAG_PERSISTENT
) &&
973 (ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
974 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
975 "cannot create persistent virtual kstat",
976 ks_module
, ks_instance
, ks_name
);
981 * Don't allow variable-size physical kstats, since the framework's
982 * memory allocation for physical kstat data is fixed at creation time.
984 if ((ks_flags
& KSTAT_FLAG_VAR_SIZE
) &&
985 !(ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
986 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
987 "cannot create variable-size physical kstat",
988 ks_module
, ks_instance
, ks_name
);
993 * Make sure the number of data fields is within legal range
995 if (ks_ndata
< kstat_data_type
[ks_type
].min_ndata
||
996 ks_ndata
> kstat_data_type
[ks_type
].max_ndata
) {
997 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
998 "ks_ndata=%d out of range [%d, %d]",
999 ks_module
, ks_instance
, ks_name
, (int)ks_ndata
,
1000 kstat_data_type
[ks_type
].min_ndata
,
1001 kstat_data_type
[ks_type
].max_ndata
);
1005 ks_data_size
= kstat_data_type
[ks_type
].size
* ks_ndata
;
1008 * If the named kstat already exists and is dormant, reactivate it.
1010 ksp
= kstat_hold_byname(ks_module
, ks_instance
, ks_name
, ks_zoneid
);
1012 if (!(ksp
->ks_flags
& KSTAT_FLAG_DORMANT
)) {
1014 * The named kstat exists but is not dormant --
1015 * this is a kstat namespace collision.
1019 "kstat_create('%s', %d, '%s'): namespace collision",
1020 ks_module
, ks_instance
, ks_name
);
1023 if ((strcmp(ksp
->ks_class
, ks_class
) != 0) ||
1024 (ksp
->ks_type
!= ks_type
) ||
1025 (ksp
->ks_ndata
!= ks_ndata
) ||
1026 (ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
1028 * The name is the same, but the other key parameters
1029 * differ from those of the dormant kstat -- bogus.
1032 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
1033 "invalid reactivation of dormant kstat",
1034 ks_module
, ks_instance
, ks_name
);
1038 * Return dormant kstat pointer to caller. As usual,
1039 * the kstat is marked invalid until kstat_install().
1041 ksp
->ks_flags
|= KSTAT_FLAG_INVALID
;
1047 * Allocate memory for the new kstat header and, if this is a physical
1048 * kstat, the data section.
1050 e
= kstat_alloc(ks_flags
& KSTAT_FLAG_VIRTUAL
? 0 : ks_data_size
);
1052 cmn_err(CE_NOTE
, "kstat_create('%s', %d, '%s'): "
1053 "insufficient kernel memory",
1054 ks_module
, ks_instance
, ks_name
);
1059 * Initialize as many fields as we can. The caller may reset
1060 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1061 * Creators of virtual kstats may also reset ks_data. It is
1062 * also up to the caller to initialize the kstat data section,
1063 * if necessary. All initialization must be complete before
1064 * calling kstat_install().
1066 e
->e_zone
.zoneid
= ks_zoneid
;
1067 e
->e_zone
.next
= NULL
;
1070 ksp
->ks_crtime
= gethrtime();
1071 kstat_set_string(ksp
->ks_module
, ks_module
);
1072 ksp
->ks_instance
= ks_instance
;
1073 kstat_set_string(ksp
->ks_name
, ks_name
);
1074 ksp
->ks_type
= ks_type
;
1075 kstat_set_string(ksp
->ks_class
, ks_class
);
1076 ksp
->ks_flags
= ks_flags
| KSTAT_FLAG_INVALID
;
1077 if (ks_flags
& KSTAT_FLAG_VIRTUAL
)
1078 ksp
->ks_data
= NULL
;
1080 ksp
->ks_data
= (void *)(e
+ 1);
1081 ksp
->ks_ndata
= ks_ndata
;
1082 ksp
->ks_data_size
= ks_data_size
;
1083 ksp
->ks_snaptime
= ksp
->ks_crtime
;
1084 ksp
->ks_update
= default_kstat_update
;
1085 ksp
->ks_private
= NULL
;
1086 ksp
->ks_snapshot
= default_kstat_snapshot
;
1087 ksp
->ks_lock
= NULL
;
1089 mutex_enter(&kstat_chain_lock
);
1092 * Add our kstat to the AVL trees.
1094 if (avl_find(&kstat_avl_byname
, e
, &where
) != NULL
) {
1095 mutex_exit(&kstat_chain_lock
);
1097 "kstat_create('%s', %d, '%s'): namespace collision",
1098 ks_module
, ks_instance
, ks_name
);
1102 avl_insert(&kstat_avl_byname
, e
, where
);
1105 * Loop around until we find an unused KID.
1108 ksp
->ks_kid
= kstat_chain_id
++;
1109 } while (avl_find(&kstat_avl_bykid
, e
, &where
) != NULL
);
1110 avl_insert(&kstat_avl_bykid
, e
, where
);
1112 mutex_exit(&kstat_chain_lock
);
1118 * Activate a fully initialized kstat and make it visible to /dev/kstat.
1121 kstat_install(kstat_t
*ksp
)
1123 zoneid_t zoneid
= ((ekstat_t
*)ksp
)->e_zone
.zoneid
;
1126 * If this is a variable-size kstat, it MUST provide kstat data locking
1127 * to prevent data-size races with kstat readers.
1129 if ((ksp
->ks_flags
& KSTAT_FLAG_VAR_SIZE
) && ksp
->ks_lock
== NULL
) {
1130 panic("kstat_install('%s', %d, '%s'): "
1131 "cannot create variable-size kstat without data lock",
1132 ksp
->ks_module
, ksp
->ks_instance
, ksp
->ks_name
);
1135 if (kstat_hold_bykid(ksp
->ks_kid
, zoneid
) != ksp
) {
1136 cmn_err(CE_WARN
, "kstat_install(%p): does not exist",
1141 if (ksp
->ks_type
== KSTAT_TYPE_NAMED
&& ksp
->ks_data
!= NULL
) {
1143 kstat_named_t
*knp
= KSTAT_NAMED_PTR(ksp
);
1145 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
1146 if (knp
->data_type
== KSTAT_DATA_STRING
) {
1147 ksp
->ks_flags
|= KSTAT_FLAG_LONGSTRINGS
;
1152 * The default snapshot routine does not handle KSTAT_WRITE
1155 if ((ksp
->ks_flags
& KSTAT_FLAG_LONGSTRINGS
) &&
1156 (ksp
->ks_flags
& KSTAT_FLAG_WRITABLE
) &&
1157 (ksp
->ks_snapshot
== default_kstat_snapshot
)) {
1158 panic("kstat_install('%s', %d, '%s'): "
1159 "named kstat containing KSTAT_DATA_STRING "
1160 "is writable but uses default snapshot routine",
1161 ksp
->ks_module
, ksp
->ks_instance
, ksp
->ks_name
);
1165 if (ksp
->ks_flags
& KSTAT_FLAG_DORMANT
) {
1168 * We are reactivating a dormant kstat. Initialize the
1169 * caller's underlying data to the value it had when the
1170 * kstat went dormant, and mark the kstat as active.
1171 * Grab the provider's kstat lock if it's not already held.
1173 kmutex_t
*lp
= ksp
->ks_lock
;
1174 if (lp
!= NULL
&& MUTEX_NOT_HELD(lp
)) {
1176 (void) KSTAT_UPDATE(ksp
, KSTAT_WRITE
);
1179 (void) KSTAT_UPDATE(ksp
, KSTAT_WRITE
);
1181 ksp
->ks_flags
&= ~KSTAT_FLAG_DORMANT
;
1185 * Now that the kstat is active, make it visible to the kstat driver.
1186 * When copying out kstats the count is determined in
1187 * header_kstat_update() and actually copied into kbuf in
1188 * header_kstat_snapshot(). kstat_chain_lock is held across the two
1189 * calls to ensure that this list doesn't change. Thus, we need to
1190 * also take the lock to ensure that the we don't copy the new kstat
1191 * in the 2nd pass and overrun the buf.
1193 mutex_enter(&kstat_chain_lock
);
1194 ksp
->ks_flags
&= ~KSTAT_FLAG_INVALID
;
1195 mutex_exit(&kstat_chain_lock
);
1200 * Remove a kstat from the system. Or, if it's a persistent kstat,
1201 * just update the data and mark it as dormant.
1204 kstat_delete(kstat_t
*ksp
)
1207 ekstat_t
*e
= (ekstat_t
*)ksp
;
1211 ASSERT(ksp
!= NULL
);
1216 zoneid
= e
->e_zone
.zoneid
;
1220 if (lp
!= NULL
&& MUTEX_HELD(lp
)) {
1221 panic("kstat_delete(%p): caller holds data lock %p",
1222 (void *)ksp
, (void *)lp
);
1225 if (kstat_hold_bykid(ksp
->ks_kid
, zoneid
) != ksp
) {
1226 cmn_err(CE_WARN
, "kstat_delete(%p): does not exist",
1231 if (ksp
->ks_flags
& KSTAT_FLAG_PERSISTENT
) {
1233 * Update the data one last time, so that all activity
1234 * prior to going dormant has been accounted for.
1237 (void) KSTAT_UPDATE(ksp
, KSTAT_READ
);
1241 * Mark the kstat as dormant and restore caller-modifiable
1242 * fields to default values, so the kstat is readable during
1243 * the dormant phase.
1245 ksp
->ks_flags
|= KSTAT_FLAG_DORMANT
;
1246 ksp
->ks_lock
= NULL
;
1247 ksp
->ks_update
= default_kstat_update
;
1248 ksp
->ks_private
= NULL
;
1249 ksp
->ks_snapshot
= default_kstat_snapshot
;
1255 * Remove the kstat from the framework's AVL trees,
1256 * free the allocated memory, and increment kstat_chain_id so
1257 * /dev/kstat clients can detect the event.
1259 mutex_enter(&kstat_chain_lock
);
1260 avl_remove(&kstat_avl_bykid
, e
);
1261 avl_remove(&kstat_avl_byname
, e
);
1263 mutex_exit(&kstat_chain_lock
);
1265 kz
= e
->e_zone
.next
;
1266 while (kz
!= NULL
) {
1267 kstat_zone_t
*t
= kz
;
1270 kmem_free(t
, sizeof (*t
));
1277 kstat_delete_byname_zone(const char *ks_module
, int ks_instance
,
1278 const char *ks_name
, zoneid_t ks_zoneid
)
1282 ksp
= kstat_hold_byname(ks_module
, ks_instance
, ks_name
, ks_zoneid
);
1290 kstat_delete_byname(const char *ks_module
, int ks_instance
, const char *ks_name
)
1292 kstat_delete_byname_zone(ks_module
, ks_instance
, ks_name
, ALL_ZONES
);
/*
 * The sparc V9 versions of these routines can be much cheaper than
 * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
 * For simplicity, however, we always feed the C versions to lint.
 */
1300 #if !defined(__sparc) || defined(lint) || defined(__lint)
1303 kstat_waitq_enter(kstat_io_t
*kiop
)
1305 hrtime_t
new, delta
;
1308 new = gethrtime_unscaled();
1309 delta
= new - kiop
->wlastupdate
;
1310 kiop
->wlastupdate
= new;
1311 wcnt
= kiop
->wcnt
++;
1313 kiop
->wlentime
+= delta
* wcnt
;
1314 kiop
->wtime
+= delta
;
1319 kstat_waitq_exit(kstat_io_t
*kiop
)
1321 hrtime_t
new, delta
;
1324 new = gethrtime_unscaled();
1325 delta
= new - kiop
->wlastupdate
;
1326 kiop
->wlastupdate
= new;
1327 wcnt
= kiop
->wcnt
--;
1328 ASSERT((int)wcnt
> 0);
1329 kiop
->wlentime
+= delta
* wcnt
;
1330 kiop
->wtime
+= delta
;
1334 kstat_runq_enter(kstat_io_t
*kiop
)
1336 hrtime_t
new, delta
;
1339 new = gethrtime_unscaled();
1340 delta
= new - kiop
->rlastupdate
;
1341 kiop
->rlastupdate
= new;
1342 rcnt
= kiop
->rcnt
++;
1344 kiop
->rlentime
+= delta
* rcnt
;
1345 kiop
->rtime
+= delta
;
1350 kstat_runq_exit(kstat_io_t
*kiop
)
1352 hrtime_t
new, delta
;
1355 new = gethrtime_unscaled();
1356 delta
= new - kiop
->rlastupdate
;
1357 kiop
->rlastupdate
= new;
1358 rcnt
= kiop
->rcnt
--;
1359 ASSERT((int)rcnt
> 0);
1360 kiop
->rlentime
+= delta
* rcnt
;
1361 kiop
->rtime
+= delta
;
1365 kstat_waitq_to_runq(kstat_io_t
*kiop
)
1367 hrtime_t
new, delta
;
1370 new = gethrtime_unscaled();
1372 delta
= new - kiop
->wlastupdate
;
1373 kiop
->wlastupdate
= new;
1374 wcnt
= kiop
->wcnt
--;
1375 ASSERT((int)wcnt
> 0);
1376 kiop
->wlentime
+= delta
* wcnt
;
1377 kiop
->wtime
+= delta
;
1379 delta
= new - kiop
->rlastupdate
;
1380 kiop
->rlastupdate
= new;
1381 rcnt
= kiop
->rcnt
++;
1383 kiop
->rlentime
+= delta
* rcnt
;
1384 kiop
->rtime
+= delta
;
1389 kstat_runq_back_to_waitq(kstat_io_t
*kiop
)
1391 hrtime_t
new, delta
;
1394 new = gethrtime_unscaled();
1396 delta
= new - kiop
->rlastupdate
;
1397 kiop
->rlastupdate
= new;
1398 rcnt
= kiop
->rcnt
--;
1399 ASSERT((int)rcnt
> 0);
1400 kiop
->rlentime
+= delta
* rcnt
;
1401 kiop
->rtime
+= delta
;
1403 delta
= new - kiop
->wlastupdate
;
1404 kiop
->wlastupdate
= new;
1405 wcnt
= kiop
->wcnt
++;
1407 kiop
->wlentime
+= delta
* wcnt
;
1408 kiop
->wtime
+= delta
;
1415 kstat_timer_start(kstat_timer_t
*ktp
)
1417 ktp
->start_time
= gethrtime();
1421 kstat_timer_stop(kstat_timer_t
*ktp
)
1424 u_longlong_t num_events
;
1426 ktp
->stop_time
= etime
= gethrtime();
1427 etime
-= ktp
->start_time
;
1428 num_events
= ktp
->num_events
;
1429 if (etime
< ktp
->min_time
|| num_events
== 0)
1430 ktp
->min_time
= etime
;
1431 if (etime
> ktp
->max_time
)
1432 ktp
->max_time
= etime
;
1433 ktp
->elapsed_time
+= etime
;
1434 ktp
->num_events
= num_events
+ 1;