4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2015 Joyent, Inc.
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/sunddi.h>
35 #include <sys/systm.h>
37 #include <sys/cpupart.h>
39 #include <sys/lgrp_user.h>
40 #include <sys/promif.h> /* for prom_printf() */
41 #include <sys/sysmacros.h>
42 #include <sys/policy.h>
47 /* definitions for mi_validity */
52 * run through the given number of addresses and requests and return the
53 * corresponding memory information for each address
56 meminfo(int addr_count
, struct meminfo
*mip
)
58 size_t in_size
, out_size
, req_size
, val_size
;
61 int i
, j
, out_idx
, info_count
;
65 int *req_array
, *val_array
;
66 uint64_t *in_array
, *out_array
;
71 #if defined(_SYSCALL32_IMPL)
72 struct meminfo32 minfo32
;
76 * Make sure that there is at least one address to translate and
77 * limit how many virtual addresses the kernel can do per call
80 return (set_errno(EINVAL
));
81 else if (addr_count
> MAX_MEMINFO_CNT
)
82 addr_count
= MAX_MEMINFO_CNT
;
84 if (get_udatamodel() == DATAMODEL_NATIVE
) {
85 if (copyin(mip
, &minfo
, sizeof (struct meminfo
)))
86 return (set_errno(EFAULT
));
88 #if defined(_SYSCALL32_IMPL)
90 bzero(&minfo
, sizeof (minfo
));
91 if (copyin(mip
, &minfo32
, sizeof (struct meminfo32
)))
92 return (set_errno(EFAULT
));
93 minfo
.mi_inaddr
= (const uint64_t *)(uintptr_t)
95 minfo
.mi_info_req
= (const uint_t
*)(uintptr_t)
97 minfo
.mi_info_count
= minfo32
.mi_info_count
;
98 minfo
.mi_outdata
= (uint64_t *)(uintptr_t)
100 minfo
.mi_validity
= (uint_t
*)(uintptr_t)
105 * all the input parameters have been copied in:-
106 * addr_count - number of input addresses
107 * minfo.mi_inaddr - array of input addresses
108 * minfo.mi_info_req - array of types of information requested
109 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 * minfo.mi_outdata - array into which the results are placed
111 * minfo.mi_validity - array containing bitwise result codes; 0th bit
112 * evaluates validity of corresponding input
113 * address, 1st bit validity of response to first
114 * member of info_req, etc.
117 /* make sure mi_info_count is within limit */
118 info_count
= minfo
.mi_info_count
;
119 if (info_count
< 1 || info_count
> MAX_MEMINFO_REQ
)
120 return (set_errno(EINVAL
));
123 * allocate buffer in_array for the input addresses and copy them in
125 in_size
= sizeof (uint64_t) * addr_count
;
126 in_array
= kmem_alloc(in_size
, KM_SLEEP
);
127 if (copyin(minfo
.mi_inaddr
, in_array
, in_size
)) {
128 kmem_free(in_array
, in_size
);
129 return (set_errno(EFAULT
));
133 * allocate buffer req_array for the input info_reqs and copy them in
135 req_size
= sizeof (uint_t
) * info_count
;
136 req_array
= kmem_alloc(req_size
, KM_SLEEP
);
137 if (copyin(minfo
.mi_info_req
, req_array
, req_size
)) {
138 kmem_free(req_array
, req_size
);
139 kmem_free(in_array
, in_size
);
140 return (set_errno(EFAULT
));
144 * Validate privs for each req.
146 for (i
= 0; i
< info_count
; i
++) {
147 switch (req_array
[i
] & MEMINFO_MASK
) {
149 case MEMINFO_VPAGESIZE
:
152 if (secpolicy_meminfo(CRED()) != 0) {
153 kmem_free(req_array
, req_size
);
154 kmem_free(in_array
, in_size
);
155 return (set_errno(EPERM
));
162 * allocate buffer out_array which holds the results and will have
163 * to be copied out later
165 out_size
= sizeof (uint64_t) * addr_count
* info_count
;
166 out_array
= kmem_alloc(out_size
, KM_SLEEP
);
169 * allocate buffer val_array which holds the validity bits and will
170 * have to be copied out later
172 val_size
= sizeof (uint_t
) * addr_count
;
173 val_array
= kmem_alloc(val_size
, KM_SLEEP
);
175 if ((req_array
[0] & MEMINFO_MASK
) == MEMINFO_PLGRP
) {
176 /* find the corresponding lgroup for each physical address */
177 for (i
= 0; i
< addr_count
; i
++) {
180 lgrp
= lgrp_pfn_to_lgrp(pfn
);
182 out_array
[i
] = lgrp
->lgrp_id
;
183 val_array
[i
] = VALID_ADDR
| VALID_REQ
;
190 /* get the corresponding memory info for each virtual address */
193 AS_LOCK_ENTER(as
, RW_READER
);
195 for (i
= out_idx
= 0; i
< addr_count
; i
++, out_idx
+=
198 vaddr
= (uintptr_t)(addr
& ~PAGEOFFSET
);
199 if (!as_segat(as
, (caddr_t
)vaddr
)) {
203 val_array
[i
] = VALID_ADDR
;
204 pfn
= hat_getpfnum(hat
, (caddr_t
)vaddr
);
205 if (pfn
!= PFN_INVALID
) {
206 paddr
= (uint64_t)((pfn
<< PAGESHIFT
) |
207 (addr
& PAGEOFFSET
));
208 for (j
= 0; j
< info_count
; j
++) {
209 switch (req_array
[j
] & MEMINFO_MASK
) {
210 case MEMINFO_VPHYSICAL
:
212 * return the physical address
213 * corresponding to the input
216 out_array
[out_idx
+ j
] = paddr
;
217 val_array
[i
] |= VALID_REQ
<< j
;
221 * return the lgroup of physical
222 * page corresponding to the
223 * input virtual address
225 lgrp
= lgrp_pfn_to_lgrp(pfn
);
227 out_array
[out_idx
+ j
] =
233 case MEMINFO_VPAGESIZE
:
235 * return the size of physical
236 * page corresponding to the
237 * input virtual address
239 pgsz
= hat_getpagesize(hat
,
242 out_array
[out_idx
+ j
] =
248 case MEMINFO_VREPLCNT
:
251 * return the no. replicated
252 * physical pages corresponding
253 * to the input virtual address,
254 * so it is always 0 at the
257 out_array
[out_idx
+ j
] = 0;
258 val_array
[i
] |= VALID_REQ
<< j
;
263 * return the nth physical
264 * replica of the specified
268 case MEMINFO_VREPL_LGRP
:
271 * return the lgroup of nth
272 * physical replica of the
273 * specified virtual address
278 * this is for physical address
279 * only, shouldn't mix with
292 /* copy out the results and validity bits and free the buffers */
293 if ((copyout(out_array
, minfo
.mi_outdata
, out_size
) != 0) ||
294 (copyout(val_array
, minfo
.mi_validity
, val_size
) != 0))
295 ret
= set_errno(EFAULT
);
297 kmem_free(in_array
, in_size
);
298 kmem_free(out_array
, out_size
);
299 kmem_free(req_array
, req_size
);
300 kmem_free(val_array
, val_size
);
307 * Initialize lgroup affinities for thread
310 lgrp_affinity_init(lgrp_affinity_t
**bufaddr
)
318 * Free lgroup affinities for thread and set to NULL
319 * just in case thread gets recycled
322 lgrp_affinity_free(lgrp_affinity_t
**bufaddr
)
324 if (bufaddr
&& *bufaddr
) {
325 kmem_free(*bufaddr
, nlgrpsmax
* sizeof (lgrp_affinity_t
));
331 #define P_ANY -2 /* cookie specifying any ID */
335 * Find LWP with given ID in specified process and get its affinity for
339 lgrp_affinity_get_thread(proc_t
*p
, id_t lwpid
, lgrp_id_t lgrp
)
345 ASSERT(MUTEX_HELD(&p
->p_lock
));
351 * The process may be executing in proc_exit() and its p->p_list may be
355 return (set_errno(ESRCH
));
358 if (t
->t_tid
== lwpid
|| lwpid
== P_ANY
) {
361 * Check to see whether caller has permission to set
364 if (t
->t_cid
== 0 || !hasprocperm(t
->t_cred
, CRED())) {
366 return (set_errno(EPERM
));
369 if (t
->t_lgrp_affinity
)
370 aff
= t
->t_lgrp_affinity
[lgrp
];
375 } while ((t
= t
->t_forw
) != p
->p_tlist
);
377 aff
= set_errno(ESRCH
);
384 * Get lgroup affinity for given LWP
387 lgrp_affinity_get(lgrp_affinity_args_t
*ap
)
390 lgrp_affinity_args_t args
;
400 if (copyin(ap
, &args
, sizeof (lgrp_affinity_args_t
)) != 0)
401 return (set_errno(EFAULT
));
404 idtype
= args
.idtype
;
408 * Check for invalid lgroup
410 if (lgrp
< 0 || lgrp
== LGRP_NONE
)
411 return (set_errno(EINVAL
));
414 * Check for existing lgroup
416 if (lgrp
> lgrp_alloc_max
)
417 return (set_errno(ESRCH
));
420 * Get lgroup affinity for given LWP or process
426 * LWP in current process
429 mutex_enter(&p
->p_lock
);
430 if (id
!= P_MYID
) /* different thread */
431 aff
= lgrp_affinity_get_thread(p
, id
, lgrp
);
432 else { /* current thread */
436 if (t
->t_lgrp_affinity
)
437 aff
= t
->t_lgrp_affinity
[lgrp
];
440 mutex_exit(&p
->p_lock
);
447 mutex_enter(&pidlock
);
454 mutex_exit(&pidlock
);
455 return (set_errno(ESRCH
));
459 mutex_enter(&p
->p_lock
);
460 aff
= lgrp_affinity_get_thread(p
, P_ANY
, lgrp
);
461 mutex_exit(&p
->p_lock
);
463 mutex_exit(&pidlock
);
467 aff
= set_errno(EINVAL
);
476 * Find lgroup for which this thread has most affinity in specified partition
477 * starting from home lgroup unless specified starting lgroup is preferred
480 lgrp_affinity_best(kthread_t
*t
, struct cpupart
*cpupart
, lgrp_id_t start
,
481 boolean_t prefer_start
)
483 lgrp_affinity_t
*affs
;
484 lgrp_affinity_t best_aff
;
492 ASSERT((MUTEX_HELD(&cpu_lock
) || curthread
->t_preempt
> 0) ||
493 (MUTEX_HELD(&ttoproc(t
)->p_lock
) && THREAD_LOCK_HELD(t
)));
494 ASSERT(cpupart
!= NULL
);
496 if (t
->t_lgrp_affinity
== NULL
)
499 affs
= t
->t_lgrp_affinity
;
502 * Thread bound to CPU
504 if (t
->t_bind_cpu
!= PBIND_NONE
) {
508 * Find which lpl has most affinity among leaf lpl directly
509 * containing CPU and its ancestor lpls
511 cp
= cpu
[t
->t_bind_cpu
];
513 best_lpl
= lpl
= cp
->cpu_lpl
;
514 best_aff
= affs
[best_lpl
->lpl_lgrpid
];
515 while (lpl
->lpl_parent
!= NULL
) {
516 lpl
= lpl
->lpl_parent
;
517 lgrpid
= lpl
->lpl_lgrpid
;
518 if (affs
[lgrpid
] > best_aff
) {
520 best_aff
= affs
[lgrpid
];
527 * Start searching from home lgroup unless given starting lgroup is
528 * preferred or home lgroup isn't in given pset. Use root lgroup as
529 * starting point if both home and starting lgroups aren't in given
532 ASSERT(start
>= 0 && start
<= lgrp_alloc_max
);
533 home
= t
->t_lpl
->lpl_lgrpid
;
534 if (!prefer_start
&& LGRP_CPUS_IN_PART(home
, cpupart
))
536 else if (start
!= LGRP_NONE
&& LGRP_CPUS_IN_PART(start
, cpupart
))
539 lgrpid
= LGRP_ROOTID
;
541 best_lpl
= &cpupart
->cp_lgrploads
[lgrpid
];
542 best_aff
= affs
[lgrpid
];
546 * Skip any lgroups that don't have CPU resources
547 * in this processor set.
549 if (!LGRP_CPUS_IN_PART(lgrpid
, cpupart
)) {
550 if (++lgrpid
> lgrp_alloc_max
)
551 lgrpid
= 0; /* wrap the search */
556 * Find lgroup with most affinity
558 lpl
= &cpupart
->cp_lgrploads
[lgrpid
];
559 if (affs
[lgrpid
] > best_aff
) {
560 best_aff
= affs
[lgrpid
];
564 if (++lgrpid
> lgrp_alloc_max
)
565 lgrpid
= 0; /* wrap the search */
567 } while (lgrpid
!= finish
);
570 * No lgroup (in this pset) with any affinity
572 if (best_aff
== LGRP_AFF_NONE
)
575 lgrpid
= best_lpl
->lpl_lgrpid
;
576 ASSERT(LGRP_CPUS_IN_PART(lgrpid
, cpupart
) && best_lpl
->lpl_ncpu
> 0);
583 * Set thread's affinity for given lgroup
586 lgrp_affinity_set_thread(kthread_t
*t
, lgrp_id_t lgrp
, lgrp_affinity_t aff
,
587 lgrp_affinity_t
**aff_buf
)
589 lgrp_affinity_t
*affs
;
596 ASSERT(MUTEX_HELD(&ttoproc(t
)->p_lock
));
603 * Check to see whether caller has permission to set affinity for
606 if (t
->t_cid
== 0 || !hasprocperm(t
->t_cred
, CRED())) {
608 return (set_errno(EPERM
));
611 if (t
->t_lgrp_affinity
== NULL
) {
612 if (aff
== LGRP_AFF_NONE
) {
616 ASSERT(aff_buf
!= NULL
&& *aff_buf
!= NULL
);
617 t
->t_lgrp_affinity
= *aff_buf
;
621 affs
= t
->t_lgrp_affinity
;
625 * Find lgroup for which thread has most affinity,
626 * starting with lgroup for which affinity being set
628 best_lpl
= lgrp_affinity_best(t
, t
->t_cpupart
, lgrp
, B_TRUE
);
631 * Rehome if found lgroup with more affinity than home or lgroup for
632 * which affinity is being set has same affinity as home
634 home
= t
->t_lpl
->lpl_lgrpid
;
635 if (best_lpl
!= NULL
&& best_lpl
!= t
->t_lpl
) {
636 best
= best_lpl
->lpl_lgrpid
;
637 if (affs
[best
] > affs
[home
] || (affs
[best
] == affs
[home
] &&
639 lgrp_move_thread(t
, best_lpl
, 1);
649 * Set process' affinity for specified lgroup
652 lgrp_affinity_set_proc(proc_t
*p
, lgrp_id_t lgrp
, lgrp_affinity_t aff
,
653 lgrp_affinity_t
**aff_buf_array
)
655 lgrp_affinity_t
*buf
;
661 ASSERT(MUTEX_HELD(&pidlock
) && MUTEX_HELD(&p
->p_lock
));
662 ASSERT(aff_buf_array
!= NULL
);
669 * Set lgroup affinity for thread
671 buf
= aff_buf_array
[i
];
672 retval
= lgrp_affinity_set_thread(t
, lgrp
, aff
, &buf
);
674 if (err
== 0 && retval
!= 0)
678 * Advance pointer to next buffer
681 ASSERT(i
< p
->p_lwpcnt
);
682 aff_buf_array
[i
] = NULL
;
686 } while ((t
= t
->t_forw
) != p
->p_tlist
);
693 * Set LWP's or process' affinity for specified lgroup
695 * When setting affinities, pidlock, process p_lock, and thread_lock()
696 * need to be held in that order to protect target thread's pset, process,
697 * process contents, and thread contents. thread_lock() does splhigh(),
698 * so it ends up having similar effect as kpreempt_disable(), so it will
699 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
702 lgrp_affinity_set(lgrp_affinity_args_t
*ap
)
705 lgrp_affinity_t
*aff_buf
;
706 lgrp_affinity_args_t args
;
717 if (copyin(ap
, &args
, sizeof (lgrp_affinity_args_t
)) != 0)
718 return (set_errno(EFAULT
));
720 idtype
= args
.idtype
;
726 * Check for invalid lgroup
728 if (lgrp
< 0 || lgrp
== LGRP_NONE
)
729 return (set_errno(EINVAL
));
732 * Check for existing lgroup
734 if (lgrp
> lgrp_alloc_max
)
735 return (set_errno(ESRCH
));
738 * Check for legal affinity
740 if (aff
!= LGRP_AFF_NONE
&& aff
!= LGRP_AFF_WEAK
&&
741 aff
!= LGRP_AFF_STRONG
)
742 return (set_errno(EINVAL
));
745 * Must be process or LWP ID
747 if (idtype
!= P_LWPID
&& idtype
!= P_PID
)
748 return (set_errno(EINVAL
));
751 * Set given LWP's or process' affinity for specified lgroup
757 * Allocate memory for thread's lgroup affinities
758 * ahead of time w/o holding locks
760 aff_buf
= kmem_zalloc(nlgrpsmax
* sizeof (lgrp_affinity_t
),
766 * Set affinity for thread
768 mutex_enter(&p
->p_lock
);
769 if (id
== P_MYID
) { /* current thread */
770 retval
= lgrp_affinity_set_thread(curthread
, lgrp
, aff
,
772 } else if (p
->p_tlist
== NULL
) {
773 retval
= set_errno(ESRCH
);
774 } else { /* other thread */
780 if (t
->t_tid
== id
) {
781 retval
= lgrp_affinity_set_thread(t
,
782 lgrp
, aff
, &aff_buf
);
786 } while ((t
= t
->t_forw
) != p
->p_tlist
);
788 retval
= set_errno(ESRCH
);
790 mutex_exit(&p
->p_lock
);
793 * Free memory for lgroup affinities,
794 * since thread didn't need it
798 nlgrpsmax
* sizeof (lgrp_affinity_t
));
805 lgrp_affinity_t
**aff_buf_array
;
812 mutex_enter(&pidlock
);
820 mutex_exit(&pidlock
);
821 return (set_errno(ESRCH
));
825 * Get number of threads in process
827 * NOTE: Only care about user processes,
828 * so p_lwpcnt should be number of threads.
830 mutex_enter(&p
->p_lock
);
831 nthreads
= p
->p_lwpcnt
;
832 mutex_exit(&p
->p_lock
);
834 mutex_exit(&pidlock
);
837 return (set_errno(ESRCH
));
840 * Preallocate memory for lgroup affinities for
841 * each thread in process now to avoid holding
842 * any locks. Allocate an array to hold a buffer
845 aff_buf_array
= kmem_zalloc(nthreads
*
846 sizeof (lgrp_affinity_t
*), KM_SLEEP
);
848 size
= nlgrpsmax
* sizeof (lgrp_affinity_t
);
849 for (i
= 0; i
< nthreads
; i
++)
850 aff_buf_array
[i
] = kmem_zalloc(size
, KM_SLEEP
);
852 mutex_enter(&pidlock
);
855 * Get process again since dropped locks to allocate
856 * memory (except current process)
862 * Process went away after we dropped locks and before
863 * reacquiring them, so drop locks, free memory, and
867 mutex_exit(&pidlock
);
868 for (i
= 0; i
< nthreads
; i
++)
869 kmem_free(aff_buf_array
[i
], size
);
870 kmem_free(aff_buf_array
,
871 nthreads
* sizeof (lgrp_affinity_t
*));
872 return (set_errno(ESRCH
));
875 mutex_enter(&p
->p_lock
);
878 * See whether number of threads is same
879 * If not, drop locks, free memory, and try again
881 if (nthreads
!= p
->p_lwpcnt
) {
882 mutex_exit(&p
->p_lock
);
883 mutex_exit(&pidlock
);
884 for (i
= 0; i
< nthreads
; i
++)
885 kmem_free(aff_buf_array
[i
], size
);
886 kmem_free(aff_buf_array
,
887 nthreads
* sizeof (lgrp_affinity_t
*));
892 * Set lgroup affinity for threads in process
894 retval
= lgrp_affinity_set_proc(p
, lgrp
, aff
,
897 mutex_exit(&p
->p_lock
);
898 mutex_exit(&pidlock
);
901 * Free any leftover memory, since some threads may
902 * have already allocated memory and set lgroup
905 for (i
= 0; i
< nthreads
; i
++)
906 if (aff_buf_array
[i
] != NULL
)
907 kmem_free(aff_buf_array
[i
], size
);
908 kmem_free(aff_buf_array
,
909 nthreads
* sizeof (lgrp_affinity_t
*));
913 } while (nthreads
!= p
->p_lwpcnt
);
918 retval
= set_errno(EINVAL
);
927 * Return the latest generation number for the lgroup hierarchy
928 * with the given view
931 lgrp_generation(lgrp_view_t view
)
939 * Determine generation number for given view
941 if (view
== LGRP_VIEW_OS
)
943 * Return generation number of lgroup hierarchy for OS view
948 * For caller's view, use generation numbers for lgroup
949 * hierarchy and caller's pset
950 * NOTE: Caller needs to check for change in pset ID
952 cpupart
= curthread
->t_cpupart
;
954 gen
= lgrp_gen
+ cpupart
->cp_gen
;
964 lgrp_home_thread(kthread_t
*t
)
969 ASSERT(MUTEX_HELD(&ttoproc(t
)->p_lock
));
974 * Check to see whether caller has permission to set affinity for
977 if (t
->t_cid
== 0 || !hasprocperm(t
->t_cred
, CRED())) {
979 return (set_errno(EPERM
));
982 home
= lgrp_home_id(t
);
990 * Get home lgroup of given process or thread
993 lgrp_home_get(idtype_t idtype
, id_t id
)
1000 * Get home lgroup of given LWP or process
1008 * Set affinity for thread
1010 mutex_enter(&p
->p_lock
);
1011 if (id
== P_MYID
) { /* current thread */
1012 retval
= lgrp_home_thread(curthread
);
1013 } else if (p
->p_tlist
== NULL
) {
1014 retval
= set_errno(ESRCH
);
1015 } else { /* other thread */
1020 if (t
->t_tid
== id
) {
1021 retval
= lgrp_home_thread(t
);
1025 } while ((t
= t
->t_forw
) != p
->p_tlist
);
1027 retval
= set_errno(ESRCH
);
1029 mutex_exit(&p
->p_lock
);
1036 mutex_enter(&pidlock
);
1044 mutex_exit(&pidlock
);
1045 return (set_errno(ESRCH
));
1048 mutex_enter(&p
->p_lock
);
1051 retval
= set_errno(ESRCH
);
1053 retval
= lgrp_home_thread(t
);
1054 mutex_exit(&p
->p_lock
);
1056 mutex_exit(&pidlock
);
1061 retval
= set_errno(EINVAL
);
1070 * Return latency between "from" and "to" lgroups
1072 * This latency number can only be used for relative comparison
1073 * between lgroups on the running system, cannot be used across platforms,
1074 * and may not reflect the actual latency. It is platform and implementation
1075 * specific, so platform gets to decide its value. It would be nice if the
1076 * number was at least proportional to make comparisons more meaningful though.
1079 lgrp_latency(lgrp_id_t from
, lgrp_id_t to
)
1087 ASSERT(MUTEX_HELD(&cpu_lock
));
1089 if (from
< 0 || to
< 0)
1090 return (set_errno(EINVAL
));
1092 if (from
> lgrp_alloc_max
|| to
> lgrp_alloc_max
)
1093 return (set_errno(ESRCH
));
1095 from_lgrp
= lgrp_table
[from
];
1096 to_lgrp
= lgrp_table
[to
];
1098 if (!LGRP_EXISTS(from_lgrp
) || !LGRP_EXISTS(to_lgrp
)) {
1099 return (set_errno(ESRCH
));
1103 * Get latency for same lgroup
1106 latency
= from_lgrp
->lgrp_latency
;
1111 * Get latency between leaf lgroups
1113 if (from_lgrp
->lgrp_childcnt
== 0 && to_lgrp
->lgrp_childcnt
== 0)
1114 return (lgrp_plat_latency(from_lgrp
->lgrp_plathand
,
1115 to_lgrp
->lgrp_plathand
));
1118 * Determine max latency between resources in two lgroups
1121 for (i
= 0; i
<= lgrp_alloc_max
; i
++) {
1126 from_rsrc
= lgrp_table
[i
];
1127 if (!LGRP_EXISTS(from_rsrc
) ||
1128 !klgrpset_ismember(from_lgrp
->lgrp_set
[LGRP_RSRC_CPU
], i
))
1131 for (j
= 0; j
<= lgrp_alloc_max
; j
++) {
1132 to_rsrc
= lgrp_table
[j
];
1133 if (!LGRP_EXISTS(to_rsrc
) ||
1134 klgrpset_ismember(to_lgrp
->lgrp_set
[LGRP_RSRC_MEM
],
1137 latency
= lgrp_plat_latency(from_rsrc
->lgrp_plathand
,
1138 to_rsrc
->lgrp_plathand
);
1139 if (latency
> latency_max
)
1140 latency_max
= latency
;
1143 return (latency_max
);
1148 * Return lgroup interface version number
1151 * 2 - lgrp_latency_cookie() and lgrp_resources() added
1154 lgrp_version(int version
)
1157 * Return LGRP_VER_NONE when requested version isn't supported
1159 if (version
< LGRP_VER_NONE
|| version
> LGRP_VER_CURRENT
)
1160 return (LGRP_VER_NONE
);
1163 * Return current version when LGRP_VER_NONE passed in
1165 if (version
== LGRP_VER_NONE
)
1166 return (LGRP_VER_CURRENT
);
1169 * Otherwise, return supported version.
1176 * Snapshot of lgroup hierarchy
1178 * One snapshot is kept and is based on the kernel's native data model, so
1179 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1180 * 64-bit kernel. If a 32-bit user wants a snapshot from the 64-bit kernel,
1181 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1183 * The format is defined by lgroup snapshot header and the layout of
1184 * the snapshot in memory is as follows:
1185 * 1) lgroup snapshot header
1186 * - specifies format of snapshot
1187 * - defined by lgrp_snapshot_header_t
1188 * 2) lgroup info array
1189 * - contains information about each lgroup
1190 * - one element for each lgroup
1191 * - each element is defined by lgrp_info_t
1192 * 3) lgroup CPU ID array
1193 * - contains list (array) of CPU IDs for each lgroup
1194 * - lgrp_info_t points into array and specifies how many CPUs belong to
1196 * 4) lgroup parents array
1197 * - contains lgroup bitmask of parents for each lgroup
1198 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1199 * 5) lgroup children array
1200 * - contains lgroup bitmask of children for each lgroup
1201 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1202 * 6) lgroup resources array
1203 * - contains lgroup bitmask of resources for each lgroup
1204 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1205 * 7) lgroup latency table
1206 * - contains latency from each lgroup to each of other lgroups
1208 * NOTE: Must use nlgrpsmax for per lgroup data structures because lgroups
1209 * may be sparsely allocated.
1211 lgrp_snapshot_header_t
*lgrp_snap
= NULL
; /* lgroup snapshot */
1212 static kmutex_t lgrp_snap_lock
; /* snapshot lock */
1216 * Take a snapshot of lgroup hierarchy and return size of buffer
1217 * needed to hold snapshot
1222 size_t bitmask_size
;
1223 size_t bitmasks_size
;
1231 ulong_t
*lgrp_children
;
1232 processorid_t
*lgrp_cpuids
;
1233 lgrp_info_t
*lgrp_info
;
1235 ulong_t
*lgrp_parents
;
1236 ulong_t
*lgrp_rsets
;
1241 size_t snap_hdr_size
;
1242 #ifdef _SYSCALL32_IMPL
1243 model_t model
= DATAMODEL_NATIVE
;
1246 * Have up-to-date snapshot, so check to see whether caller is 32-bit
1247 * program and need to return size of 32-bit snapshot now.
1249 model
= get_udatamodel();
1250 if (model
== DATAMODEL_ILP32
&& lgrp_snap
&&
1251 lgrp_snap
->ss_gen
== lgrp_gen
) {
1253 snap_nlgrpsmax
= lgrp_snap
->ss_nlgrps_max
;
1256 * Calculate size of buffer needed for 32-bit snapshot,
1257 * rounding up size of each object to allow for alignment
1258 * of next object in buffer.
1260 snap_hdr_size
= P2ROUNDUP(sizeof (lgrp_snapshot_header32_t
),
1261 sizeof (caddr32_t
));
1263 P2ROUNDUP(snap_nlgrpsmax
* sizeof (lgrp_info32_t
),
1264 sizeof (processorid_t
));
1266 P2ROUNDUP(lgrp_snap
->ss_ncpus
* sizeof (processorid_t
),
1270 * lgroup bitmasks needed for parents, children, and resources
1271 * for each lgroup and pset lgroup set
1273 bitmask_size
= BT_SIZEOFMAP(snap_nlgrpsmax
);
1274 bitmasks_size
= (((2 + LGRP_RSRC_COUNT
) *
1275 snap_nlgrpsmax
) + 1) * bitmask_size
;
1278 * Size of latency table and buffer
1280 lats_size
= snap_nlgrpsmax
* sizeof (caddr32_t
) +
1281 snap_nlgrpsmax
* snap_nlgrpsmax
* sizeof (int);
1283 bufsize
= snap_hdr_size
+ info_size
+ cpuids_size
+
1284 bitmasks_size
+ lats_size
;
1287 #endif /* _SYSCALL32_IMPL */
1290 * Check whether snapshot is up-to-date
1291 * Free it and take another one if not
1294 if (lgrp_snap
->ss_gen
== lgrp_gen
)
1295 return (lgrp_snap
->ss_size
);
1297 kmem_free(lgrp_snap
, lgrp_snap
->ss_size
);
1302 * Allocate memory for snapshot
1303 * w/o holding cpu_lock while waiting for memory
1305 while (lgrp_snap
== NULL
) {
1309 * Take snapshot of lgroup generation number
1310 * and configuration size dependent information
1311 * NOTE: Only count number of online CPUs,
1312 * since only online CPUs appear in lgroups.
1314 mutex_enter(&cpu_lock
);
1315 old_generation
= lgrp_gen
;
1316 snap_ncpus
= ncpus_online
;
1317 snap_nlgrps
= nlgrps
;
1318 snap_nlgrpsmax
= nlgrpsmax
;
1319 mutex_exit(&cpu_lock
);
1322 * Calculate size of buffer needed for snapshot,
1323 * rounding up size of each object to allow for alignment
1324 * of next object in buffer.
1326 snap_hdr_size
= P2ROUNDUP(sizeof (lgrp_snapshot_header_t
),
1328 info_size
= P2ROUNDUP(snap_nlgrpsmax
* sizeof (lgrp_info_t
),
1329 sizeof (processorid_t
));
1330 cpuids_size
= P2ROUNDUP(snap_ncpus
* sizeof (processorid_t
),
1333 * lgroup bitmasks needed for pset lgroup set and parents,
1334 * children, and resource sets for each lgroup
1336 bitmask_size
= BT_SIZEOFMAP(snap_nlgrpsmax
);
1337 bitmasks_size
= (((2 + LGRP_RSRC_COUNT
) *
1338 snap_nlgrpsmax
) + 1) * bitmask_size
;
1341 * Size of latency table and buffer
1343 lats_size
= snap_nlgrpsmax
* sizeof (int *) +
1344 snap_nlgrpsmax
* snap_nlgrpsmax
* sizeof (int);
1346 bufsize
= snap_hdr_size
+ info_size
+ cpuids_size
+
1347 bitmasks_size
+ lats_size
;
1350 * Allocate memory for buffer
1352 lgrp_snap
= kmem_zalloc(bufsize
, KM_NOSLEEP
);
1353 if (lgrp_snap
== NULL
)
1354 return (set_errno(ENOMEM
));
1357 * Check whether generation number has changed
1359 mutex_enter(&cpu_lock
);
1360 if (lgrp_gen
== old_generation
)
1361 break; /* hasn't change, so done. */
1364 * Generation number changed, so free memory and try again.
1366 mutex_exit(&cpu_lock
);
1367 kmem_free(lgrp_snap
, bufsize
);
1372 * Fill in lgroup snapshot header
1373 * (including pointers to tables of lgroup info, CPU IDs, and parents
1376 lgrp_snap
->ss_version
= LGRP_VER_CURRENT
;
1379 * XXX For now, liblgrp only needs to know whether the hierarchy
1380 * XXX only has one level or not
1382 if (snap_nlgrps
== 1)
1383 lgrp_snap
->ss_levels
= 1;
1385 lgrp_snap
->ss_levels
= 2;
1387 lgrp_snap
->ss_root
= LGRP_ROOTID
;
1389 lgrp_snap
->ss_nlgrps
= lgrp_snap
->ss_nlgrps_os
= snap_nlgrps
;
1390 lgrp_snap
->ss_nlgrps_max
= snap_nlgrpsmax
;
1391 lgrp_snap
->ss_ncpus
= snap_ncpus
;
1392 lgrp_snap
->ss_gen
= lgrp_gen
;
1393 lgrp_snap
->ss_view
= LGRP_VIEW_OS
;
1394 lgrp_snap
->ss_pset
= 0; /* NOTE: caller should set if needed */
1395 lgrp_snap
->ss_size
= bufsize
;
1396 lgrp_snap
->ss_magic
= (uintptr_t)lgrp_snap
;
1398 lgrp_snap
->ss_info
= lgrp_info
=
1399 (lgrp_info_t
*)((uintptr_t)lgrp_snap
+ snap_hdr_size
);
1401 lgrp_snap
->ss_cpuids
= lgrp_cpuids
=
1402 (processorid_t
*)((uintptr_t)lgrp_info
+ info_size
);
1404 lgrp_snap
->ss_lgrpset
= lgrpset
=
1405 (ulong_t
*)((uintptr_t)lgrp_cpuids
+ cpuids_size
);
1407 lgrp_snap
->ss_parents
= lgrp_parents
=
1408 (ulong_t
*)((uintptr_t)lgrpset
+ bitmask_size
);
1410 lgrp_snap
->ss_children
= lgrp_children
=
1411 (ulong_t
*)((uintptr_t)lgrp_parents
+ (snap_nlgrpsmax
*
1414 lgrp_snap
->ss_rsets
= lgrp_rsets
=
1415 (ulong_t
*)((uintptr_t)lgrp_children
+ (snap_nlgrpsmax
*
1418 lgrp_snap
->ss_latencies
= lgrp_lats
=
1419 (int **)((uintptr_t)lgrp_rsets
+ (LGRP_RSRC_COUNT
*
1420 snap_nlgrpsmax
* bitmask_size
));
1423 * Fill in lgroup information
1426 for (i
= 0; i
< snap_nlgrpsmax
; i
++) {
1433 lgrp
= lgrp_table
[i
];
1434 if (!LGRP_EXISTS(lgrp
)) {
1435 bzero(&lgrp_info
[i
], sizeof (lgrp_info
[i
]));
1436 lgrp_info
[i
].info_lgrpid
= LGRP_NONE
;
1440 lgrp_info
[i
].info_lgrpid
= i
;
1441 lgrp_info
[i
].info_latency
= lgrp
->lgrp_latency
;
1444 * Fill in parents, children, and lgroup resources
1446 lgrp_info
[i
].info_parents
=
1447 (ulong_t
*)((uintptr_t)lgrp_parents
+ (i
* bitmask_size
));
1449 if (lgrp
->lgrp_parent
)
1450 BT_SET(lgrp_info
[i
].info_parents
,
1451 lgrp
->lgrp_parent
->lgrp_id
);
1453 lgrp_info
[i
].info_children
=
1454 (ulong_t
*)((uintptr_t)lgrp_children
+ (i
* bitmask_size
));
1456 for (j
= 0; j
< snap_nlgrpsmax
; j
++)
1457 if (klgrpset_ismember(lgrp
->lgrp_children
, j
))
1458 BT_SET(lgrp_info
[i
].info_children
, j
);
1460 lgrp_info
[i
].info_rset
=
1461 (ulong_t
*)((uintptr_t)lgrp_rsets
+
1462 (i
* LGRP_RSRC_COUNT
* bitmask_size
));
1464 for (j
= 0; j
< LGRP_RSRC_COUNT
; j
++) {
1467 rset
= (ulong_t
*)((uintptr_t)lgrp_info
[i
].info_rset
+
1468 (j
* bitmask_size
));
1469 for (k
= 0; k
< snap_nlgrpsmax
; k
++)
1470 if (klgrpset_ismember(lgrp
->lgrp_set
[j
], k
))
1478 lgrp_info
[i
].info_cpuids
= NULL
;
1479 cp
= head
= lgrp
->lgrp_cpu
;
1481 lgrp_info
[i
].info_cpuids
= &lgrp_cpuids
[cpu_index
];
1483 lgrp_cpuids
[cpu_index
] = cp
->cpu_id
;
1486 cp
= cp
->cpu_next_lgrp
;
1487 } while (cp
!= head
);
1489 ASSERT(cpu_count
== lgrp
->lgrp_cpucnt
);
1490 lgrp_info
[i
].info_ncpus
= cpu_count
;
1493 * Fill in memory sizes for lgroups that directly contain
1496 if (klgrpset_ismember(lgrp
->lgrp_set
[LGRP_RSRC_MEM
], i
)) {
1497 lgrp_info
[i
].info_mem_free
=
1498 lgrp_mem_size(i
, LGRP_MEM_SIZE_FREE
);
1499 lgrp_info
[i
].info_mem_install
=
1500 lgrp_mem_size(i
, LGRP_MEM_SIZE_INSTALL
);
1504 * Fill in latency table and buffer
1506 lgrp_lats
[i
] = (int *)((uintptr_t)lgrp_lats
+ snap_nlgrpsmax
*
1507 sizeof (int *) + i
* snap_nlgrpsmax
* sizeof (int));
1508 for (j
= 0; j
< snap_nlgrpsmax
; j
++) {
1512 if (!LGRP_EXISTS(to
))
1514 lgrp_lats
[i
][j
] = lgrp_latency(lgrp
->lgrp_id
,
1518 ASSERT(cpu_index
== snap_ncpus
);
1521 mutex_exit(&cpu_lock
);
1523 #ifdef _SYSCALL32_IMPL
1525 * Check to see whether caller is 32-bit program and need to return
1526 * size of 32-bit snapshot now that snapshot has been taken/updated.
1527 * May not have been able to do this earlier if snapshot was out of
1528 * date or didn't exist yet.
1530 if (model
== DATAMODEL_ILP32
) {
1532 snap_nlgrpsmax
= lgrp_snap
->ss_nlgrps_max
;
1535 * Calculate size of buffer needed for 32-bit snapshot,
1536 * rounding up size of each object to allow for alignment
1537 * of next object in buffer.
1539 snap_hdr_size
= P2ROUNDUP(sizeof (lgrp_snapshot_header32_t
),
1540 sizeof (caddr32_t
));
1542 P2ROUNDUP(snap_nlgrpsmax
* sizeof (lgrp_info32_t
),
1543 sizeof (processorid_t
));
1545 P2ROUNDUP(lgrp_snap
->ss_ncpus
* sizeof (processorid_t
),
1548 bitmask_size
= BT_SIZEOFMAP(snap_nlgrpsmax
);
1549 bitmasks_size
= (((2 + LGRP_RSRC_COUNT
) * snap_nlgrpsmax
) +
1554 * Size of latency table and buffer
1556 lats_size
= (snap_nlgrpsmax
* sizeof (caddr32_t
)) +
1557 (snap_nlgrpsmax
* snap_nlgrpsmax
* sizeof (int));
1559 bufsize
= snap_hdr_size
+ info_size
+ cpuids_size
+
1560 bitmasks_size
+ lats_size
;
1563 #endif /* _SYSCALL32_IMPL */
1565 return (lgrp_snap
->ss_size
);
1570 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1571 * into user instead of kernel address space, and return size of buffer
1572 * needed to hold snapshot
1575 lgrp_snapshot_copy(char *buf
, size_t bufsize
)
1577 size_t bitmask_size
;
1582 lgrp_info_t
*lgrp_info
;
1584 size_t snap_hdr_size
;
1587 lgrp_snapshot_header_t
*user_snap
;
1588 lgrp_info_t
*user_info
;
1589 lgrp_info_t
*user_info_buffer
;
1590 processorid_t
*user_cpuids
;
1591 ulong_t
*user_lgrpset
;
1592 ulong_t
*user_parents
;
1593 ulong_t
*user_children
;
1595 int **user_lats_buffer
;
1596 ulong_t
*user_rsets
;
1598 if (lgrp_snap
== NULL
)
1601 if (buf
== NULL
|| bufsize
<= 0)
1602 return (lgrp_snap
->ss_size
);
1605 * User needs to try getting size of buffer again
1606 * because given buffer size is too small.
1607 * The lgroup hierarchy may have changed after they asked for the size
1608 * but before the snapshot was taken.
1610 if (bufsize
< lgrp_snap
->ss_size
)
1611 return (set_errno(EAGAIN
));
1613 snap_ncpus
= lgrp_snap
->ss_ncpus
;
1614 snap_nlgrpsmax
= lgrp_snap
->ss_nlgrps_max
;
1617 * Fill in lgrpset now because caller may have change psets
1620 for (i
= 0; i
< snap_nlgrpsmax
; i
++) {
1621 if (klgrpset_ismember(curthread
->t_cpupart
->cp_lgrpset
,
1623 BT_SET(lgrp_snap
->ss_lgrpset
, i
);
1629 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1630 * into user buffer all at once
1632 if (copyout(lgrp_snap
, buf
, lgrp_snap
->ss_size
) != 0)
1633 return (set_errno(EFAULT
));
1636 * Round up sizes of lgroup snapshot header and info for alignment
1638 snap_hdr_size
= P2ROUNDUP(sizeof (lgrp_snapshot_header_t
),
1640 info_size
= P2ROUNDUP(snap_nlgrpsmax
* sizeof (lgrp_info_t
),
1641 sizeof (processorid_t
));
1642 cpuids_size
= P2ROUNDUP(snap_ncpus
* sizeof (processorid_t
),
1645 bitmask_size
= BT_SIZEOFMAP(snap_nlgrpsmax
);
1648 * Calculate pointers into user buffer for lgroup snapshot header,
1651 user_snap
= (lgrp_snapshot_header_t
*)buf
;
1652 user_info
= (lgrp_info_t
*)((uintptr_t)user_snap
+ snap_hdr_size
);
1653 user_cpuids
= (processorid_t
*)((uintptr_t)user_info
+ info_size
);
1654 user_lgrpset
= (ulong_t
*)((uintptr_t)user_cpuids
+ cpuids_size
);
1655 user_parents
= (ulong_t
*)((uintptr_t)user_lgrpset
+ bitmask_size
);
1656 user_children
= (ulong_t
*)((uintptr_t)user_parents
+
1657 (snap_nlgrpsmax
* bitmask_size
));
1658 user_rsets
= (ulong_t
*)((uintptr_t)user_children
+
1659 (snap_nlgrpsmax
* bitmask_size
));
1660 user_lats
= (int **)((uintptr_t)user_rsets
+
1661 (LGRP_RSRC_COUNT
* snap_nlgrpsmax
* bitmask_size
));
1664 * Copyout magic number (ie. pointer to beginning of buffer)
1666 if (copyout(&buf
, &user_snap
->ss_magic
, sizeof (buf
)) != 0)
1667 return (set_errno(EFAULT
));
1670 * Fix up pointers in user buffer to point into user buffer
1671 * not kernel snapshot
1673 if (copyout(&user_info
, &user_snap
->ss_info
, sizeof (user_info
)) != 0)
1674 return (set_errno(EFAULT
));
1676 if (copyout(&user_cpuids
, &user_snap
->ss_cpuids
,
1677 sizeof (user_cpuids
)) != 0)
1678 return (set_errno(EFAULT
));
1680 if (copyout(&user_lgrpset
, &user_snap
->ss_lgrpset
,
1681 sizeof (user_lgrpset
)) != 0)
1682 return (set_errno(EFAULT
));
1684 if (copyout(&user_parents
, &user_snap
->ss_parents
,
1685 sizeof (user_parents
)) != 0)
1686 return (set_errno(EFAULT
));
1688 if (copyout(&user_children
, &user_snap
->ss_children
,
1689 sizeof (user_children
)) != 0)
1690 return (set_errno(EFAULT
));
1692 if (copyout(&user_rsets
, &user_snap
->ss_rsets
,
1693 sizeof (user_rsets
)) != 0)
1694 return (set_errno(EFAULT
));
1696 if (copyout(&user_lats
, &user_snap
->ss_latencies
,
1697 sizeof (user_lats
)) != 0)
1698 return (set_errno(EFAULT
));
1701 * Make copies of lgroup info and latency table, fix up pointers,
1702 * and then copy them into user buffer
1704 user_info_buffer
= kmem_zalloc(info_size
, KM_NOSLEEP
);
1705 if (user_info_buffer
== NULL
)
1706 return (set_errno(ENOMEM
));
1708 user_lats_buffer
= kmem_zalloc(snap_nlgrpsmax
* sizeof (int *),
1710 if (user_lats_buffer
== NULL
) {
1711 kmem_free(user_info_buffer
, info_size
);
1712 return (set_errno(ENOMEM
));
1715 lgrp_info
= (lgrp_info_t
*)((uintptr_t)lgrp_snap
+ snap_hdr_size
);
1716 bcopy(lgrp_info
, user_info_buffer
, info_size
);
1719 for (i
= 0; i
< snap_nlgrpsmax
; i
++) {
1723 * Skip non-existent lgroups
1725 if (user_info_buffer
[i
].info_lgrpid
== LGRP_NONE
)
1729 * Update free memory size since it changes frequently
1730 * Only do so for lgroups directly containing memory
1732 * NOTE: This must be done before changing the pointers to
1733 * point into user space since we need to dereference
1734 * lgroup resource set
1736 snap_rset
= &lgrp_info
[i
].info_rset
[LGRP_RSRC_MEM
*
1737 BT_BITOUL(snap_nlgrpsmax
)];
1738 if (BT_TEST(snap_rset
, i
))
1739 user_info_buffer
[i
].info_mem_free
=
1740 lgrp_mem_size(i
, LGRP_MEM_SIZE_FREE
);
1743 * Fix up pointers to parents, children, resources, and
1746 user_info_buffer
[i
].info_parents
=
1747 (ulong_t
*)((uintptr_t)user_parents
+ (i
* bitmask_size
));
1748 user_info_buffer
[i
].info_children
=
1749 (ulong_t
*)((uintptr_t)user_children
+ (i
* bitmask_size
));
1750 user_info_buffer
[i
].info_rset
=
1751 (ulong_t
*)((uintptr_t)user_rsets
+
1752 (i
* LGRP_RSRC_COUNT
* bitmask_size
));
1753 user_lats_buffer
[i
] = (int *)((uintptr_t)user_lats
+
1754 (snap_nlgrpsmax
* sizeof (int *)) + (i
* snap_nlgrpsmax
*
1758 * Fix up pointer to CPU IDs
1760 if (user_info_buffer
[i
].info_ncpus
== 0) {
1761 user_info_buffer
[i
].info_cpuids
= NULL
;
1764 user_info_buffer
[i
].info_cpuids
= &user_cpuids
[cpu_index
];
1765 cpu_index
+= user_info_buffer
[i
].info_ncpus
;
1767 ASSERT(cpu_index
== snap_ncpus
);
1770 * Copy lgroup info and latency table with pointers fixed up to point
1771 * into user buffer out to user buffer now
1773 retval
= lgrp_snap
->ss_size
;
1774 if (copyout(user_info_buffer
, user_info
, info_size
) != 0)
1775 retval
= set_errno(EFAULT
);
1776 kmem_free(user_info_buffer
, info_size
);
1778 if (copyout(user_lats_buffer
, user_lats
, snap_nlgrpsmax
*
1779 sizeof (int *)) != 0)
1780 retval
= set_errno(EFAULT
);
1781 kmem_free(user_lats_buffer
, snap_nlgrpsmax
* sizeof (int *));
1787 #ifdef _SYSCALL32_IMPL
1789 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
1790 * into user instead of kernel address space, copy 32-bit snapshot into
1791 * given user buffer, and return size of buffer needed to hold snapshot
1794 lgrp_snapshot_copy32(caddr32_t buf
, size32_t bufsize
)
1796 size32_t bitmask_size
;
1797 size32_t bitmasks_size
;
1798 size32_t children_size
;
1800 size32_t cpuids_size
;
1805 lgrp_info_t
*lgrp_info
;
1806 lgrp_snapshot_header32_t
*lgrp_snap32
;
1807 lgrp_info32_t
*lgrp_info32
;
1808 processorid_t
*lgrp_cpuids32
;
1809 caddr32_t
*lgrp_lats32
;
1810 int **lgrp_lats32_kernel
;
1812 uint_t
*lgrp_parents32
;
1813 uint_t
*lgrp_children32
;
1814 uint_t
*lgrp_rsets32
;
1815 size32_t parents_size
;
1816 size32_t rsets_size
;
1818 size32_t snap_hdr_size
;
1823 if (lgrp_snap
== NULL
)
1826 snap_ncpus
= lgrp_snap
->ss_ncpus
;
1827 snap_nlgrpsmax
= lgrp_snap
->ss_nlgrps_max
;
1830 * Calculate size of buffer needed for 32-bit snapshot,
1831 * rounding up size of each object to allow for alignment
1832 * of next object in buffer.
1834 snap_hdr_size
= P2ROUNDUP(sizeof (lgrp_snapshot_header32_t
),
1835 sizeof (caddr32_t
));
1836 info_size
= P2ROUNDUP(snap_nlgrpsmax
* sizeof (lgrp_info32_t
),
1837 sizeof (processorid_t
));
1838 cpuids_size
= P2ROUNDUP(snap_ncpus
* sizeof (processorid_t
),
1841 bitmask_size
= BT_SIZEOFMAP32(snap_nlgrpsmax
);
1843 set_size
= bitmask_size
;
1844 parents_size
= snap_nlgrpsmax
* bitmask_size
;
1845 children_size
= snap_nlgrpsmax
* bitmask_size
;
1846 rsets_size
= P2ROUNDUP(LGRP_RSRC_COUNT
* snap_nlgrpsmax
*
1847 (int)bitmask_size
, sizeof (caddr32_t
));
1849 bitmasks_size
= set_size
+ parents_size
+ children_size
+ rsets_size
;
1852 * Size of latency table and buffer
1854 lats_size
= (snap_nlgrpsmax
* sizeof (caddr32_t
)) +
1855 (snap_nlgrpsmax
* snap_nlgrpsmax
* sizeof (int));
1857 snap_size
= snap_hdr_size
+ info_size
+ cpuids_size
+ bitmasks_size
+
1860 if (buf
== NULL
|| bufsize
<= 0) {
1865 * User needs to try getting size of buffer again
1866 * because given buffer size is too small.
1867 * The lgroup hierarchy may have changed after they asked for the size
1868 * but before the snapshot was taken.
1870 if (bufsize
< snap_size
)
1871 return (set_errno(EAGAIN
));
1874 * Make 32-bit copy of snapshot, fix up pointers to point into user
1875 * buffer not kernel, and then copy whole thing into user buffer
1877 lgrp_snap32
= kmem_zalloc(snap_size
, KM_NOSLEEP
);
1878 if (lgrp_snap32
== NULL
)
1879 return (set_errno(ENOMEM
));
1882 * Calculate pointers into 32-bit copy of snapshot
1883 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
1884 * resources, and latency table and buffer
1886 lgrp_info32
= (lgrp_info32_t
*)((uintptr_t)lgrp_snap32
+
1888 lgrp_cpuids32
= (processorid_t
*)((uintptr_t)lgrp_info32
+ info_size
);
1889 lgrp_set32
= (uint_t
*)((uintptr_t)lgrp_cpuids32
+ cpuids_size
);
1890 lgrp_parents32
= (uint_t
*)((uintptr_t)lgrp_set32
+ set_size
);
1891 lgrp_children32
= (uint_t
*)((uintptr_t)lgrp_parents32
+ parents_size
);
1892 lgrp_rsets32
= (uint_t
*)((uintptr_t)lgrp_children32
+ children_size
);
1893 lgrp_lats32
= (caddr32_t
*)((uintptr_t)lgrp_rsets32
+ rsets_size
);
1896 * Make temporary lgroup latency table of pointers for kernel to use
1897 * to fill in rows of table with latencies from each lgroup
1899 lgrp_lats32_kernel
= kmem_zalloc(snap_nlgrpsmax
* sizeof (int *),
1901 if (lgrp_lats32_kernel
== NULL
) {
1902 kmem_free(lgrp_snap32
, snap_size
);
1903 return (set_errno(ENOMEM
));
1907 * Fill in 32-bit lgroup snapshot header
1908 * (with pointers into user's buffer for lgroup info, CPU IDs,
1909 * bit masks, and latencies)
1911 lgrp_snap32
->ss_version
= lgrp_snap
->ss_version
;
1912 lgrp_snap32
->ss_levels
= lgrp_snap
->ss_levels
;
1913 lgrp_snap32
->ss_nlgrps
= lgrp_snap32
->ss_nlgrps_os
=
1914 lgrp_snap
->ss_nlgrps
;
1915 lgrp_snap32
->ss_nlgrps_max
= snap_nlgrpsmax
;
1916 lgrp_snap32
->ss_root
= lgrp_snap
->ss_root
;
1917 lgrp_snap32
->ss_ncpus
= lgrp_snap
->ss_ncpus
;
1918 lgrp_snap32
->ss_gen
= lgrp_snap
->ss_gen
;
1919 lgrp_snap32
->ss_view
= LGRP_VIEW_OS
;
1920 lgrp_snap32
->ss_size
= snap_size
;
1921 lgrp_snap32
->ss_magic
= buf
;
1922 lgrp_snap32
->ss_info
= buf
+ snap_hdr_size
;
1923 lgrp_snap32
->ss_cpuids
= lgrp_snap32
->ss_info
+ info_size
;
1924 lgrp_snap32
->ss_lgrpset
= lgrp_snap32
->ss_cpuids
+ cpuids_size
;
1925 lgrp_snap32
->ss_parents
= lgrp_snap32
->ss_lgrpset
+ bitmask_size
;
1926 lgrp_snap32
->ss_children
= lgrp_snap32
->ss_parents
+
1927 (snap_nlgrpsmax
* bitmask_size
);
1928 lgrp_snap32
->ss_rsets
= lgrp_snap32
->ss_children
+
1929 (snap_nlgrpsmax
* bitmask_size
);
1930 lgrp_snap32
->ss_latencies
= lgrp_snap32
->ss_rsets
+
1931 (LGRP_RSRC_COUNT
* snap_nlgrpsmax
* bitmask_size
);
1934 * Fill in lgrpset now because caller may have change psets
1937 for (i
= 0; i
< snap_nlgrpsmax
; i
++) {
1938 if (klgrpset_ismember(curthread
->t_cpupart
->cp_lgrpset
,
1940 BT_SET32(lgrp_set32
, i
);
1946 * Fill in 32-bit copy of lgroup info and fix up pointers
1947 * to point into user's buffer instead of kernel's
1950 lgrp_info
= lgrp_snap
->ss_info
;
1951 for (i
= 0; i
< snap_nlgrpsmax
; i
++) {
1958 * Skip non-existent lgroups
1960 if (lgrp_info
[i
].info_lgrpid
== LGRP_NONE
) {
1961 bzero(&lgrp_info32
[i
], sizeof (lgrp_info32
[i
]));
1962 lgrp_info32
[i
].info_lgrpid
= LGRP_NONE
;
1967 * Fill in parents, children, lgroup resource set, and
1968 * latencies from snapshot
1970 parents
= (uint_t
*)((uintptr_t)lgrp_parents32
+
1972 children
= (uint_t
*)((uintptr_t)lgrp_children32
+
1974 snap_rset
= (ulong_t
*)((uintptr_t)lgrp_snap
->ss_rsets
+
1975 (i
* LGRP_RSRC_COUNT
* BT_SIZEOFMAP(snap_nlgrpsmax
)));
1976 lgrp_rset
= (uint_t
*)((uintptr_t)lgrp_rsets32
+
1977 (i
* LGRP_RSRC_COUNT
* bitmask_size
));
1978 lgrp_lats32_kernel
[i
] = (int *)((uintptr_t)lgrp_lats32
+
1979 snap_nlgrpsmax
* sizeof (caddr32_t
) + i
* snap_nlgrpsmax
*
1981 for (j
= 0; j
< snap_nlgrpsmax
; j
++) {
1985 if (BT_TEST(&lgrp_snap
->ss_parents
[i
], j
))
1986 BT_SET32(parents
, j
);
1988 if (BT_TEST(&lgrp_snap
->ss_children
[i
], j
))
1989 BT_SET32(children
, j
);
1991 for (k
= 0; k
< LGRP_RSRC_COUNT
; k
++) {
1992 rset
= (uint_t
*)((uintptr_t)lgrp_rset
+
1994 if (BT_TEST(&snap_rset
[k
], j
))
1998 lgrp_lats32_kernel
[i
][j
] =
1999 lgrp_snap
->ss_latencies
[i
][j
];
2003 * Fix up pointer to latency buffer
2005 lgrp_lats32
[i
] = lgrp_snap32
->ss_latencies
+
2006 snap_nlgrpsmax
* sizeof (caddr32_t
) + i
* snap_nlgrpsmax
*
2010 * Fix up pointers for parents, children, and resources
2012 lgrp_info32
[i
].info_parents
= lgrp_snap32
->ss_parents
+
2014 lgrp_info32
[i
].info_children
= lgrp_snap32
->ss_children
+
2016 lgrp_info32
[i
].info_rset
= lgrp_snap32
->ss_rsets
+
2017 (i
* LGRP_RSRC_COUNT
* bitmask_size
);
2020 * Fill in memory and CPU info
2021 * Only fill in memory for lgroups directly containing memory
2023 snap_rset
= &lgrp_info
[i
].info_rset
[LGRP_RSRC_MEM
*
2024 BT_BITOUL(snap_nlgrpsmax
)];
2025 if (BT_TEST(snap_rset
, i
)) {
2026 lgrp_info32
[i
].info_mem_free
= lgrp_mem_size(i
,
2027 LGRP_MEM_SIZE_FREE
);
2028 lgrp_info32
[i
].info_mem_install
=
2029 lgrp_info
[i
].info_mem_install
;
2032 lgrp_info32
[i
].info_ncpus
= lgrp_info
[i
].info_ncpus
;
2034 lgrp_info32
[i
].info_lgrpid
= lgrp_info
[i
].info_lgrpid
;
2035 lgrp_info32
[i
].info_latency
= lgrp_info
[i
].info_latency
;
2037 if (lgrp_info32
[i
].info_ncpus
== 0) {
2038 lgrp_info32
[i
].info_cpuids
= 0;
2043 * Fix up pointer for CPU IDs
2045 lgrp_info32
[i
].info_cpuids
= lgrp_snap32
->ss_cpuids
+
2046 (cpu_index
* sizeof (processorid_t
));
2047 cpu_index
+= lgrp_info32
[i
].info_ncpus
;
2049 ASSERT(cpu_index
== snap_ncpus
);
2052 * Copy lgroup CPU IDs into 32-bit snapshot
2053 * before copying it out into user's buffer
2055 bcopy(lgrp_snap
->ss_cpuids
, lgrp_cpuids32
, cpuids_size
);
2058 * Copy 32-bit lgroup snapshot into user's buffer all at once
2060 if (copyout(lgrp_snap32
, (void *)(uintptr_t)buf
, snap_size
) != 0) {
2061 kmem_free(lgrp_snap32
, snap_size
);
2062 kmem_free(lgrp_lats32_kernel
, snap_nlgrpsmax
* sizeof (int *));
2063 return (set_errno(EFAULT
));
2066 kmem_free(lgrp_snap32
, snap_size
);
2067 kmem_free(lgrp_lats32_kernel
, snap_nlgrpsmax
* sizeof (int *));
2071 #endif /* _SYSCALL32_IMPL */
2075 lgrpsys(int subcode
, long ia
, void *ap
)
2082 case LGRP_SYS_AFFINITY_GET
:
2083 return (lgrp_affinity_get((lgrp_affinity_args_t
*)ap
));
2085 case LGRP_SYS_AFFINITY_SET
:
2086 return (lgrp_affinity_set((lgrp_affinity_args_t
*)ap
));
2088 case LGRP_SYS_GENERATION
:
2089 return (lgrp_generation(ia
));
2092 return (lgrp_home_get((idtype_t
)ia
, (id_t
)(uintptr_t)ap
));
2094 case LGRP_SYS_LATENCY
:
2095 mutex_enter(&cpu_lock
);
2096 latency
= lgrp_latency(ia
, (lgrp_id_t
)(uintptr_t)ap
);
2097 mutex_exit(&cpu_lock
);
2100 case LGRP_SYS_MEMINFO
:
2101 return (meminfo(ia
, (struct meminfo
*)ap
));
2103 case LGRP_SYS_VERSION
:
2104 return (lgrp_version(ia
));
2106 case LGRP_SYS_SNAPSHOT
:
2107 mutex_enter(&lgrp_snap_lock
);
2108 bufsize
= lgrp_snapshot();
2110 if (get_udatamodel() == DATAMODEL_NATIVE
)
2111 bufsize
= lgrp_snapshot_copy(ap
, ia
);
2112 #ifdef _SYSCALL32_IMPL
2114 bufsize
= lgrp_snapshot_copy32(
2115 (caddr32_t
)(uintptr_t)ap
, ia
);
2116 #endif /* _SYSCALL32_IMPL */
2118 mutex_exit(&lgrp_snap_lock
);
2126 return (set_errno(EINVAL
));