/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/var.h>
#include <sys/cyclic.h>
#include <sys/lgrp.h>
#include <sys/pghw.h>
#include <sys/loadavg.h>
#include <sys/class.h>
#include <sys/fss.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/policy.h>
/*
 * Calling pool_lock() protects the pools configuration, which includes
 * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
 * partitions from being created or destroyed while the lock is held.
 * The lock ordering with respect to related locks is:
 *
 *	pool_lock() ---> cpu_lock ---> pidlock --> p_lock
 *
 * Blocking memory allocations may be made while holding "pool_lock"
 * or cpu_lock.
 */
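/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * caller that reconfigures partitions while touching per-process state
 * would take the locks in the order documented above.  Guarded so it is
 * never compiled; the function name is an assumption, not a kernel
 * interface.
 */
#ifdef	CPUPART_LOCK_ORDER_EXAMPLE
static void
cpupart_lock_order_example(void)
{
	pool_lock();				/* pools configuration */
	mutex_enter(&cpu_lock);			/* CPU partition list */
	mutex_enter(&pidlock);			/* active process list */
	mutex_enter(&curproc->p_lock);		/* per-process state */

	/* ... reconfigure processor sets / rebind threads here ... */

	mutex_exit(&curproc->p_lock);
	mutex_exit(&pidlock);
	mutex_exit(&cpu_lock);
	pool_unlock();
}
#endif	/* CPUPART_LOCK_ORDER_EXAMPLE */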
/*
 * The cp_default partition is allocated statically, but its lgroup load average
 * (lpl) list is allocated dynamically after the kmem subsystem is initialized.
 * This saves some memory since the space allocated reflects the actual number
 * of lgroups supported by the platform.  The lgrp facility provides a temporary
 * space to hold lpl information during system bootstrap.
 */
cpupart_t		cp_default;
cpupart_t		*cp_list_head;
static cpupartid_t	cp_id_next;
uint_t			cp_numparts;
uint_t			cp_numparts_nonempty;
/*
 * Need to limit total number of partitions to avoid slowing down the
 * clock code too much.  The clock code traverses the list of
 * partitions and needs to be able to execute in a reasonable amount
 * of time (less than 1/hz seconds).  The maximum is sized based on
 * max_ncpus so it shouldn't be a problem unless there are large
 * numbers of empty partitions.
 */
static uint_t		cp_max_numparts;
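/*
 * (See cpupart_initialize_default() below: unless overridden via
 * /etc/system, cp_max_numparts defaults to max_ncpus * 2 + 1.)
 */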
/*
 * Processor sets and CPU partitions are different but related concepts.
 * A processor set is a user-level abstraction allowing users to create
 * sets of CPUs and bind threads exclusively to those sets.  A CPU
 * partition is a kernel dispatcher object consisting of a set of CPUs
 * and a global dispatch queue.  The processor set abstraction is
 * implemented via a CPU partition, and currently there is a 1-1
 * mapping between processor sets and partitions (excluding the default
 * partition, which is not visible as a processor set).  Hence, the
 * numbering for processor sets and CPU partitions is identical.  This
 * may not always be true in the future, and these macros could become
 * less trivial if we support e.g. a processor set containing multiple
 * CPU partitions.
 */
#define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
#define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
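/*
 * Illustrative only (not in the original source): the macros above simply
 * translate between the user-visible processor set namespace and the
 * kernel partition namespace; every id maps to itself except the default
 * partition, which is hidden from the pset namespace.  Guarded so it is
 * never compiled.
 */
#ifdef	CPUPART_ID_MAP_EXAMPLE
static psetid_t
cpupart_id_map_example(psetid_t psid)
{
	/* PS_NONE -> CP_DEFAULT -> PS_NONE; any other id round-trips as-is */
	return (CPTOPS(PSTOCP(psid)));
}
#endif	/* CPUPART_ID_MAP_EXAMPLE */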
static int cpupart_unbind_threads(cpupart_t *, boolean_t);
/*
 * Find a CPU partition given a processor set ID.
 */
static cpupart_t *
cpupart_find_all(psetid_t psid)
{
	cpupart_t *cp;
	cpupartid_t cpid = PSTOCP(psid);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* default partition not visible as a processor set */
	if (psid == CP_DEFAULT)
		return (NULL);

	if (psid == PS_MYID)
		return (curthread->t_cpupart);

	cp = cp_list_head;
	do {
		if (cp->cp_id == cpid)
			return (cp);
		cp = cp->cp_next;
	} while (cp != cp_list_head);

	return (NULL);
}
/*
 * Find a CPU partition given a processor set ID if the processor set
 * should be visible from the calling zone.
 */
cpupart_t *
cpupart_find(psetid_t psid)
{
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpupart_find_all(psid);
	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
		return (NULL);
	return (cp);
}
static int
cpupart_kstat_update(kstat_t *ksp, int rw)
{
	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
	cpupart_kstat_t *cpksp = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
	    (16 - FSHIFT);
	return (0);
}
static void
cpupart_kstat_create(cpupart_t *cp)
{
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * We have a bit of a chicken-egg problem since this code will
	 * get called to create the kstats for CP_DEFAULT before the
	 * pools framework gets initialized.  We circumvent the problem
	 * by special-casing cp_default.
	 */
	if (cp != &cp_default && pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
	if (ksp != NULL) {
		cpupart_kstat_t *cpksp = ksp->ks_data;

		kstat_named_init(&cpksp->cpk_updates, "updates",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_runnable, "runnable",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_waiting, "waiting",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
		    KSTAT_DATA_UINT32);

		ksp->ks_update = cpupart_kstat_update;
		ksp->ks_private = cp;

		kstat_install(ksp);
	}
	cp->cp_kstat = ksp;
}
/*
 * Initialize the cpupart's lgrp partitions (lpls)
 */
static void
cpupart_lpl_initialize(cpupart_t *cp)
{
	int i, sz;

	sz = cp->cp_nlgrploads = lgrp_plat_max_lgrps();
	cp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * sz, KM_SLEEP);

	for (i = 0; i < sz; i++) {
		/*
		 * The last entry of the lpl's resource set is always NULL
		 * by design (to facilitate iteration)...hence the "oversizing"
		 * of the rset allocations below.
		 */
		cp->cp_lgrploads[i].lpl_rset_sz = sz + 1;
		cp->cp_lgrploads[i].lpl_rset =
		    kmem_zalloc(sizeof (struct lgrp_ld *) * (sz + 1), KM_SLEEP);
		cp->cp_lgrploads[i].lpl_id2rset =
		    kmem_zalloc(sizeof (int) * (sz + 1), KM_SLEEP);
		cp->cp_lgrploads[i].lpl_lgrpid = i;
	}
}
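/*
 * Illustrative only (not in the original source): because each lpl_rset is
 * oversized by one and its final slot is always NULL, a consumer can walk
 * a resource set without a separate count.  Guarded so it is never
 * compiled; the function name is an assumption.
 */
#ifdef	CPUPART_LPL_RSET_EXAMPLE
static int
cpupart_lpl_rset_count_example(lpl_t *lpl)
{
	int cnt = 0;

	while (lpl->lpl_rset[cnt] != NULL)
		cnt++;
	return (cnt);
}
#endif	/* CPUPART_LPL_RSET_EXAMPLE */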
/*
 * Teardown the cpupart's lgrp partitions
 */
static void
cpupart_lpl_teardown(cpupart_t *cp)
{
	int i, sz;
	lpl_t *lpl;

	for (i = 0; i < cp->cp_nlgrploads; i++) {
		lpl = &cp->cp_lgrploads[i];

		sz = lpl->lpl_rset_sz;
		kmem_free(lpl->lpl_rset, sizeof (struct lgrp_ld *) * sz);
		kmem_free(lpl->lpl_id2rset, sizeof (int) * sz);
		lpl->lpl_rset = NULL;
		lpl->lpl_id2rset = NULL;
	}
	kmem_free(cp->cp_lgrploads, sizeof (lpl_t) * cp->cp_nlgrploads);
	cp->cp_lgrploads = NULL;
}
/*
 * Initialize the default partition and kpreempt disp queue.
 */
void
cpupart_initialize_default(void)
{
	int i;

	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
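	/*
	 * Illustrative only: an administrator could override the default
	 * above with an /etc/system line such as
	 *
	 *	set cp_max_numparts = 9
	 *
	 * (the value 9 is purely an example).
	 */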
	/*
	 * Allocate space for cp_default list of lgrploads
	 */
	cpupart_lpl_initialize(&cp_default);

	/*
	 * The initial lpl topology is created in a special lpl list
	 * lpl_bootstrap. It should be copied to cp_default.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0 cpu_lpl pointer to point
	 *	 to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);

	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;

	/* Set t0's home */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];

	bitset_init(&cp_default.cp_cmt_pgs);
	bitset_init_fanout(&cp_default.cp_haltset, cp_haltset_fanout);

	bitset_resize(&cp_default.cp_haltset, max_ncpus);
}
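/*
 * (At this point cp_default is fully initialized; every CPU configured at
 * boot joins it and remains there until processor sets are created.)
 */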
static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t *ncp, *newlist;
	kthread_t *t;
	int move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t *p;
	int lgrp_diff_lpl;
	lpl_t *cpu_lpl;
	int ret;
	boolean_t unbind_all_threads = (forced != 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		/*
		 * The last CPU is removed from a partition which has threads
		 * running in it. Some of these threads may be bound to this
		 * CPU.
		 *
		 * Attempt to unbind threads from the CPU and from the processor
		 * set. Note that no threads should be bound to this CPU since
		 * cpupart_move_threads will refuse to move bound threads to
		 * other CPUs.
		 */
		(void) cpu_unbind(oldpp->cp_cpulist->cpu_id, B_FALSE);
		(void) cpupart_unbind_threads(oldpp, B_FALSE);

		if (!disp_bound_partition(cp, 0)) {
			/*
			 * No bound threads in this partition any more
			 */
			move_threads = 0;
		} else {
			/*
			 * There are still threads bound to the partition
			 */
			cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
			return (EBUSY);
		}
	}

	/*
	 * If forced flag is set unbind any threads from this CPU.
	 * Otherwise unbind soft-bound threads only.
	 */
	if ((ret = cpu_unbind(cp->cpu_id, unbind_all_threads)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads weak binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Notify the Processor Groups subsystem that the CPU
	 * will be moving cpu partitions. This is done before
	 * CPUs are paused to provide an opportunity for any
	 * needed memory allocations.
	 */
	pg_cpupart_out(cp, oldpp);
	pg_cpupart_in(cp, newpp);

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				pg_cpupart_out(cp, newpp);
				pg_cpupart_in(cp, oldpp);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		pg_cpupart_out(cp, newpp);
		pg_cpupart_in(cp, oldpp);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp, NULL);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}
	}

	/*
	 * Now that CPUs are paused, let the PG subsystem perform
	 * any necessary data structure updates.
	 */
	pg_cpupart_move(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;

	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {
		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(bitset_is_null(&newpp->cp_haltset));
	ASSERT(bitset_is_null(&oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		/*
		 * Walk thru the active process list to look for
		 * threads that need to have a new home lgroup,
		 * or the last CPU they run on is the same CPU
		 * being moved out of the partition.
		 */
		for (p = practive; p != NULL; p = p->p_next) {
			t = p->p_tlist;
			if (t == NULL)
				continue;
			lgrp_diff_lpl = 0;

			do {
				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */
				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */
				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				    t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */
			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */
		t = curthread;
		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose best lgroup for home when
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
			    t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}
/*
 * Check if thread can be moved to a new cpu partition.  Called by
 * cpupart_move_thread() and pset_bind_start().
 */
int
cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(cp != NULL);
	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * CPU-bound threads can't be moved.
	 */
	if (!ignore) {
		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
		    tp->t_weakbound_cpu;
		if (boundcpu != NULL && boundcpu->cpu_part != cp)
			return (EBUSY);
	}

	if (tp->t_cid == sysdccid) {
		return (EINVAL);	/* For now, sysdc threads can't move */
	}

	return (0);
}
/*
 * Move thread to new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp,
			    &tp->t_cpupart->cp_lgrploads[tp->t_lpl->lpl_lgrpid],
			    1);
		} else {
			/*
			 * The thread's lgroup has no cpus in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
		    tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}
/*
 * This function binds a thread to a partition.  Must be called with the
 * p_lock of the containing process held (to keep the thread from going
 * away), and thus also with cpu_lock held (since cpu_lock must be
 * acquired before p_lock).  If ignore is non-zero, then CPU bindings
 * should be ignored (this is used when destroying a partition).
 */
int
cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
    void *zonebuf)
{
	cpupart_t	*newpp;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	if (psid == PS_NONE)
		newpp = &cp_default;
	else {
		newpp = cpupart_find(psid);
		if (newpp == NULL)
			return (EINVAL);
	}
	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
}
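/*
 * Illustrative sketch only (not part of the original file): a caller that
 * binds curthread to a processor set would hold the locks in the documented
 * order and pass in FSS buffers it pre-allocated with fss_allocbuf().
 * Guarded so it is never compiled; the function name is an assumption.
 */
#ifdef	CPUPART_BIND_EXAMPLE
static int
cpupart_bind_example(psetid_t psid, void *projbuf, void *zonebuf)
{
	kthread_id_t t = curthread;
	int err;

	pool_lock();
	mutex_enter(&cpu_lock);
	mutex_enter(&pidlock);
	mutex_enter(&ttoproc(t)->p_lock);
	err = cpupart_bind_thread(t, psid, 0, projbuf, zonebuf);
	if (err == 0)
		t->t_bind_pset = psid;
	mutex_exit(&ttoproc(t)->p_lock);
	mutex_exit(&pidlock);
	mutex_exit(&cpu_lock);
	pool_unlock();
	return (err);
}
#endif	/* CPUPART_BIND_EXAMPLE */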
/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;

	ASSERT(pool_lock_held());

	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	cpupart_lpl_initialize(pp);

	bitset_init(&pp->cp_cmt_pgs);

	/*
	 * Initialize and size the partition's bitset of halted CPUs.
	 */
	bitset_init_fanout(&pp->cp_haltset, cp_haltset_fanout);
	bitset_resize(&pp->cp_haltset, max_ncpus);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL, NULL);
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}
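/*
 * Illustrative sketch only (not part of the original file): creating and
 * immediately destroying an empty partition under pool_lock(), roughly as
 * the pset system call path would.  Guarded so it is never compiled; the
 * function name is an assumption.
 */
#ifdef	CPUPART_CREATE_EXAMPLE
static int
cpupart_create_destroy_example(void)
{
	psetid_t psid;
	int err;

	pool_lock();
	if ((err = cpupart_create(&psid)) == 0)
		err = cpupart_destroy(psid);
	pool_unlock();
	return (err);
}
#endif	/* CPUPART_CREATE_EXAMPLE */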
/*
 * Move threads from specified partition to cp_default. If `force' is specified,
 * move all threads, otherwise move only soft-bound threads.
 */
static int
cpupart_unbind_threads(cpupart_t *pp, boolean_t unbind_all)
{
	void		*projbuf, *zonebuf;
	kthread_t	*t;
	proc_t		*p;
	int		err = 0;
	psetid_t	psid = pp->cp_id;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pp == NULL || pp == &cp_default) {
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	mutex_enter(&pidlock);
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}

			/*
			 * Can only unbind threads which have revocable binding
			 * unless force unbinding requested.
			 */
			if (unbind_all || TB_PSET_IS_SOFT(t)) {
				err = cpupart_bind_thread(t, PS_NONE, 1,
				    projbuf, zonebuf);
				if (err) {
					mutex_exit(&p->p_lock);
					mutex_exit(&pidlock);
					fss_freebuf(projbuf, FSS_ALLOC_PROJ);
					fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
					return (err);
				}
				t->t_bind_pset = PS_NONE;
			}
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	return (err);
}
/*
 * Destroy a partition.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Unbind all the threads currently bound to the partition.
	 */
	err = cpupart_unbind_threads(pp, B_TRUE);
	if (err) {
		mutex_exit(&cpu_lock);
		return (err);
	}

	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
	ASSERT(bitset_is_null(&pp->cp_haltset));

	/*
	 * Teardown the partition's group of active CMT PGs and halted
	 * CPUs now that they have all left.
	 */
	bitset_fini(&pp->cp_cmt_pgs);
	bitset_fini(&pp->cp_haltset);

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're turned
	 * online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL, NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);

	cpupart_lpl_teardown(pp);

	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}
/*
 * Return the ID of the partition to which the specified processor belongs.
 */
psetid_t
cpupart_query_cpu(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (CPTOPS(cp->cpu_part->cp_id));
}
/*
 * Attach a processor to an existing partition.
 */
int
cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
{
	cpupart_t	*pp;
	int		err;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	pp = cpupart_find(psid);
	if (pp == NULL)
		return (EINVAL);
	if (cp->cpu_flags & CPU_OFFLINE)
		return (EINVAL);

	err = cpupart_move_cpu(cp, pp, forced);
	return (err);
}
/*
 * Get a list of cpus belonging to the partition.  If numcpus is NULL,
 * this just checks for a valid partition.  If numcpus is non-NULL but
 * cpulist is NULL, the current number of cpus is stored in *numcpus.
 * If both are non-NULL, the current number of cpus is stored in *numcpus,
 * and a list of those cpus up to the size originally in *numcpus is
 * stored in cpulist[].  Also, store the processor set id in *psid.
 * This is useful in case the processor set id passed in was PS_MYID.
 */
int
cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
{
	cpupart_t	*pp;
	uint_t		ncpus;
	cpu_t		*c;
	int		i;

	mutex_enter(&cpu_lock);
	pp = cpupart_find(*psid);
	if (pp == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*psid = CPTOPS(pp->cp_id);
	ncpus = pp->cp_ncpus;
	if (numcpus) {
		if (ncpus > *numcpus) {
			/*
			 * Only copy as many cpus as were passed in, but
			 * pass back the real number.
			 */
			uint_t t = ncpus;
			ncpus = *numcpus;
			*numcpus = t;
		} else
			*numcpus = ncpus;

		if (cpulist) {
			c = pp->cp_cpulist;
			for (i = 0; i < ncpus; i++) {
				ASSERT(c != NULL);
				cpulist[i] = c->cpu_id;
				c = c->cpu_next_part;
			}
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}
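/*
 * Illustrative sketch only (not part of the original file): a typical
 * two-pass caller first asks for the count, then allocates a buffer and
 * fetches the list.  Guarded so it is never compiled; the function name
 * is an assumption.
 */
#ifdef	CPUPART_GET_CPUS_EXAMPLE
static int
cpupart_get_cpus_example(psetid_t psid)
{
	uint_t ncpus = 0;
	processorid_t *ids;
	int err;

	if ((err = cpupart_get_cpus(&psid, NULL, &ncpus)) != 0)
		return (err);
	if (ncpus == 0)
		return (0);
	ids = kmem_alloc(sizeof (processorid_t) * ncpus, KM_SLEEP);
	err = cpupart_get_cpus(&psid, ids, &ncpus);
	kmem_free(ids, sizeof (processorid_t) * ncpus);
	return (err);
}
#endif	/* CPUPART_GET_CPUS_EXAMPLE */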
/*
 * Reallocate kpreempt queues for each CPU partition.  Called from
 * disp_setup when a new scheduling class is loaded that increases the
 * number of priorities in the system.
 */
void
cpupart_kpqalloc(pri_t npri)
{
	cpupart_t *cpp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cpp = cp_list_head;
	do {
		disp_kp_alloc(&cpp->cp_kp_queue, npri);
		cpp = cpp->cp_next;
	} while (cpp != cp_list_head);
}
int
cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
{
	cpupart_t *cp;
	int i;

	ASSERT(nelem <= LOADAVG_NSTATS);
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpupart_find(psid);
	if (cp == NULL)
		return (EINVAL);
	for (i = 0; i < nelem; i++)
		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);

	return (0);
}
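/*
 * (Note on the conversion above: cp_hp_avenrun is maintained with 16
 * fractional bits; shifting right by (16 - FSHIFT) rescales it to the
 * FSHIFT fixed-point format that loadavg consumers expect.)
 */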
uint_t
cpupart_list(psetid_t *list, uint_t nelem, int flag)
{
	uint_t numpart = 0;
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);

	if (list != NULL) {
		cp = cp_list_head;
		do {
			if (((flag == CP_ALL) && (cp != &cp_default)) ||
			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
				if (numpart == nelem)
					break;
				list[numpart++] = CPTOPS(cp->cp_id);
			}
			cp = cp->cp_next;
		} while (cp != cp_list_head);
	}

	ASSERT(numpart < cp_numparts);

	if (flag == CP_ALL)
		numpart = cp_numparts - 1; /* leave out default partition */
	else if (flag == CP_NONEMPTY)
		numpart = cp_numparts_nonempty;

	return (numpart);
}
int
cpupart_setattr(psetid_t psid, uint_t attr)
{
	cpupart_t *cp;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	/*
	 * PSET_NOESCAPE attribute for default cpu partition is always set
	 */
	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	cp->cp_attr = attr;
	mutex_exit(&cpu_lock);
	return (0);
}
int
cpupart_getattr(psetid_t psid, uint_t *attrp)
{
	cpupart_t *cp;

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*attrp = cp->cp_attr;
	mutex_exit(&cpu_lock);
	return (0);
}