1 /* $NetBSD: sys_pset.c,v 1.12 2009/03/03 21:55:06 rmind Exp $ */
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * Implementation of the Processor Sets.
33 * The array of the processor-set structures and its members are protected
34 * by the global cpu_lock. Note that in scheduler, the very l_psid value
35 * might be used without lock held.
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.12 2009/03/03 21:55:06 rmind Exp $");
41 #include <sys/param.h>
44 #include <sys/kauth.h>
47 #include <sys/mutex.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
/*
 * File-scope state and forward declarations.
 *
 * psets          - array of pointers to processor-set structures
 * psets_max      - capacity of that array (number of slots)
 * psets_count    - compared against psets_max before a new set is created;
 *                  presumably the number of sets currently allocated
 * psets_listener - handle returned by kauth_listen_scope()
 *
 * NOTE(review): this listing is extraction-damaged (fused line numbers,
 * split tokens, dropped lines); the declarations are kept verbatim.
 */
56 static pset_info_t
** psets
;
57 static u_int psets_max
;
58 static u_int psets_count
;
59 static kauth_listener_t psets_listener
;
/* Prototypes of the static helpers defined further down in this file. */
61 static int psets_realloc(int);
62 static int psid_validate(psetid_t
, bool);
63 static int kern_pset_create(psetid_t
*);
64 static int kern_pset_destroy(psetid_t
);
/*
 * kauth listener callback for processor-set requests.
 *
 * The result starts as KAUTH_RESULT_DEFER.  The request code is decoded
 * from arg0 and a processor-set ID from arg1.  KAUTH_RESULT_ALLOW is
 * assigned inside the branch taken for the ASSIGN and BIND requests.
 *
 * NOTE(review): the return type, the declarations of result/id, the
 * return statements and any condition between the request test and the
 * ALLOW assignment are on lines missing from this listing, so the exact
 * allow condition cannot be confirmed from here.
 */
67 psets_listener_cb(kauth_cred_t cred
, kauth_action_t action
, void *cookie
,
68 void *arg0
, void *arg1
, void *arg2
, void *arg3
)
71 enum kauth_system_req req
;
/* Default verdict: leave the decision to other listeners. */
74 result
= KAUTH_RESULT_DEFER
;
75 req
= (enum kauth_system_req
)arg0
;
/* arg1 carries the set ID packed into a pointer-sized value. */
76 id
= (psetid_t
)(unsigned long)arg1
;
/* Only the KAUTH_SYSTEM_PSET action is of interest to this listener. */
78 if (action
!= KAUTH_SYSTEM_PSET
)
/* ASSIGN and BIND are the only requests that can reach the ALLOW below. */
81 if ((req
== KAUTH_REQ_SYSTEM_PSET_ASSIGN
) ||
82 (req
== KAUTH_REQ_SYSTEM_PSET_BIND
)) {
84 result
= KAUTH_RESULT_ALLOW
;
/*
 * Set up the processor-set table and register the kauth listener.
 * NOTE(review): the function header is not visible in this listing.
 */
91 * Initialization of the processor-sets.
/* Capacity is the larger of MAXCPUS and 32 slots. */
97 psets_max
= max(MAXCPUS
, 32);
/* One pointer per slot; kmem_zalloc presumably zero-fills, so all slots start NULL. */
98 psets
= kmem_zalloc(psets_max
* sizeof(void *), KM_SLEEP
);
/* Hook psets_listener_cb into the system scope and keep the handle. */
101 psets_listener
= kauth_listen_scope(KAUTH_SCOPE_SYSTEM
,
102 psets_listener_cb
, NULL
);
/*
 * Resize the processor-set pointer array to new_psets_max slots.
 *
 * The replacement array is allocated before cpu_lock is taken.  Under
 * the lock the old size is computed, a shrink request is checked against
 * the slots that would be cut off, the pointers are copied and psets_max
 * is updated.  The old array is freed after the lock is released.
 *
 * NOTE(review): the return statements, the declarations of i/oldsize and
 * the assignments of old_psets and psets are on lines missing from this
 * listing.
 */
106 * Reallocate the array of the processor-set structures.
109 psets_realloc(int new_psets_max
)
111 pset_info_t
**new_psets
, **old_psets
;
112 const u_int newsize
= new_psets_max
* sizeof(void *);
/* Requests for fewer than one slot are rejected (return not visible). */
115 if (new_psets_max
< 1)
/* Allocate outside the lock. */
118 new_psets
= kmem_zalloc(newsize
, KM_SLEEP
);
119 mutex_enter(&cpu_lock
);
/* Byte size of the current array, needed for the kmem_free() below. */
121 oldsize
= psets_max
* sizeof(void *);
123 /* Check if we can lower the size of the array */
124 if (new_psets_max
< psets_max
) {
/* Scan the slots [new_psets_max, psets_max) that would be dropped. */
125 for (i
= new_psets_max
; i
< psets_max
; i
++) {
126 if (psets
[i
] == NULL
)
/*
 * Bail-out: unlock and release the unused new array.  Presumably reached
 * when a slot in the tail is still occupied - TODO confirm, the branch
 * statement after the NULL test is not visible.
 */
128 mutex_exit(&cpu_lock
);
129 kmem_free(new_psets
, newsize
);
134 /* Copy all pointers to the new array */
/*
 * NOTE(review): this copies newsize bytes out of the old array, which is
 * only oldsize bytes long.  When the array grows (newsize > oldsize) the
 * copy reads past the end of the old allocation - confirm, and consider
 * copying min(oldsize, newsize) instead.
 */
135 memcpy(new_psets
, psets
, newsize
);
136 psets_max
= new_psets_max
;
138 mutex_exit(&cpu_lock
);
/* Release the previous array now that the lock has been dropped. */
140 kmem_free(old_psets
, oldsize
);
/*
 * Check a processor-set ID against the table.
 *
 * psid  - ID to check
 * chkps - when true, the special IDs PS_NONE, PS_QUERY and PS_MYID are
 *         matched first
 *
 * The caller must hold cpu_lock (asserted).  A regular ID is in the range
 * 1..psets_max and maps to slot psets[psid - 1]; the slot is tested for
 * NULL and then for the PSET_BUSY flag.
 *
 * NOTE(review): the value returned in each case is on lines missing from
 * this listing.
 */
145 * Validate processor-set ID.
148 psid_validate(psetid_t psid
, bool chkps
)
151 KASSERT(mutex_owned(&cpu_lock
));
/* Special IDs are handled separately, only when the caller asks for it. */
153 if (chkps
&& (psid
== PS_NONE
|| psid
== PS_QUERY
|| psid
== PS_MYID
))
/* IDs are 1-based; anything outside 1..psets_max has no slot. */
155 if (psid
<= 0 || psid
> psets_max
)
/* Empty slot. */
157 if (psets
[psid
- 1] == NULL
)
/* Set exists but carries the PSET_BUSY flag. */
159 if (psets
[psid
- 1]->ps_flags
& PSET_BUSY
)
/*
 * Allocate a new processor-set structure and find a table slot for it.
 *
 * psets_count is compared with psets_max twice: once before allocating,
 * without the lock, and again after cpu_lock is taken.  If the table is
 * full at the second check, the lock is dropped and the new structure
 * is freed.
 *
 * NOTE(review): the lines that store pi into the slot, update the count,
 * write *psid and return are missing from this listing.
 */
166 * Create a processor-set.
169 kern_pset_create(psetid_t
*psid
)
/* Early, unlocked check for a full table. */
174 if (psets_count
== psets_max
)
177 pi
= kmem_zalloc(sizeof(pset_info_t
), KM_SLEEP
);
179 mutex_enter(&cpu_lock
);
/* Re-check under the lock; undo the allocation if the table is full. */
180 if (psets_count
== psets_max
) {
181 mutex_exit(&cpu_lock
);
182 kmem_free(pi
, sizeof(pset_info_t
));
186 /* Find a free entry in the array */
187 for (i
= 0; i
< psets_max
; i
++)
188 if (psets
[i
] == NULL
)
/* The count check above implies that a free slot must have been found. */
190 KASSERT(i
!= psets_max
);
194 mutex_exit(&cpu_lock
);
/*
 * Tear down the processor-set identified by psid.
 *
 * Visible sequence:
 *  1. under cpu_lock: map PS_MYID to the caller's l_psid, validate the ID
 *     with psid_validate(psid, false), reset spc_psid to PS_NONE on CPUs
 *     (guarded by a test against psid), and set PSET_BUSY on the set;
 *  2. under proc_lock: walk all LWPs, testing l_psid against psid;
 *  3. under cpu_lock again: clear the table slot;
 *  4. free the structure after the lock is dropped.
 *
 * NOTE(review): the return statements, the statements following the two
 * "!= psid" tests and some declarations are on lines missing from this
 * listing.
 */
201 * Destroy a processor-set.
204 kern_pset_destroy(psetid_t psid
)
209 CPU_INFO_ITERATOR cii
;
212 mutex_enter(&cpu_lock
);
213 if (psid
== PS_MYID
) {
214 /* Use caller's processor-set ID */
215 psid
= curlwp
->l_psid
;
/* chkps is false: the special IDs are not matched first here. */
217 error
= psid_validate(psid
, false);
/* Presumably the validation-failure exit - its guard is not visible. */
219 mutex_exit(&cpu_lock
);
223 /* Release the processor-set from all CPUs */
224 for (CPU_INFO_FOREACH(cii
, ci
)) {
225 struct schedstate_percpu
*spc
;
227 spc
= &ci
->ci_schedstate
;
/* Statement controlled by this test is not visible (presumably a skip). */
228 if (spc
->spc_psid
!= psid
)
230 spc
->spc_psid
= PS_NONE
;
232 /* Mark that processor-set is going to be destroyed */
233 pi
= psets
[psid
- 1];
/* psid_validate() tests PSET_BUSY, so the flag is visible to later validations. */
234 pi
->ps_flags
|= PSET_BUSY
;
235 mutex_exit(&cpu_lock
);
237 /* Unmark the processor-set ID from each thread */
238 mutex_enter(proc_lock
);
239 LIST_FOREACH(l
, &alllwp
, l_list
) {
240 /* Safe to check and set without lock held */
241 if (l
->l_psid
!= psid
)
245 mutex_exit(proc_lock
);
247 /* Destroy the processor-set */
248 mutex_enter(&cpu_lock
);
249 psets
[psid
- 1] = NULL
;
251 mutex_exit(&cpu_lock
);
/* The structure is freed after the slot is cleared and the lock dropped. */
253 kmem_free(pi
, sizeof(pset_info_t
));
/*
 * System call: create a processor-set and return its ID to user space.
 *
 * The caller is authorized through kauth with the PSET_CREATE request.
 * The set is created with kern_pset_create() and the new ID is copied
 * out to the user pointer SCARG(uap, psid).
 *
 * NOTE(review): the kern_pset_destroy() call after the copyout is
 * presumably a rollback for a failed copyout; its guard and the return
 * statements are on lines missing from this listing.
 */
258 * General system calls for the processor-sets.
262 sys_pset_create(struct lwp
*l
, const struct sys_pset_create_args
*uap
,
266 syscallarg(psetid_t) *psid;
271 /* Available only for super-user */
272 if (kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_PSET
,
273 KAUTH_REQ_SYSTEM_PSET_CREATE
, NULL
, NULL
, NULL
))
276 error
= kern_pset_create(&psid
);
/* Hand the new ID back to the caller. */
280 error
= copyout(&psid
, SCARG(uap
, psid
), sizeof(psetid_t
));
/* Result deliberately ignored via the (void) cast. */
282 (void)kern_pset_destroy(psid
);
/*
 * System call: destroy the processor-set given by SCARG(uap, psid).
 *
 * The caller is authorized through kauth with the PSET_DESTROY request,
 * passing the set ID as the first request argument.  The result of
 * kern_pset_destroy() is returned directly.
 *
 * NOTE(review): the statement taken when authorization fails is on a
 * line missing from this listing.
 */
288 sys_pset_destroy(struct lwp
*l
, const struct sys_pset_destroy_args
*uap
,
292 syscallarg(psetid_t) psid;
295 /* Available only for super-user */
296 if (kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_PSET
,
297 KAUTH_REQ_SYSTEM_PSET_DESTROY
,
298 KAUTH_ARG(SCARG(uap
, psid
)), NULL
, NULL
))
301 return kern_pset_destroy(SCARG(uap
, psid
));
/*
 * System call: assign the CPU SCARG(uap, cpuid) to the processor-set
 * SCARG(uap, psid), optionally reporting the previous set through
 * SCARG(uap, opsid).
 *
 * Visible sequence:
 *  1. kauth authorization with the PSET_ASSIGN request and the psid;
 *  2. under cpu_lock: look up the CPU by index while counting the CPUs
 *     whose spc_psid is PS_NONE, then validate psid;
 *  3. remember the CPU's current spc_psid in opsid;
 *  4. refuse when the CPU is offline or is the last one in PS_NONE;
 *  5. under proc_lock: look for LWPs whose affinity mask contains the
 *     CPU, then store the new spc_psid and walk the LWPs again, calling
 *     sched_takecpu() on them;
 *  6. after the locks are dropped, copy opsid out if the user pointer is
 *     not NULL.
 *
 * NOTE(review): return statements, several guards and per-LWP locking
 * are on lines missing from this listing; the hedged notes below mark
 * the places where intent had to be inferred.
 */
305 sys_pset_assign(struct lwp
*l
, const struct sys_pset_assign_args
*uap
,
309 syscallarg(psetid_t) psid;
310 syscallarg(cpuid_t) cpuid;
311 syscallarg(psetid_t) *opsid;
313 struct cpu_info
*ici
, *ci
= NULL
;
314 struct schedstate_percpu
*spc
= NULL
;
316 psetid_t psid
= SCARG(uap
, psid
), opsid
= 0;
317 CPU_INFO_ITERATOR cii
;
318 int error
= 0, nnone
= 0;
320 /* Available only for super-user, except the case of PS_QUERY */
321 if (kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_PSET
,
322 KAUTH_REQ_SYSTEM_PSET_ASSIGN
, KAUTH_ARG(SCARG(uap
, psid
)), NULL
,
326 /* Find the target CPU */
327 mutex_enter(&cpu_lock
);
328 for (CPU_INFO_FOREACH(cii
, ici
)) {
329 struct schedstate_percpu
*ispc
;
330 ispc
= &ici
->ci_schedstate
;
/* Match on the CPU index requested by the caller. */
331 if (cpu_index(ici
) == SCARG(uap
, cpuid
)) {
/* nnone counts the CPUs whose spc_psid is PS_NONE. */
335 nnone
+= (ispc
->spc_psid
== PS_NONE
);
/* Presumably the exit taken when no CPU matched - guard not visible. */
338 mutex_exit(&cpu_lock
);
/* chkps is true: psid_validate() matches the special IDs first. */
341 error
= psid_validate(psid
, true);
/* Presumably the validation-failure exit - guard not visible. */
343 mutex_exit(&cpu_lock
);
/* Previous set of the CPU, reported to the caller at the end. */
346 opsid
= spc
->spc_psid
;
/* Presumably the PS_MYID case (guard not visible): use the caller's set. */
351 psid
= curlwp
->l_psid
;
355 * Ensure at least one CPU stays in the default set,
356 * and that specified CPU is not offline.
358 if (psid
!= PS_NONE
&& ((spc
->spc_flags
& SPCF_OFFLINE
) ||
359 (nnone
== 1 && spc
->spc_psid
== PS_NONE
))) {
360 mutex_exit(&cpu_lock
);
/* proc_lock is taken while cpu_lock is still held. */
363 mutex_enter(proc_lock
);
365 * Ensure that none of the threads are using affinity mask
366 * with this target CPU in it.
368 LIST_FOREACH(t
, &alllwp
, l_list
) {
/*
 * NOTE(review): the LW_AFFINITY test appears twice.  The lines between
 * the two tests are missing; presumably a per-LWP lock is taken there
 * and the flag is re-checked under it - TODO confirm.
 */
369 if ((t
->l_flag
& LW_AFFINITY
) == 0)
372 if ((t
->l_flag
& LW_AFFINITY
) == 0) {
/* A thread with this CPU in its affinity mask: both locks are released. */
376 if (kcpuset_isset(cpu_index(ci
), t
->l_affinity
)) {
378 mutex_exit(proc_lock
);
379 mutex_exit(&cpu_lock
);
384 * Set the processor-set ID.
385 * Migrate out any threads running on this CPU.
387 spc
->spc_psid
= psid
;
389 LIST_FOREACH(t
, &alllwp
, l_list
) {
390 struct cpu_info
*tci
;
/* Bound and interrupt LWPs are tested here (presumably skipped). */
393 if (t
->l_pflag
& (LP_BOUND
| LP_INTR
))
/* Ask the scheduler which CPU this LWP should use now. */
396 tci
= sched_takecpu(t
);
/* Locks are released in the reverse order of acquisition. */
400 mutex_exit(proc_lock
);
403 mutex_exit(&cpu_lock
);
/* Report the previous set only when the caller passed a pointer. */
405 if (SCARG(uap
, opsid
) != NULL
)
406 error
= copyout(&opsid
, SCARG(uap
, opsid
), sizeof(psetid_t
));
/*
 * System call: bind LWP(s), selected by (idtype, first_id, second_id),
 * to the processor-set SCARG(uap, psid), optionally reporting a previous
 * set ID through SCARG(uap, opsid).
 *
 * Visible sequence:
 *  1. kauth authorization with the PSET_BIND request and the psid;
 *  2. under cpu_lock: validate psid and, for a regular set, mark it
 *     PSET_BUSY for the duration of the call;
 *  3. derive the PID from the IDs according to idtype;
 *  4. under proc_lock: find the process, take p->p_lock, drop proc_lock;
 *  5. refuse system processes (PK_SYSTEM);
 *  6. walk the process' LWPs, matching on lid when it is non-zero, and
 *     call sched_takecpu() on them;
 *  7. copy opsid out if requested;
 *  8. clear PSET_BUSY under cpu_lock for a regular set.
 *
 * NOTE(review): declarations, return statements, the switch cases, the
 * assignments of lid/opsid/l_psid and the per-LWP locking are on lines
 * missing from this listing.
 */
412 sys__pset_bind(struct lwp
*l
, const struct sys__pset_bind_args
*uap
,
416 syscallarg(idtype_t) idtype;
417 syscallarg(id_t) first_id;
418 syscallarg(id_t) second_id;
419 syscallarg(psetid_t) psid;
420 syscallarg(psetid_t) *opsid;
428 psetid_t psid
, opsid
;
431 psid
= SCARG(uap
, psid
);
433 /* Available only for super-user, except the case of PS_QUERY */
434 if (kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_PSET
,
435 KAUTH_REQ_SYSTEM_PSET_BIND
, KAUTH_ARG(SCARG(uap
, psid
)), NULL
,
439 mutex_enter(&cpu_lock
);
/* chkps is true: psid_validate() matches the special IDs first. */
440 error
= psid_validate(psid
, true);
/* Presumably the validation-failure exit - guard not visible. */
442 mutex_exit(&cpu_lock
);
/* Presumably the PS_MYID case (guard not visible): use the caller's set. */
446 psid
= curlwp
->l_psid
;
/*
 * Only a regular set has a table slot; mark it PSET_BUSY.  The matching
 * clear is at the end of this function.
 */
447 if (psid
!= PS_QUERY
&& psid
!= PS_NONE
)
448 psets
[psid
- 1]->ps_flags
|= PSET_BUSY
;
449 mutex_exit(&cpu_lock
);
452 * Get PID and LID from the ID.
455 id1
= SCARG(uap
, first_id
);
456 id2
= SCARG(uap
, second_id
);
458 switch (SCARG(uap
, idtype
)) {
463 * Second ID - ignored;
/* P_MYID stands for the current process' PID. */
465 pid
= (id1
== P_MYID
) ? p
->p_pid
: id1
;
/* In this case the PID comes from the second ID. */
480 pid
= (id2
== P_MYID
) ? p
->p_pid
: id2
;
487 /* Find the process */
488 mutex_enter(proc_lock
);
489 p
= p_find(pid
, PFIND_LOCKED
);
/* Presumably the exit when the process was not found - guard not visible. */
491 mutex_exit(proc_lock
);
/* p->p_lock is taken before proc_lock is released. */
495 mutex_enter(p
->p_lock
);
496 mutex_exit(proc_lock
);
498 /* Disallow modification of the system processes */
499 if (p
->p_flag
& PK_SYSTEM
) {
500 mutex_exit(p
->p_lock
);
505 /* Find the LWP(s) */
508 LIST_FOREACH(t
, &p
->p_lwps
, l_sibling
) {
/* A zero lid matches every LWP; otherwise only the LWP with that lid. */
509 if (lid
&& lid
!= t
->l_lid
)
512 * Bind the thread to the processor-set,
513 * take some CPU and migrate.
518 ci
= sched_takecpu(t
);
523 mutex_exit(p
->p_lock
);
/* Report the previous set only when the caller passed a pointer. */
528 if (SCARG(uap
, opsid
))
529 error
= copyout(&opsid
, SCARG(uap
, opsid
), sizeof(psetid_t
));
/* Undo the PSET_BUSY mark set at the start of the call. */
531 if (psid
!= PS_QUERY
&& psid
!= PS_NONE
) {
532 mutex_enter(&cpu_lock
);
533 psets
[psid
- 1]->ps_flags
&= ~PSET_BUSY
;
534 mutex_exit(&cpu_lock
);
/*
 * sysctl handler for the "psets_max" node.
 *
 * A local copy of the node has its data pointer redirected to the local
 * newsize, so sysctl_lookup() operates on that local.  When the lookup
 * succeeds and a new value was supplied (newp is not NULL), the table is
 * resized with psets_realloc(newsize).
 *
 * NOTE(review): the initialization of node and newsize, any range check
 * between the lookup and the realloc, and the return statements are on
 * lines missing from this listing.
 */
540 * Sysctl nodes and initialization.
544 sysctl_psets_max(SYSCTLFN_ARGS
)
546 struct sysctlnode node
;
/* Point the node at the local so the global is not written directly. */
550 node
.sysctl_data
= &newsize
;
553 error
= sysctl_lookup(SYSCTLFN_CALL(&node
));
/* Nothing more to do on error or for a read-only access. */
554 if (error
|| newp
== NULL
)
561 error
= psets_realloc(newsize
);
/*
 * sysctl handler for the "list" node: builds a textual list of sets.
 *
 * A 1024-byte buffer is allocated and seeded with "<PS_NONE>:1".  Under
 * cpu_lock the table is scanned and ",<id>:2" (id = slot index + 1) is
 * appended per slot; the NULL test suggests empty slots are skipped
 * (the skip statement is not visible).  strlcat() is bounded by bufsz.
 * The string, including its terminator, is copied out to oldp, limited
 * to *oldlenp bytes, and the buffer is freed.
 *
 * NOTE(review): declarations, the guard around the copyout and the
 * return statement are on lines missing from this listing.
 */
567 sysctl_psets_list(SYSCTLFN_ARGS
)
569 const size_t bufsz
= 1024;
575 buf
= kmem_alloc(bufsz
, KM_SLEEP
);
/* First entry is the PS_NONE set. */
576 snprintf(buf
, bufsz
, "%d:1", PS_NONE
); /* XXX */
578 mutex_enter(&cpu_lock
);
579 for (i
= 0; i
< psets_max
; i
++) {
580 if (psets
[i
] == NULL
)
/* Set IDs are 1-based, hence i + 1. */
582 snprintf(tbuf
, sizeof(tbuf
), ",%d:2", i
+ 1); /* XXX */
583 strlcat(buf
, tbuf
, bufsz
);
585 mutex_exit(&cpu_lock
);
/* Length includes the terminating NUL. */
586 len
= strlen(buf
) + 1;
/* Never copy more than the caller's buffer can take. */
589 error
= copyout(buf
, oldp
, min(len
, *oldlenp
));
591 kmem_free(buf
, bufsz
);
/*
 * Register the sysctl nodes for processor sets:
 *   - the "kern" node,
 *   - a "pset" node created under CTL_KERN (result stored in node),
 *   - "psets_max": read-write integer handled by sysctl_psets_max,
 *   - "list": string handled by sysctl_psets_list.
 * The last two are created relative to the "pset" node.
 *
 * NOTE(review): some argument lines of the sysctl_createv() calls and
 * any check of node between them are missing from this listing.
 */
596 SYSCTL_SETUP(sysctl_pset_setup
, "sysctl kern.pset subtree setup")
598 const struct sysctlnode
*node
= NULL
;
600 sysctl_createv(clog
, 0, NULL
, NULL
,
602 CTLTYPE_NODE
, "kern", NULL
,
/* The created "pset" node is returned through &node. */
605 sysctl_createv(clog
, 0, NULL
, &node
,
607 CTLTYPE_NODE
, "pset",
608 SYSCTL_DESCR("Processor-set options"),
610 CTL_KERN
, CTL_CREATE
, CTL_EOL
);
/* Children below are created with node as the parent. */
615 sysctl_createv(clog
, 0, &node
, NULL
,
616 CTLFLAG_PERMANENT
| CTLFLAG_READWRITE
,
617 CTLTYPE_INT
, "psets_max",
618 SYSCTL_DESCR("Maximal count of the processor-sets"),
619 sysctl_psets_max
, 0, &psets_max
, 0,
620 CTL_CREATE
, CTL_EOL
);
621 sysctl_createv(clog
, 0, &node
, NULL
,
623 CTLTYPE_STRING
, "list",
624 SYSCTL_DESCR("List of active sets"),
625 sysctl_psets_list
, 0, NULL
, 0,
626 CTL_CREATE
, CTL_EOL
);