1 /* $NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $ */
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * System calls relating to the scheduler.
40 * - Handle pthread_setschedprio() as defined by POSIX;
41 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $");
47 #include <sys/param.h>
50 #include <sys/kauth.h>
53 #include <sys/mutex.h>
57 #include <sys/savar.h>
58 #include <sys/sched.h>
59 #include <sys/syscallargs.h>
60 #include <sys/sysctl.h>
61 #include <sys/systm.h>
62 #include <sys/types.h>
63 #include <sys/unistd.h>
/* Sysctl log handle for the nodes created in sysctl_sched_setup(). */
67 static struct sysctllog
*sched_sysctl_log
;
/* kauth(9) listener authorizing scheduler operations (see sched_listener_cb). */
68 static kauth_listener_t sched_listener
;
71 * Convert user priority or the in-kernel priority or convert the current
72 * priority to the appropriate range according to the policy change.
/*
 * NOTE(review): garbled extraction -- line breaks fall mid-expression and
 * several original lines are missing (return type, braces, and the return
 * statements of the "no change" and "TS -> RT" cases); see the gaps in the
 * embedded line numbers.  Comments only; edit against the full sys_sched.c.
 */
75 convert_pri(lwp_t
*l
, int policy
, pri_t pri
)
78 /* Convert user priority to the in-kernel */
/* Caller supplied an explicit POSIX real-time priority. */
79 if (pri
!= PRI_NONE
) {
80 /* Only for real-time threads */
81 KASSERT(pri
>= SCHED_PRI_MIN
&& pri
<= SCHED_PRI_MAX
);
82 KASSERT(policy
!= SCHED_OTHER
);
/* Shift the user RT priority into the kernel RT priority range. */
83 return PRI_USER_RT
+ pri
;
86 /* Neither policy, nor priority change */
87 if (l
->l_class
== policy
)
90 /* Time-sharing -> real-time */
91 if (l
->l_class
== SCHED_OTHER
) {
92 KASSERT(policy
== SCHED_FIFO
|| policy
== SCHED_RR
);
96 /* Real-time -> time-sharing */
97 if (policy
== SCHED_OTHER
) {
98 KASSERT(l
->l_class
== SCHED_FIFO
|| l
->l_class
== SCHED_RR
);
/* Remove the kernel RT offset to fall back into the user range. */
99 return l
->l_priority
- PRI_USER_RT
;
102 /* Real-time -> real-time */
103 return l
->l_priority
;
/*
 * do_sched_setparam: validate and apply scheduling policy/priority for the
 * LWP(s) of a process.  pid == 0 presumably selects the calling process and
 * lid == 0 selects all LWPs -- TODO confirm against the full file; this
 * extraction is garbled (mid-expression line breaks, missing lines -- note
 * the gaps in the embedded line numbers).  Comments only; do not edit logic
 * from this copy.
 */
107 do_sched_setparam(pid_t pid
, lwpid_t lid
, int policy
,
108 const struct sched_param
*params
)
118 pri
= params
->sched_priority
;
120 /* If no parameters specified, just return (this should not happen) */
121 if (pri
== PRI_NONE
&& policy
== SCHED_NONE
)
124 /* Validate scheduling class */
125 if (policy
!= SCHED_NONE
&& (policy
< SCHED_OTHER
|| policy
> SCHED_RR
))
128 /* Validate priority */
129 if (pri
!= PRI_NONE
&& (pri
< SCHED_PRI_MIN
|| pri
> SCHED_PRI_MAX
))
133 /* Find the process */
134 mutex_enter(proc_lock
);
135 p
= p_find(pid
, PFIND_LOCKED
);
137 mutex_exit(proc_lock
);
/* Lock order: take p->p_lock before dropping proc_lock (lock handoff). */
140 mutex_enter(p
->p_lock
);
141 mutex_exit(proc_lock
);
142 /* Disallow modification of system processes */
143 if ((p
->p_flag
& PK_SYSTEM
) != 0) {
144 mutex_exit(p
->p_lock
);
148 /* Use the calling process */
150 mutex_enter(p
->p_lock
);
153 /* Find the LWP(s) */
155 LIST_FOREACH(t
, &p
->p_lwps
, l_sibling
) {
/* Skip LWPs other than the requested one (lid != 0 narrows the match). */
159 if (lid
&& lid
!= t
->l_lid
)
/* SCHED_NONE means "keep the LWP's current class". */
164 lpolicy
= (policy
== SCHED_NONE
) ? t
->l_class
: policy
;
166 /* Disallow setting of priority for SCHED_OTHER threads */
167 if (lpolicy
== SCHED_OTHER
&& pri
!= PRI_NONE
) {
173 /* Convert priority, if needed */
174 kpri
= convert_pri(t
, lpolicy
, pri
);
176 /* Check the permission */
177 error
= kauth_authorize_process(kauth_cred_get(),
178 KAUTH_PROCESS_SCHEDULER_SETPARAM
, p
, t
, KAUTH_ARG(lpolicy
),
185 /* Set the scheduling class, change the priority */
186 t
->l_class
= lpolicy
;
187 lwp_changepri(t
, kpri
);
190 mutex_exit(p
->p_lock
);
/* lcnt presumably counts matched LWPs; no match -> ESRCH.  TODO confirm. */
191 return (lcnt
== 0) ? ESRCH
: error
;
195 * Set scheduling parameters.
/*
 * sys__sched_setparam: _sched_setparam(2) entry point.  Copies the
 * sched_param structure in from userspace, then hands pid/lid/policy and
 * the parameters to do_sched_setparam().
 *
 * NOTE(review): garbled extraction (mid-expression line breaks, missing
 * lines -- see embedded line-number gaps).  Fixed here: the address-of
 * expression "&params" had been mojibake-corrupted to "¶ms"
 * (HTML-entity "&para;" mangling) in both the copyin() call and the
 * do_sched_setparam() call.
 */
198 sys__sched_setparam(struct lwp
*l
, const struct sys__sched_setparam_args
*uap
,
202 syscallarg(pid_t) pid;
203 syscallarg(lwpid_t) lid;
204 syscallarg(int) policy;
205 syscallarg(const struct sched_param *) params;
207 struct sched_param params
;
210 /* Get the parameters from the user-space */
211 error
= copyin(SCARG(uap
, params
), &params
, sizeof(params
));
215 error
= do_sched_setparam(SCARG(uap
, pid
), SCARG(uap
, lid
),
216 SCARG(uap
, policy
), &params
);
/*
 * do_sched_getparam: fetch the scheduling class and priority of an LWP
 * identified by pid/lid (looked up via lwp_find2) into *policy/*params.
 * The in-kernel priority is converted back to the user-visible range
 * before return.
 *
 * NOTE(review): garbled extraction -- mid-expression line breaks and
 * missing lines (error checks, the class tests that select which offset
 * to subtract, the final copy into *params); see the embedded
 * line-number gaps.  Comments only; edit against the full file.
 */
222 do_sched_getparam(pid_t pid
, lwpid_t lid
, int *policy
,
223 struct sched_param
*params
)
225 struct sched_param lparams
;
230 t
= lwp_find2(pid
, lid
);
234 /* Check the permission */
235 error
= kauth_authorize_process(kauth_cred_get(),
236 KAUTH_PROCESS_SCHEDULER_GETPARAM
, t
->l_proc
, NULL
, NULL
, NULL
);
/* Permission denied path: drop the proc lock taken by lwp_find2(). */
238 mutex_exit(t
->l_proc
->p_lock
);
/* Snapshot priority and class while still holding p_lock. */
243 lparams
.sched_priority
= t
->l_priority
;
244 lpolicy
= t
->l_class
;
/* Convert kernel priority to user range (time-sharing case). */
248 lparams
.sched_priority
-= PRI_USER
;
/* Convert kernel priority to user range (real-time case). */
252 lparams
.sched_priority
-= PRI_USER_RT
;
263 mutex_exit(t
->l_proc
->p_lock
);
268 * Get scheduling parameters.
/*
 * sys__sched_getparam: _sched_getparam(2) entry point.  Calls
 * do_sched_getparam() and copies the resulting sched_param structure --
 * and, if requested (non-NULL user pointer), the policy -- out to
 * userspace.
 *
 * NOTE(review): garbled extraction (mid-expression line breaks, missing
 * lines -- see embedded line-number gaps).  Fixed here: "&params" had
 * been mojibake-corrupted to "¶ms" (HTML-entity "&para;" mangling)
 * in the copyout() call.
 */
271 sys__sched_getparam(struct lwp
*l
, const struct sys__sched_getparam_args
*uap
,
275 syscallarg(pid_t) pid;
276 syscallarg(lwpid_t) lid;
277 syscallarg(int *) policy;
278 syscallarg(struct sched_param *) params;
280 struct sched_param params
;
283 error
= do_sched_getparam(SCARG(uap
, pid
), SCARG(uap
, lid
), &policy
,
288 error
= copyout(&params
, SCARG(uap
, params
), sizeof(params
));
289 if (error
== 0 && SCARG(uap
, policy
) != NULL
)
290 error
= copyout(&policy
, SCARG(uap
, policy
), sizeof(int));
296 * Allocate the CPU set, and get it from userspace.
/*
 * NOTE(review): garbled extraction -- the return type, braces, local
 * declarations and the error-check line are missing (see embedded
 * line-number gaps).  On copyin failure the freshly created set is
 * released via kcpuset_unuse().  Comments only; edit against the
 * full file.
 */
299 genkcpuset(kcpuset_t
**dset
, const cpuset_t
*sset
, size_t size
)
303 *dset
= kcpuset_create();
304 error
= kcpuset_copyin(sset
, *dset
, size
);
/* Failure path: drop the reference on the just-created set. */
306 kcpuset_unuse(*dset
, NULL
);
/*
 * sys__sched_setaffinity: _sched_setaffinity(2) entry point.  Copies a
 * CPU set in from userspace, validates it against the online CPUs under
 * cpu_lock, then sets (non-empty set) or clears (empty set) the affinity
 * of the matching LWP(s) of the target process.
 *
 * NOTE(review): garbled extraction -- mid-expression line breaks and many
 * missing lines (error checks, continue/break statements, the migration
 * call, unlock pairs); see the embedded line-number gaps.  Comments only;
 * edit against the full sys_sched.c.
 */
314 sys__sched_setaffinity(struct lwp
*l
,
315 const struct sys__sched_setaffinity_args
*uap
, register_t
*retval
)
318 syscallarg(pid_t) pid;
319 syscallarg(lwpid_t) lid;
320 syscallarg(size_t) size;
321 syscallarg(const cpuset_t *) cpuset;
323 kcpuset_t
*cpuset
, *cpulst
= NULL
;
324 struct cpu_info
*ici
, *ci
;
327 CPU_INFO_ITERATOR cii
;
/* Copy the CPU set in from userspace (allocates cpuset). */
333 error
= genkcpuset(&cpuset
, SCARG(uap
, cpuset
), SCARG(uap
, size
));
338 * Traverse _each_ CPU to:
339 * - Check that CPUs in the mask have no assigned processor set.
340 * - Check that at least one CPU from the mask is online.
341 * - Find the first target CPU to migrate.
343 * To avoid the race with CPU online/offline calls and processor sets,
344 * cpu_lock will be locked for the entire operation.
348 mutex_enter(&cpu_lock
);
349 for (CPU_INFO_FOREACH(cii
, ici
)) {
350 struct schedstate_percpu
*ispc
;
/* Skip CPUs not present in the requested mask. */
352 if (kcpuset_isset(cpu_index(ici
), cpuset
) == 0)
355 ispc
= &ici
->ci_schedstate
;
356 /* Check that CPU is not in the processor-set */
357 if (ispc
->spc_psid
!= PS_NONE
) {
361 /* Skip offline CPUs */
362 if (ispc
->spc_flags
& SPCF_OFFLINE
) {
366 /* Target CPU to migrate */
373 /* All CPUs in the set are offline */
/* Error path: release the set (defers destruction via cpulst). */
378 kcpuset_unuse(cpuset
, &cpulst
);
382 if (SCARG(uap
, pid
) != 0) {
383 /* Find the process */
384 mutex_enter(proc_lock
);
385 p
= p_find(SCARG(uap
, pid
), PFIND_LOCKED
);
387 mutex_exit(proc_lock
);
/* Lock order: take p->p_lock before dropping proc_lock. */
391 mutex_enter(p
->p_lock
);
392 mutex_exit(proc_lock
);
393 /* Disallow modification of system processes. */
394 if ((p
->p_flag
& PK_SYSTEM
) != 0) {
395 mutex_exit(p
->p_lock
);
400 /* Use the calling process */
402 mutex_enter(p
->p_lock
);
406 * Check the permission.
408 error
= kauth_authorize_process(l
->l_cred
,
409 KAUTH_PROCESS_SCHEDULER_SETAFFINITY
, p
, NULL
, NULL
, NULL
);
411 mutex_exit(p
->p_lock
);
416 /* Changing the affinity of a SA process is not supported */
417 if ((p
->p_sflag
& (PS_SA
| PS_WEXIT
)) != 0 || p
->p_sa
!= NULL
) {
418 mutex_exit(p
->p_lock
);
424 /* Find the LWP(s) */
426 lid
= SCARG(uap
, lid
);
427 LIST_FOREACH(t
, &p
->p_lwps
, l_sibling
) {
/* lid != 0 narrows the operation to a single LWP. */
428 if (lid
&& lid
!= t
->l_lid
)
431 /* It is not allowed to set the affinity for zombie LWPs */
432 if (t
->l_stat
== LSZOMB
) {
437 /* Set the affinity flag and new CPU set */
438 t
->l_flag
|= LW_AFFINITY
;
/* Release any previously installed affinity set. */
440 if (t
->l_affinity
!= NULL
)
441 kcpuset_unuse(t
->l_affinity
, &cpulst
);
442 t
->l_affinity
= cpuset
;
443 /* Migrate to another CPU, unlocks LWP */
446 /* Unset the affinity flag */
447 t
->l_flag
&= ~LW_AFFINITY
;
448 if (t
->l_affinity
!= NULL
)
449 kcpuset_unuse(t
->l_affinity
, &cpulst
);
450 t
->l_affinity
= NULL
;
455 mutex_exit(p
->p_lock
);
459 mutex_exit(&cpu_lock
);
/* Drop our reference; destroy any sets queued on cpulst. */
461 kcpuset_unuse(cpuset
, &cpulst
);
462 kcpuset_destroy(cpulst
);
/*
 * sys__sched_getaffinity: _sched_getaffinity(2) entry point.  Returns the
 * affinity mask of the LWP identified by pid/lid; an LWP without the
 * LW_AFFINITY flag yields a zeroed set.
 *
 * NOTE(review): garbled extraction -- mid-expression line breaks and
 * missing lines (error checks, else branch, return); see the embedded
 * line-number gaps.  Comments only; edit against the full file.
 */
470 sys__sched_getaffinity(struct lwp
*l
,
471 const struct sys__sched_getaffinity_args
*uap
, register_t
*retval
)
474 syscallarg(pid_t) pid;
475 syscallarg(lwpid_t) lid;
476 syscallarg(size_t) size;
477 syscallarg(cpuset_t *) cpuset;
/* Allocate a kernel CPU set sized per the user-supplied size. */
483 error
= genkcpuset(&cpuset
, SCARG(uap
, cpuset
), SCARG(uap
, size
));
/* Look up the target LWP; on success its proc's p_lock is held. */
488 t
= lwp_find2(SCARG(uap
, pid
), SCARG(uap
, lid
));
493 /* Check the permission */
494 if (kauth_authorize_process(l
->l_cred
,
495 KAUTH_PROCESS_SCHEDULER_GETAFFINITY
, t
->l_proc
, NULL
, NULL
, NULL
)) {
496 mutex_exit(t
->l_proc
->p_lock
);
/* LW_AFFINITY set implies a non-NULL affinity mask to copy out. */
501 if (t
->l_flag
& LW_AFFINITY
) {
502 KASSERT(t
->l_affinity
!= NULL
);
503 kcpuset_copy(cpuset
, t
->l_affinity
);
/* No affinity installed: report an empty set. */
505 kcpuset_zero(cpuset
);
507 mutex_exit(t
->l_proc
->p_lock
);
509 error
= kcpuset_copyout(cpuset
, SCARG(uap
, cpuset
), SCARG(uap
, size
));
511 kcpuset_unuse(cpuset
, NULL
);
/*
 * sys_sched_yield: sched_yield(2) / yield entry point.
 *
 * NOTE(review): truncated extraction -- the body after the LW_SA test is
 * missing entirely (original lines after 524 are absent).  The visible
 * fragment only shows a special case for scheduler-activations LWPs.
 * Comments only; edit against the full file.
 */
519 sys_sched_yield(struct lwp
*l
, const void *v
, register_t
*retval
)
524 if (l
->l_flag
& LW_SA
) {
532 * Sysctl nodes and initialization.
/*
 * sysctl_sched_setup: create the scheduler-related sysctl nodes --
 * kern.posix_sched, the kern.sched subtree, and its pri_min/pri_max
 * immediate-value leaves.
 *
 * NOTE(review): garbled extraction -- several sysctl_createv() argument
 * lines are missing (flags, descriptions, CTL levels); see the embedded
 * line-number gaps.  Comments only; edit against the full file.
 */
535 sysctl_sched_setup(struct sysctllog
**clog
)
537 const struct sysctlnode
*node
= NULL
;
/* Ensure the top-level "kern" node exists. */
539 sysctl_createv(clog
, 0, NULL
, NULL
,
541 CTLTYPE_NODE
, "kern", NULL
,
/* kern.posix_sched: advertised POSIX Process Scheduling conformance. */
544 sysctl_createv(clog
, 0, NULL
, NULL
,
545 CTLFLAG_PERMANENT
|CTLFLAG_IMMEDIATE
,
546 CTLTYPE_INT
, "posix_sched",
547 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
548 "Process Scheduling option to which the "
549 "system attempts to conform"),
550 NULL
, _POSIX_PRIORITY_SCHEDULING
, NULL
, 0,
551 CTL_KERN
, CTL_CREATE
, CTL_EOL
);
/* kern.sched: container node for the scheduler leaves below. */
552 sysctl_createv(clog
, 0, NULL
, &node
,
554 CTLTYPE_NODE
, "sched",
555 SYSCTL_DESCR("Scheduler options"),
557 CTL_KERN
, CTL_CREATE
, CTL_EOL
);
/* kern.sched.pri_min: immediate value SCHED_PRI_MIN. */
562 sysctl_createv(clog
, 0, &node
, NULL
,
563 CTLFLAG_PERMANENT
| CTLFLAG_IMMEDIATE
,
564 CTLTYPE_INT
, "pri_min",
565 SYSCTL_DESCR("Minimal POSIX real-time priority"),
566 NULL
, SCHED_PRI_MIN
, NULL
, 0,
567 CTL_CREATE
, CTL_EOL
);
/* kern.sched.pri_max: immediate value SCHED_PRI_MAX. */
568 sysctl_createv(clog
, 0, &node
, NULL
,
569 CTLFLAG_PERMANENT
| CTLFLAG_IMMEDIATE
,
570 CTLTYPE_INT
, "pri_max",
571 SYSCTL_DESCR("Maximal POSIX real-time priority"),
572 NULL
, SCHED_PRI_MAX
, NULL
, 0,
573 CTL_CREATE
, CTL_EOL
);
/*
 * sched_listener_cb: kauth(9) process-scope listener for scheduler
 * actions.  Defaults to KAUTH_RESULT_DEFER and grants:
 *  - GETPARAM to credentials whose uid matches the target process;
 *  - SETPARAM to uid-matching callers only when the request does not
 *    raise class (no new FIFO/RR class) or priority;
 *  - GETAFFINITY unconditionally;
 *  - SETAFFINITY is left to the secmodel (privileged).
 *
 * NOTE(review): garbled extraction -- the switch header, break statements,
 * the SETPARAM target-LWP setup and the function tail are missing; see
 * the embedded line-number gaps.  Comments only; edit against the full
 * file.
 */
577 sched_listener_cb(kauth_cred_t cred
, kauth_action_t action
, void *cookie
,
578 void *arg0
, void *arg1
, void *arg2
, void *arg3
)
/* Default: let other listeners / the secmodel decide. */
583 result
= KAUTH_RESULT_DEFER
;
587 case KAUTH_PROCESS_SCHEDULER_GETPARAM
:
588 if (kauth_cred_uidmatch(cred
, p
->p_cred
))
589 result
= KAUTH_RESULT_ALLOW
;
592 case KAUTH_PROCESS_SCHEDULER_SETPARAM
:
593 if (kauth_cred_uidmatch(cred
, p
->p_cred
)) {
/* Requested class/priority are passed boxed via KAUTH_ARG(). */
599 policy
= (int)(unsigned long)arg2
;
600 priority
= (pri_t
)(unsigned long)arg3
;
/*
 * Allow only if the class is unchanged or non-real-time, and the
 * priority is not being raised above the target LWP's current one.
 */
602 if ((policy
== l
->l_class
||
603 (policy
!= SCHED_FIFO
&& policy
!= SCHED_RR
)) &&
604 priority
<= l
->l_priority
)
605 result
= KAUTH_RESULT_ALLOW
;
610 case KAUTH_PROCESS_SCHEDULER_GETAFFINITY
:
611 result
= KAUTH_RESULT_ALLOW
;
614 case KAUTH_PROCESS_SCHEDULER_SETAFFINITY
:
615 /* Privileged; we let the secmodel handle this. */
/*
 * NOTE(review): the enclosing function's header is missing from this
 * extraction (original lines ~627-628 absent) -- presumably the scheduler
 * init routine; TODO confirm against the full file.  It registers the
 * sysctl nodes and installs the kauth process-scope listener.
 */
629 sysctl_sched_setup(&sched_sysctl_log
);
631 sched_listener
= kauth_listen_scope(KAUTH_SCOPE_PROCESS
,
632 sched_listener_cb
, NULL
);