Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / kern / sys_sched.c
blob6b49f7bf27cb01acf1003cbc2dae52d3828fb797
1 /* $NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $ */
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
30 * System calls relating to the scheduler.
32 * Lock order:
34 * cpu_lock ->
35 * proc_lock ->
36 * proc_t::p_lock ->
37 * lwp_t::lwp_lock
39 * TODO:
40 * - Handle pthread_setschedprio() as defined by POSIX;
41 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $");
47 #include <sys/param.h>
49 #include <sys/cpu.h>
50 #include <sys/kauth.h>
51 #include <sys/kmem.h>
52 #include <sys/lwp.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/pset.h>
56 #include <sys/sa.h>
57 #include <sys/savar.h>
58 #include <sys/sched.h>
59 #include <sys/syscallargs.h>
60 #include <sys/sysctl.h>
61 #include <sys/systm.h>
62 #include <sys/types.h>
63 #include <sys/unistd.h>
65 #include "opt_sa.h"
/* sysctl log handle; sched_init() hands its address to sysctl_sched_setup(). */
67 static struct sysctllog *sched_sysctl_log;
/* Handle returned by kauth_listen_scope() when sched_init() registers sched_listener_cb(). */
68 static kauth_listener_t sched_listener;
71 * Convert user priority or the in-kernel priority or convert the current
72 * priority to the appropriate range according to the policy change.
74 static pri_t
75 convert_pri(lwp_t *l, int policy, pri_t pri)
78 /* Convert user priority to the in-kernel */
79 if (pri != PRI_NONE) {
80 /* Only for real-time threads */
81 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
82 KASSERT(policy != SCHED_OTHER);
83 return PRI_USER_RT + pri;
86 /* Neither policy, nor priority change */
87 if (l->l_class == policy)
88 return l->l_priority;
90 /* Time-sharing -> real-time */
91 if (l->l_class == SCHED_OTHER) {
92 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
93 return PRI_USER_RT;
96 /* Real-time -> time-sharing */
97 if (policy == SCHED_OTHER) {
98 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
99 return l->l_priority - PRI_USER_RT;
102 /* Real-time -> real-time */
103 return l->l_priority;
107 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
108 const struct sched_param *params)
110 struct proc *p;
111 struct lwp *t;
112 pri_t pri;
113 u_int lcnt;
114 int error;
116 error = 0;
118 pri = params->sched_priority;
120 /* If no parameters specified, just return (this should not happen) */
121 if (pri == PRI_NONE && policy == SCHED_NONE)
122 return 0;
124 /* Validate scheduling class */
125 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
126 return EINVAL;
128 /* Validate priority */
129 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
130 return EINVAL;
132 if (pid != 0) {
133 /* Find the process */
134 mutex_enter(proc_lock);
135 p = p_find(pid, PFIND_LOCKED);
136 if (p == NULL) {
137 mutex_exit(proc_lock);
138 return ESRCH;
140 mutex_enter(p->p_lock);
141 mutex_exit(proc_lock);
142 /* Disallow modification of system processes */
143 if ((p->p_flag & PK_SYSTEM) != 0) {
144 mutex_exit(p->p_lock);
145 return EPERM;
147 } else {
148 /* Use the calling process */
149 p = curlwp->l_proc;
150 mutex_enter(p->p_lock);
153 /* Find the LWP(s) */
154 lcnt = 0;
155 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
156 pri_t kpri;
157 int lpolicy;
159 if (lid && lid != t->l_lid)
160 continue;
162 lcnt++;
163 lwp_lock(t);
164 lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
166 /* Disallow setting of priority for SCHED_OTHER threads */
167 if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
168 lwp_unlock(t);
169 error = EINVAL;
170 break;
173 /* Convert priority, if needed */
174 kpri = convert_pri(t, lpolicy, pri);
176 /* Check the permission */
177 error = kauth_authorize_process(kauth_cred_get(),
178 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
179 KAUTH_ARG(kpri));
180 if (error) {
181 lwp_unlock(t);
182 break;
185 /* Set the scheduling class, change the priority */
186 t->l_class = lpolicy;
187 lwp_changepri(t, kpri);
188 lwp_unlock(t);
190 mutex_exit(p->p_lock);
191 return (lcnt == 0) ? ESRCH : error;
195 * Set scheduling parameters.
198 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
199 register_t *retval)
201 /* {
202 syscallarg(pid_t) pid;
203 syscallarg(lwpid_t) lid;
204 syscallarg(int) policy;
205 syscallarg(const struct sched_param *) params;
206 } */
207 struct sched_param params;
208 int error;
210 /* Get the parameters from the user-space */
211 error = copyin(SCARG(uap, params), &params, sizeof(params));
212 if (error)
213 goto out;
215 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
216 SCARG(uap, policy), &params);
217 out:
218 return error;
222 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
223 struct sched_param *params)
225 struct sched_param lparams;
226 struct lwp *t;
227 int error, lpolicy;
229 /* Locks the LWP */
230 t = lwp_find2(pid, lid);
231 if (t == NULL)
232 return ESRCH;
234 /* Check the permission */
235 error = kauth_authorize_process(kauth_cred_get(),
236 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
237 if (error != 0) {
238 mutex_exit(t->l_proc->p_lock);
239 return error;
242 lwp_lock(t);
243 lparams.sched_priority = t->l_priority;
244 lpolicy = t->l_class;
246 switch (lpolicy) {
247 case SCHED_OTHER:
248 lparams.sched_priority -= PRI_USER;
249 break;
250 case SCHED_RR:
251 case SCHED_FIFO:
252 lparams.sched_priority -= PRI_USER_RT;
253 break;
256 if (policy != NULL)
257 *policy = lpolicy;
259 if (params != NULL)
260 *params = lparams;
262 lwp_unlock(t);
263 mutex_exit(t->l_proc->p_lock);
264 return error;
268 * Get scheduling parameters.
271 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
272 register_t *retval)
274 /* {
275 syscallarg(pid_t) pid;
276 syscallarg(lwpid_t) lid;
277 syscallarg(int *) policy;
278 syscallarg(struct sched_param *) params;
279 } */
280 struct sched_param params;
281 int error, policy;
283 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
284 &params);
285 if (error)
286 goto out;
288 error = copyout(&params, SCARG(uap, params), sizeof(params));
289 if (error == 0 && SCARG(uap, policy) != NULL)
290 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
291 out:
292 return error;
296 * Allocate the CPU set, and get it from userspace.
298 static int
299 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
301 int error;
303 *dset = kcpuset_create();
304 error = kcpuset_copyin(sset, *dset, size);
305 if (error != 0)
306 kcpuset_unuse(*dset, NULL);
307 return error;
311 * Set affinity.
314 sys__sched_setaffinity(struct lwp *l,
315 const struct sys__sched_setaffinity_args *uap, register_t *retval)
317 /* {
318 syscallarg(pid_t) pid;
319 syscallarg(lwpid_t) lid;
320 syscallarg(size_t) size;
321 syscallarg(const cpuset_t *) cpuset;
322 } */
323 kcpuset_t *cpuset, *cpulst = NULL;
324 struct cpu_info *ici, *ci;
325 struct proc *p;
326 struct lwp *t;
327 CPU_INFO_ITERATOR cii;
328 bool alloff;
329 lwpid_t lid;
330 u_int lcnt;
331 int error;
333 error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size));
334 if (error)
335 return error;
338 * Traverse _each_ CPU to:
339 * - Check that CPUs in the mask have no assigned processor set.
340 * - Check that at least one CPU from the mask is online.
341 * - Find the first target CPU to migrate.
343 * To avoid the race with CPU online/offline calls and processor sets,
344 * cpu_lock will be locked for the entire operation.
346 ci = NULL;
347 alloff = false;
348 mutex_enter(&cpu_lock);
349 for (CPU_INFO_FOREACH(cii, ici)) {
350 struct schedstate_percpu *ispc;
352 if (kcpuset_isset(cpu_index(ici), cpuset) == 0)
353 continue;
355 ispc = &ici->ci_schedstate;
356 /* Check that CPU is not in the processor-set */
357 if (ispc->spc_psid != PS_NONE) {
358 error = EPERM;
359 goto out;
361 /* Skip offline CPUs */
362 if (ispc->spc_flags & SPCF_OFFLINE) {
363 alloff = true;
364 continue;
366 /* Target CPU to migrate */
367 if (ci == NULL) {
368 ci = ici;
371 if (ci == NULL) {
372 if (alloff) {
373 /* All CPUs in the set are offline */
374 error = EPERM;
375 goto out;
377 /* Empty set */
378 kcpuset_unuse(cpuset, &cpulst);
379 cpuset = NULL;
382 if (SCARG(uap, pid) != 0) {
383 /* Find the process */
384 mutex_enter(proc_lock);
385 p = p_find(SCARG(uap, pid), PFIND_LOCKED);
386 if (p == NULL) {
387 mutex_exit(proc_lock);
388 error = ESRCH;
389 goto out;
391 mutex_enter(p->p_lock);
392 mutex_exit(proc_lock);
393 /* Disallow modification of system processes. */
394 if ((p->p_flag & PK_SYSTEM) != 0) {
395 mutex_exit(p->p_lock);
396 error = EPERM;
397 goto out;
399 } else {
400 /* Use the calling process */
401 p = l->l_proc;
402 mutex_enter(p->p_lock);
406 * Check the permission.
408 error = kauth_authorize_process(l->l_cred,
409 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
410 if (error != 0) {
411 mutex_exit(p->p_lock);
412 goto out;
415 #ifdef KERN_SA
416 /* Changing the affinity of a SA process is not supported */
417 if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
418 mutex_exit(p->p_lock);
419 error = EINVAL;
420 goto out;
422 #endif
424 /* Find the LWP(s) */
425 lcnt = 0;
426 lid = SCARG(uap, lid);
427 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
428 if (lid && lid != t->l_lid)
429 continue;
430 lwp_lock(t);
431 /* It is not allowed to set the affinity for zombie LWPs */
432 if (t->l_stat == LSZOMB) {
433 lwp_unlock(t);
434 continue;
436 if (cpuset) {
437 /* Set the affinity flag and new CPU set */
438 t->l_flag |= LW_AFFINITY;
439 kcpuset_use(cpuset);
440 if (t->l_affinity != NULL)
441 kcpuset_unuse(t->l_affinity, &cpulst);
442 t->l_affinity = cpuset;
443 /* Migrate to another CPU, unlocks LWP */
444 lwp_migrate(t, ci);
445 } else {
446 /* Unset the affinity flag */
447 t->l_flag &= ~LW_AFFINITY;
448 if (t->l_affinity != NULL)
449 kcpuset_unuse(t->l_affinity, &cpulst);
450 t->l_affinity = NULL;
451 lwp_unlock(t);
453 lcnt++;
455 mutex_exit(p->p_lock);
456 if (lcnt == 0)
457 error = ESRCH;
458 out:
459 mutex_exit(&cpu_lock);
460 if (cpuset != NULL)
461 kcpuset_unuse(cpuset, &cpulst);
462 kcpuset_destroy(cpulst);
463 return error;
467 * Get affinity.
470 sys__sched_getaffinity(struct lwp *l,
471 const struct sys__sched_getaffinity_args *uap, register_t *retval)
473 /* {
474 syscallarg(pid_t) pid;
475 syscallarg(lwpid_t) lid;
476 syscallarg(size_t) size;
477 syscallarg(cpuset_t *) cpuset;
478 } */
479 struct lwp *t;
480 kcpuset_t *cpuset;
481 int error;
483 error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size));
484 if (error)
485 return error;
487 /* Locks the LWP */
488 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
489 if (t == NULL) {
490 error = ESRCH;
491 goto out;
493 /* Check the permission */
494 if (kauth_authorize_process(l->l_cred,
495 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
496 mutex_exit(t->l_proc->p_lock);
497 error = EPERM;
498 goto out;
500 lwp_lock(t);
501 if (t->l_flag & LW_AFFINITY) {
502 KASSERT(t->l_affinity != NULL);
503 kcpuset_copy(cpuset, t->l_affinity);
504 } else
505 kcpuset_zero(cpuset);
506 lwp_unlock(t);
507 mutex_exit(t->l_proc->p_lock);
509 error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
510 out:
511 kcpuset_unuse(cpuset, NULL);
512 return error;
516 * Yield.
519 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
522 yield();
523 #ifdef KERN_SA
524 if (l->l_flag & LW_SA) {
525 sa_preempt(l);
527 #endif
528 return 0;
532 * Sysctl nodes and initialization.
534 static void
535 sysctl_sched_setup(struct sysctllog **clog)
537 const struct sysctlnode *node = NULL;
539 sysctl_createv(clog, 0, NULL, NULL,
540 CTLFLAG_PERMANENT,
541 CTLTYPE_NODE, "kern", NULL,
542 NULL, 0, NULL, 0,
543 CTL_KERN, CTL_EOL);
544 sysctl_createv(clog, 0, NULL, NULL,
545 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
546 CTLTYPE_INT, "posix_sched",
547 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
548 "Process Scheduling option to which the "
549 "system attempts to conform"),
550 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
551 CTL_KERN, CTL_CREATE, CTL_EOL);
552 sysctl_createv(clog, 0, NULL, &node,
553 CTLFLAG_PERMANENT,
554 CTLTYPE_NODE, "sched",
555 SYSCTL_DESCR("Scheduler options"),
556 NULL, 0, NULL, 0,
557 CTL_KERN, CTL_CREATE, CTL_EOL);
559 if (node == NULL)
560 return;
562 sysctl_createv(clog, 0, &node, NULL,
563 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
564 CTLTYPE_INT, "pri_min",
565 SYSCTL_DESCR("Minimal POSIX real-time priority"),
566 NULL, SCHED_PRI_MIN, NULL, 0,
567 CTL_CREATE, CTL_EOL);
568 sysctl_createv(clog, 0, &node, NULL,
569 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
570 CTLTYPE_INT, "pri_max",
571 SYSCTL_DESCR("Maximal POSIX real-time priority"),
572 NULL, SCHED_PRI_MAX, NULL, 0,
573 CTL_CREATE, CTL_EOL);
576 static int
577 sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
578 void *arg0, void *arg1, void *arg2, void *arg3)
580 struct proc *p;
581 int result;
583 result = KAUTH_RESULT_DEFER;
584 p = arg0;
586 switch (action) {
587 case KAUTH_PROCESS_SCHEDULER_GETPARAM:
588 if (kauth_cred_uidmatch(cred, p->p_cred))
589 result = KAUTH_RESULT_ALLOW;
590 break;
592 case KAUTH_PROCESS_SCHEDULER_SETPARAM:
593 if (kauth_cred_uidmatch(cred, p->p_cred)) {
594 struct lwp *l;
595 int policy;
596 pri_t priority;
598 l = arg1;
599 policy = (int)(unsigned long)arg2;
600 priority = (pri_t)(unsigned long)arg3;
602 if ((policy == l->l_class ||
603 (policy != SCHED_FIFO && policy != SCHED_RR)) &&
604 priority <= l->l_priority)
605 result = KAUTH_RESULT_ALLOW;
608 break;
610 case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
611 result = KAUTH_RESULT_ALLOW;
612 break;
614 case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
615 /* Privileged; we let the secmodel handle this. */
616 break;
618 default:
619 break;
622 return result;
625 void
626 sched_init(void)
629 sysctl_sched_setup(&sched_sysctl_log);
631 sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
632 sched_listener_cb, NULL);