/*	$NetBSD: linux_sched.c,v 1.60 2009/06/23 13:18:59 njoly Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.60 2009/06/23 13:18:59 njoly Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_exec.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */
	struct proc *p;
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a noop.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, &p)) != 0)
		return error;

#ifdef LINUX_NPTL
	if ((SCARG(uap, flags) & LINUX_CLONE_SETTLS) != 0)
		return linux_init_thread_area(l, LIST_FIRST(&p->p_lwps));
#endif /* LINUX_NPTL */

	return 0;
}
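
/*
 * Illustration (flag set as used by glibc's NPTL, not taken from this
 * file): a typical pthread_create() issues clone() with
 * CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|
 * CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID.  The translation
 * above maps the first four onto
 * FORK_SHAREVM|FORK_SHARECWD|FORK_SHAREFILES|FORK_SHARESIGS for fork1(),
 * while the TLS and tid-pointer flags are handled through the emuldata.
 */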

/*
 * linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   in particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}
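
/*
 * Worked example of the mapping above, assuming SCHED_PRI_MIN == 0 and
 * SCHED_PRI_MAX == 63 (illustrative values; the formula works for any
 * native range):
 *	Linux rtprio  1 -> ( 1 - 1) * 63 / 98 + 0 =  0
 *	Linux rtprio 50 -> (50 - 1) * 63 / 98 + 0 = 31
 *	Linux rtprio 99 -> (99 - 1) * 63 / 98 + 0 = 63
 */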

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

#ifdef DEBUG_LINUX
		printf("native2linux: native: policy %d, priority %d\n",
		    native_policy, prio);
#endif

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}

#ifdef DEBUG_LINUX
		printf("native2linux: linux: policy %d, priority %d\n",
		    -1, linux_params->sched_priority);
#endif
	}

	return 0;
}
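
/*
 * Note that the two conversions are not exact inverses once integer
 * division truncates.  With the same illustrative 0..63 native range:
 * Linux rtprio 50 maps to native 31, but native 31 maps back to
 * 31 * 98 / 63 + 1 = 49.  A Linux application should not expect to read
 * back exactly the priority it set.
 */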

int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);

 out:
	return error;
}
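
/*
 * The getparam/native2linux dance above exists because Linux
 * sched_setparam() changes only the priority, never the policy: the new
 * priority must be validated against the target's current policy.  For
 * example, sched_priority = 50 is valid for a SCHED_FIFO task (range
 * [1,99]) but invalid for a SCHED_OTHER task (always 0).
 */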

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("getparam: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("getparam: linux: policy %d, priority %d\n",
	    policy, lp.sched_priority);
#endif

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));

 out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: linux: policy %d, priority %d\n",
	    SCARG(uap, policy), lp.sched_priority);
#endif

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);

 out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

 out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}
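
/*
 * Illustration: a Linux binary probing the emulated scheduler sees the
 * same limits a native Linux kernel reports, e.g.
 *	sched_get_priority_max(SCHED_FIFO)  == 99
 *	sched_get_priority_min(SCHED_RR)    == 1
 *	sched_get_priority_max(SCHED_OTHER) == 0
 */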

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple.
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		mutex_enter(proc_lock);
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group.  The emulation exit hook
		 * takes care of hiding the zombies and reporting the exit
		 * code properly.
		 */
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves */
		psignal(p, SIGKILL);
		mutex_exit(proc_lock);

		return 0;
	}
#endif /* LINUX_NPTL */

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */
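
/*
 * For context: on NPTL systems glibc terminates the process with
 * exit_group(2) rather than _exit(2), so the path above is what runs
 * when an emulated multithreaded Linux program exits normally.
 */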

#ifdef LINUX_NPTL
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	led->s->flags |= LINUX_LES_USE_NPTL;

	*retval = l->l_proc->p_pid;

	return 0;
}
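
/*
 * Linux semantics for reference: the kernel remembers the tid pointer
 * and, when the thread exits, writes 0 there and does a FUTEX_WAKE on
 * that address (this is how pthread_join() notices termination).  The
 * stored clear_tid is presumably consumed the same way by the
 * emulation's exit path.
 */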

int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{
	/* The Linux kernel does it exactly that way */
	*retval = l->l_proc->p_pid;
	return 0;
}

int
linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
{
	struct linux_emuldata *led = l->l_proc->p_emuldata;

	if (led->s->flags & LINUX_LES_USE_NPTL) {
		/* The Linux kernel does it exactly that way */
		*retval = led->s->group_pid;
	} else {
		*retval = l->l_proc->p_pid;
	}

	return 0;
}
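
/*
 * Background: on Linux, gettid(2) returns the per-thread ID while
 * getpid(2) returns the thread group ID shared by all threads created
 * with CLONE_THREAD.  Since this emulation backs each Linux thread with
 * its own struct proc, p_pid stands in for the thread ID and the shared
 * emuldata's group_pid stands in for the tgid.
 */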

int
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	mutex_enter(proc_lock);
	if (led->s->flags & LINUX_LES_USE_NPTL) {
		/* Find the thread group leader's parent */
		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
			printf("linux_sys_getppid: missing group leader PID"
			    " %d\n", led->s->group_pid);
			mutex_exit(proc_lock);
			return ESRCH;
		}
		pp = glp->p_pptr;

		/* If this is a Linux process too, return thread group PID */
		if (pp->p_emul == p->p_emul) {
			struct linux_emuldata *pled;

			pled = pp->p_emuldata;
			*retval = pled->s->group_pid;
		} else {
			*retval = pp->p_pid;
		}
	} else {
		*retval = p->p_pptr->p_pid;
	}
	mutex_exit(proc_lock);

	return 0;
}
#endif /* LINUX_NPTL */

#ifdef LINUX_NPTL
int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	int error, size, nb = ncpu;
	unsigned long *p, *data;

	/* Unlike Linux, dynamically calculate cpu mask size */
	size = sizeof(long) * ((ncpu + LONG_BIT - 1) / LONG_BIT);
	if (SCARG(uap, len) < size)
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Return the actual number of CPUs, tagging all of them as
	 * available.  The result is a mask, the first CPU being in the
	 * least significant bit.
	 */
	data = malloc(size, M_TEMP, M_WAITOK|M_ZERO);
	p = data;
	while (nb >= LONG_BIT) {	/* fill whole words first... */
		*p++ = ~0UL;
		nb -= LONG_BIT;
	}
	if (nb)				/* ...then the remaining nb bits */
		*p = (1UL << nb) - 1;

	error = copyout(data, SCARG(uap, mask), size);
	free(data, M_TEMP);

	*retval = size;
	return error;
}
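
/*
 * Example of the mask layout (LP64, LONG_BIT == 64): with ncpu == 6,
 * size is one 8-byte word and the word is 0x3f, i.e. CPUs 0-5 set with
 * CPU 0 in the least significant bit.  A caller passing len < 8 gets
 * EINVAL.
 */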

int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/* Silently ignore the requested mask; only the pid is checked. */
#ifdef DEBUG_LINUX
	printf("linux_sys_sched_setaffinity\n");
#endif
	return 0;
}
#endif /* LINUX_NPTL */