1 /* $NetBSD: kern_proc.c,v 1.158 2009/11/26 00:19:11 matt Exp $ */
4 * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Copyright (c) 1982, 1986, 1989, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.158 2009/11/26 00:19:11 matt Exp $");
67 #include "opt_kstack.h"
68 #include "opt_maxuprc.h"
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/kernel.h>
74 #include <sys/resourcevar.h>
79 #include <ufs/ufs/quota.h>
84 #include <sys/ioctl.h>
86 #include <sys/signalvar.h>
89 #include <sys/savar.h>
90 #include <sys/filedesc.h>
91 #include "sys/syscall_stats.h"
92 #include <sys/kauth.h>
93 #include <sys/sleepq.h>
94 #include <sys/atomic.h>
98 #include <uvm/uvm_extern.h>
101 * Other process lists
104 struct proclist allproc
;
105 struct proclist zombproc
; /* resources have been freed */
110 * pid to proc lookup is done by indexing the pid_table array.
111 * Since pid numbers are only allocated when an empty slot
112 * has been found, there is no need to search any lists ever.
113 * (an orphaned pgrp will lock the slot, a session will lock
114 * the pgrp with the same number.)
115 * If the table is too small it is reallocated with twice the
116 * previous size and the entries 'unzipped' into the two halves.
117 * A linked list of free entries is passed through the pt_proc
118 * field of 'free' items - set odd to be an invalid ptr.
122 struct proc
*pt_proc
;
123 struct pgrp
*pt_pgrp
;
125 #if 1 /* strongly typed cast - should be a noop */
126 static inline uint
p2u(struct proc
*p
) { return (uint
)(uintptr_t)p
; }
128 #define p2u(p) ((uint)p)
/* True when bit 0 of p is clear; free slots hold an odd value (see P_FREE), so they test false. */
130 #define P_VALID(p) (!(p2u(p) & 1))
/* Recover the integer stored in a free slot by shifting out the low tag bit. */
131 #define P_NEXT(p) (p2u(p) >> 1)
/* Build the odd pseudo-pointer stored in a free slot's pt_proc: the value shifted left one bit, with bit 0 set. */
132 #define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
134 #define INITIAL_PID_TABLE_SIZE (1 << 5)
135 static struct pid_table
*pid_table
;
136 static uint pid_tbl_mask
= INITIAL_PID_TABLE_SIZE
- 1;
137 static uint pid_alloc_lim
; /* max we allocate before growing table */
138 static uint pid_alloc_cnt
; /* number of allocated pids */
140 /* links through free slots - never empty! */
141 static uint next_free_pt
, last_free_pt
;
142 static pid_t pid_max
= PID_MAX
; /* largest value we allocate */
144 /* Components of the first process -- never freed. */
146 extern struct emul emul_netbsd
; /* defined in kern_exec.c */
148 struct session session0
= {
152 struct pgrp pgrp0
= {
153 .pg_members
= LIST_HEAD_INITIALIZER(&pgrp0
.pg_members
),
154 .pg_session
= &session0
,
156 filedesc_t filedesc0
;
157 struct cwdinfo cwdi0
= {
158 .cwdi_cmask
= CMASK
, /* see cmask below */
161 struct plimit limit0
;
162 struct pstats pstat0
;
163 struct vmspace vmspace0
;
164 struct sigacts sigacts0
;
165 struct turnstile turnstile0
;
166 struct proc proc0
= {
167 .p_lwps
= LIST_HEAD_INITIALIZER(&proc0
.p_lwps
),
168 .p_sigwaiters
= LIST_HEAD_INITIALIZER(&proc0
.p_sigwaiters
),
171 .p_nlwpid
= 1, /* must match lwp0.l_lid */
175 * Set PK_NOCLDWAIT so that kernel threads are reparented to init(8)
176 * when they exit. init(8) can easily wait them out for us.
178 .p_flag
= PK_SYSTEM
| PK_NOCLDWAIT
,
181 .p_emul
= &emul_netbsd
,
185 .p_vmspace
= &vmspace0
,
187 .p_sigacts
= &sigacts0
,
189 struct lwp lwp0
__aligned(MIN_LWP_ALIGNMENT
) = {
191 .l_cpu
= LWP0_CPU_INFO
,
198 .l_syncobj
= &sched_syncobj
,
200 .l_priority
= PRI_USER
+ NPRI_USER
- 1,
201 .l_inheritedprio
= -1,
202 .l_class
= SCHED_OTHER
,
204 .l_pi_lenders
= SLIST_HEAD_INITIALIZER(&lwp0
.l_pi_lenders
),
205 .l_name
= __UNCONST("swapper"),
211 int maxuprc
= MAXUPRC
;
214 MALLOC_DEFINE(M_EMULDATA
, "emuldata", "Per-process emulation data");
215 MALLOC_DEFINE(M_SUBPROC
, "subproc", "Proc sub-structures");
218 * The process list descriptors, used during pid allocation and
219 * by sysctl. No locking on this data structure is needed since
220 * it is completely static.
222 const struct proclist_desc proclists
[] = {
228 static struct pgrp
* pg_remove(pid_t
);
229 static void pg_delete(pid_t
);
230 static void orphanpg(struct pgrp
*);
232 static specificdata_domain_t proc_specificdata_domain
;
234 static pool_cache_t proc_cache
;
236 static kauth_listener_t proc_listener
;
239 proc_listener_cb(kauth_cred_t cred
, kauth_action_t action
, void *cookie
,
240 void *arg0
, void *arg1
, void *arg2
, void *arg3
)
245 result
= KAUTH_RESULT_DEFER
;
249 case KAUTH_PROCESS_CANSEE
: {
250 enum kauth_process_req req
;
252 req
= (enum kauth_process_req
)arg1
;
255 case KAUTH_REQ_PROCESS_CANSEE_ARGS
:
256 case KAUTH_REQ_PROCESS_CANSEE_ENTRY
:
257 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES
:
258 result
= KAUTH_RESULT_ALLOW
;
262 case KAUTH_REQ_PROCESS_CANSEE_ENV
:
263 if (kauth_cred_getuid(cred
) !=
264 kauth_cred_getuid(p
->p_cred
) ||
265 kauth_cred_getuid(cred
) !=
266 kauth_cred_getsvuid(p
->p_cred
))
269 result
= KAUTH_RESULT_ALLOW
;
280 case KAUTH_PROCESS_FORK
: {
281 int lnprocs
= (int)(unsigned long)arg2
;
284 * Don't allow a nonprivileged user to use the last few
285 * processes. The variable lnprocs is the current number of
286 * processes, maxproc is the limit.
288 if (__predict_false((lnprocs
>= maxproc
- 5)))
291 result
= KAUTH_RESULT_ALLOW
;
296 case KAUTH_PROCESS_CORENAME
:
297 case KAUTH_PROCESS_STOPFLAG
:
298 if (proc_uidmatch(cred
, p
->p_cred
) == 0)
299 result
= KAUTH_RESULT_ALLOW
;
311 * Initialize global process hashing structures.
316 const struct proclist_desc
*pd
;
318 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
320 for (pd
= proclists
; pd
->pd_list
!= NULL
; pd
++)
321 LIST_INIT(pd
->pd_list
);
323 proc_lock
= mutex_obj_alloc(MUTEX_DEFAULT
, IPL_NONE
);
324 pid_table
= kmem_alloc(INITIAL_PID_TABLE_SIZE
325 * sizeof(struct pid_table
), KM_SLEEP
);
327 /* Set free list running through table...
328 Preset 'use count' above PID_MAX so we allocate pid 1 next. */
329 for (i
= 0; i
<= pid_tbl_mask
; i
++) {
330 pid_table
[i
].pt_proc
= P_FREE(LINK_EMPTY
+ i
+ 1);
331 pid_table
[i
].pt_pgrp
= 0;
333 /* slot 0 is just grabbed */
335 /* Need to fix last entry. */
336 last_free_pt
= pid_tbl_mask
;
337 pid_table
[last_free_pt
].pt_proc
= P_FREE(LINK_EMPTY
);
338 /* point at which we grow table - to avoid reusing pids too often */
339 pid_alloc_lim
= pid_tbl_mask
- 1;
342 proc_specificdata_domain
= specificdata_domain_create();
343 KASSERT(proc_specificdata_domain
!= NULL
);
345 proc_cache
= pool_cache_init(sizeof(struct proc
), 0, 0, 0,
346 "procpl", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
348 proc_listener
= kauth_listen_scope(KAUTH_SCOPE_PROCESS
,
349 proc_listener_cb
, NULL
);
353 * Initialize process 0.
368 KASSERT((void *)uvm_lwp_getuarea(l
) != NULL
);
369 KASSERT(l
->l_lid
== p
->p_nlwpid
);
371 mutex_init(&p
->p_stmutex
, MUTEX_DEFAULT
, IPL_HIGH
);
372 mutex_init(&p
->p_auxlock
, MUTEX_DEFAULT
, IPL_NONE
);
373 p
->p_lock
= mutex_obj_alloc(MUTEX_DEFAULT
, IPL_NONE
);
375 rw_init(&p
->p_reflock
);
376 cv_init(&p
->p_waitcv
, "wait");
377 cv_init(&p
->p_lwpcv
, "lwpwait");
379 LIST_INSERT_HEAD(&p
->p_lwps
, l
, l_sibling
);
381 pid_table
[0].pt_proc
= p
;
382 LIST_INSERT_HEAD(&allproc
, p
, p_list
);
383 LIST_INSERT_HEAD(&alllwp
, l
, l_list
);
385 pid_table
[0].pt_pgrp
= pg
;
386 LIST_INSERT_HEAD(&pg
->pg_members
, p
, p_pglist
);
388 #ifdef __HAVE_SYSCALL_INTERN
389 (*p
->p_emul
->e_syscall_intern
)(p
);
392 callout_init(&l
->l_timeout_ch
, CALLOUT_MPSAFE
);
393 callout_setfunc(&l
->l_timeout_ch
, sleepq_timeout
, l
);
394 cv_init(&l
->l_sigcv
, "sigwait");
396 /* Create credentials. */
397 cred0
= kauth_cred_alloc();
399 kauth_cred_hold(cred0
);
402 /* Create the CWD info. */
403 rw_init(&cwdi0
.cwdi_lock
);
405 /* Create the limits structures. */
406 mutex_init(&limit0
.pl_lock
, MUTEX_DEFAULT
, IPL_NONE
);
407 for (i
= 0; i
< __arraycount(limit0
.pl_rlimit
); i
++)
408 limit0
.pl_rlimit
[i
].rlim_cur
=
409 limit0
.pl_rlimit
[i
].rlim_max
= RLIM_INFINITY
;
411 limit0
.pl_rlimit
[RLIMIT_NOFILE
].rlim_max
= maxfiles
;
412 limit0
.pl_rlimit
[RLIMIT_NOFILE
].rlim_cur
=
413 maxfiles
< nofile
? maxfiles
: nofile
;
415 limit0
.pl_rlimit
[RLIMIT_NPROC
].rlim_max
= maxproc
;
416 limit0
.pl_rlimit
[RLIMIT_NPROC
].rlim_cur
=
417 maxproc
< maxuprc
? maxproc
: maxuprc
;
419 lim
= ptoa(uvmexp
.free
);
420 limit0
.pl_rlimit
[RLIMIT_RSS
].rlim_max
= lim
;
421 limit0
.pl_rlimit
[RLIMIT_MEMLOCK
].rlim_max
= lim
;
422 limit0
.pl_rlimit
[RLIMIT_MEMLOCK
].rlim_cur
= lim
/ 3;
423 limit0
.pl_corename
= defcorename
;
424 limit0
.pl_refcnt
= 1;
425 limit0
.pl_sv_limit
= NULL
;
427 /* Configure virtual memory system, set vm rlimits. */
430 /* Initialize file descriptor table for proc0. */
434 * Initialize proc0's vmspace, which uses the kernel pmap.
435 * All kernel processes (which never have user space mappings)
436 * share proc0's vmspace, and thus, the kernel pmap.
438 uvmspace_init(&vmspace0
, pmap_kernel(), round_page(VM_MIN_ADDRESS
),
439 trunc_page(VM_MAX_ADDRESS
));
441 /* Initialize signal state for proc0. XXX IPL_SCHED */
442 mutex_init(&p
->p_sigacts
->sa_mutex
, MUTEX_DEFAULT
, IPL_SCHED
);
445 proc_initspecific(p
);
448 SYSCALL_TIME_LWP_INIT(l
);
452 * Session reference counting.
456 proc_sesshold(struct session
*ss
)
459 KASSERT(mutex_owned(proc_lock
));
464 proc_sessrele(struct session
*ss
)
467 KASSERT(mutex_owned(proc_lock
));
469 * We keep the pgrp with the same id as the session in order to
470 * stop a process being given the same pid. Since the pgrp holds
471 * a reference to the session, it must be a 'zombie' pgrp by now.
473 if (--ss
->s_count
== 0) {
476 pg
= pg_remove(ss
->s_sid
);
477 mutex_exit(proc_lock
);
479 kmem_free(pg
, sizeof(struct pgrp
));
480 kmem_free(ss
, sizeof(struct session
));
482 mutex_exit(proc_lock
);
487 * Check that the specified process group is in the session of the
489 * Treats negative ids as process ids.
490 * Used to validate TIOCSPGRP requests.
493 pgid_in_session(struct proc
*p
, pid_t pg_id
)
496 struct session
*session
;
499 mutex_enter(proc_lock
);
501 struct proc
*p1
= p_find(-pg_id
, PFIND_LOCKED
| PFIND_UNLOCK_FAIL
);
506 pgrp
= pg_find(pg_id
, PFIND_LOCKED
| PFIND_UNLOCK_FAIL
);
510 session
= pgrp
->pg_session
;
511 if (session
!= p
->p_pgrp
->pg_session
)
515 mutex_exit(proc_lock
);
521 * p_inferior: is p an inferior of q?
524 p_inferior(struct proc
*p
, struct proc
*q
)
527 KASSERT(mutex_owned(proc_lock
));
529 for (; p
!= q
; p
= p
->p_pptr
)
536 * Locate a process by number
539 p_find(pid_t pid
, uint flags
)
544 if (!(flags
& PFIND_LOCKED
))
545 mutex_enter(proc_lock
);
547 p
= pid_table
[pid
& pid_tbl_mask
].pt_proc
;
549 /* Only allow live processes to be found by pid. */
551 if (P_VALID(p
) && p
->p_pid
== pid
&& ((stat
= p
->p_stat
) == SACTIVE
||
552 stat
== SSTOP
|| ((flags
& PFIND_ZOMBIE
) &&
553 (stat
== SZOMB
|| stat
== SDEAD
|| stat
== SDYING
)))) {
554 if (flags
& PFIND_UNLOCK_OK
)
555 mutex_exit(proc_lock
);
558 if (flags
& PFIND_UNLOCK_FAIL
)
559 mutex_exit(proc_lock
);
565 * Locate a process group by number
568 pg_find(pid_t pgid
, uint flags
)
572 if (!(flags
& PFIND_LOCKED
))
573 mutex_enter(proc_lock
);
574 pg
= pid_table
[pgid
& pid_tbl_mask
].pt_pgrp
;
576 * Can't look up a pgrp that only exists because the session
577 * hasn't died yet (traditional)
579 if (pg
== NULL
|| pg
->pg_id
!= pgid
|| LIST_EMPTY(&pg
->pg_members
)) {
580 if (flags
& PFIND_UNLOCK_FAIL
)
581 mutex_exit(proc_lock
);
585 if (flags
& PFIND_UNLOCK_OK
)
586 mutex_exit(proc_lock
);
591 expand_pid_table(void)
594 struct pid_table
*n_pt
, *new_pt
;
600 pt_size
= pid_tbl_mask
+ 1;
601 tsz
= pt_size
* 2 * sizeof(struct pid_table
);
602 new_pt
= kmem_alloc(tsz
, KM_SLEEP
);
604 mutex_enter(proc_lock
);
605 if (pt_size
!= pid_tbl_mask
+ 1) {
606 /* Another process beat us to it... */
607 mutex_exit(proc_lock
);
608 kmem_free(new_pt
, tsz
);
613 * Copy entries from old table into new one.
614 * If 'pid' is 'odd' we need to place in the upper half,
615 * even pid's to the lower half.
616 * Free items stay in the low half so we don't have to
617 * fixup the reference to them.
618 * We stuff free items on the front of the freelist
619 * because we can't write to unmodified entries.
620 * Processing the table backwards maintains a semblance
621 * of issuing pid numbers that increase with time.
625 for (; ; i
--, n_pt
--) {
626 proc
= pid_table
[i
].pt_proc
;
627 pgrp
= pid_table
[i
].pt_pgrp
;
628 if (!P_VALID(proc
)) {
629 /* Up 'use count' so that link is valid */
630 pid
= (P_NEXT(proc
) + pt_size
) & ~pt_size
;
637 /* Save entry in appropriate half of table */
638 n_pt
[pid
& pt_size
].pt_proc
= proc
;
639 n_pt
[pid
& pt_size
].pt_pgrp
= pgrp
;
641 /* Put other piece on start of free list */
642 pid
= (pid
^ pt_size
) & ~pid_tbl_mask
;
643 n_pt
[pid
& pt_size
].pt_proc
=
644 P_FREE((pid
& ~pt_size
) | next_free_pt
);
645 n_pt
[pid
& pt_size
].pt_pgrp
= 0;
646 next_free_pt
= i
| (pid
& pt_size
);
651 /* Save old table size and switch tables */
652 tsz
= pt_size
* sizeof(struct pid_table
);
655 pid_tbl_mask
= pt_size
* 2 - 1;
658 * pid_max starts as PID_MAX (= 30000), once we have 16384
659 * allocated pids we need it to be larger!
661 if (pid_tbl_mask
> PID_MAX
) {
662 pid_max
= pid_tbl_mask
* 2 + 1;
663 pid_alloc_lim
|= pid_alloc_lim
<< 1;
665 pid_alloc_lim
<<= 1; /* doubles number of free slots... */
667 mutex_exit(proc_lock
);
668 kmem_free(n_pt
, tsz
);
677 struct pid_table
*pt
;
679 p
= pool_cache_get(proc_cache
, PR_WAITOK
);
680 p
->p_stat
= SIDL
; /* protect against others */
682 proc_initspecific(p
);
683 /* allocate next free pid */
685 for (;;expand_pid_table()) {
686 if (__predict_false(pid_alloc_cnt
>= pid_alloc_lim
))
687 /* ensure pids cycle through 2000+ values */
689 mutex_enter(proc_lock
);
690 pt
= &pid_table
[next_free_pt
];
692 if (__predict_false(P_VALID(pt
->pt_proc
) || pt
->pt_pgrp
))
693 panic("proc_alloc: slot busy");
695 nxt
= P_NEXT(pt
->pt_proc
);
696 if (nxt
& pid_tbl_mask
)
698 /* Table full - expand (NB last entry not used....) */
699 mutex_exit(proc_lock
);
702 /* pid is 'saved use count' + 'size' + entry */
703 pid
= (nxt
& ~pid_tbl_mask
) + pid_tbl_mask
+ 1 + next_free_pt
;
704 if ((uint
)pid
> (uint
)pid_max
)
707 next_free_pt
= nxt
& pid_tbl_mask
;
709 /* Grab table slot */
713 mutex_exit(proc_lock
);
719 * Free a process id - called from proc_free (in kern_exit.c)
721 * Called with the proc_lock held.
724 proc_free_pid(struct proc
*p
)
726 pid_t pid
= p
->p_pid
;
727 struct pid_table
*pt
;
729 KASSERT(mutex_owned(proc_lock
));
731 pt
= &pid_table
[pid
& pid_tbl_mask
];
733 if (__predict_false(pt
->pt_proc
!= p
))
734 panic("proc_free: pid_table mismatch, pid %x, proc %p",
737 /* save pid use count in slot */
738 pt
->pt_proc
= P_FREE(pid
& ~pid_tbl_mask
);
740 if (pt
->pt_pgrp
== NULL
) {
741 /* link last freed entry onto ours */
743 pt
= &pid_table
[last_free_pt
];
744 pt
->pt_proc
= P_FREE(P_NEXT(pt
->pt_proc
) | pid
);
749 atomic_dec_uint(&nprocs
);
753 proc_free_mem(struct proc
*p
)
756 pool_cache_put(proc_cache
, p
);
760 * proc_enterpgrp: move p to a new or existing process group (and session).
762 * If we are creating a new pgrp, the pgid should equal
763 * the calling process' pid.
764 * It is only valid to enter a process group that is in the session
766 * Also mksess should only be set if we are creating a process group
768 * Only called from sys_setsid and sys_setpgid.
771 proc_enterpgrp(struct proc
*curp
, pid_t pid
, pid_t pgid
, bool mksess
)
773 struct pgrp
*new_pgrp
, *pgrp
;
774 struct session
*sess
;
777 pid_t pg_id
= NO_PGID
;
779 sess
= mksess
? kmem_alloc(sizeof(*sess
), KM_SLEEP
) : NULL
;
781 /* Allocate data areas we might need before doing any validity checks */
782 mutex_enter(proc_lock
); /* Because pid_table might change */
783 if (pid_table
[pgid
& pid_tbl_mask
].pt_pgrp
== 0) {
784 mutex_exit(proc_lock
);
785 new_pgrp
= kmem_alloc(sizeof(*new_pgrp
), KM_SLEEP
);
786 mutex_enter(proc_lock
);
789 rval
= EPERM
; /* most common error (to save typing) */
791 /* Check pgrp exists or can be created */
792 pgrp
= pid_table
[pgid
& pid_tbl_mask
].pt_pgrp
;
793 if (pgrp
!= NULL
&& pgrp
->pg_id
!= pgid
)
796 /* Can only set another process under restricted circumstances. */
797 if (pid
!= curp
->p_pid
) {
798 /* must exist and be one of our children... */
799 if ((p
= p_find(pid
, PFIND_LOCKED
)) == NULL
||
800 !p_inferior(p
, curp
)) {
804 /* ... in the same session... */
805 if (sess
!= NULL
|| p
->p_session
!= curp
->p_session
)
807 /* ... existing pgid must be in same session ... */
808 if (pgrp
!= NULL
&& pgrp
->pg_session
!= p
->p_session
)
810 /* ... and not done an exec. */
811 if (p
->p_flag
& PK_EXEC
) {
816 /* ... setsid() cannot re-enter a pgrp */
817 if (mksess
&& (curp
->p_pgid
== curp
->p_pid
||
818 pg_find(curp
->p_pid
, PFIND_LOCKED
)))
823 /* Changing the process group/session of a session
824 leader is definitely off limits. */
825 if (SESS_LEADER(p
)) {
826 if (sess
== NULL
&& p
->p_pgrp
== pgrp
)
827 /* unless it's a definite noop */
832 /* Can only create a process group with id of process */
833 if (pgrp
== NULL
&& pgid
!= pid
)
836 /* Can only create a session if creating pgrp */
837 if (sess
!= NULL
&& pgrp
!= NULL
)
840 /* Check we allocated memory for a pgrp... */
841 if (pgrp
== NULL
&& new_pgrp
== NULL
)
844 /* Don't attach to 'zombie' pgrp */
845 if (pgrp
!= NULL
&& LIST_EMPTY(&pgrp
->pg_members
))
848 /* Expect to succeed now */
851 if (pgrp
== p
->p_pgrp
)
855 /* Ok all setup, link up required structures */
861 sess
->s_sid
= p
->p_pid
;
864 sess
->s_ttyvp
= NULL
;
866 sess
->s_flags
= p
->p_session
->s_flags
& ~S_LOGIN_SET
;
867 memcpy(sess
->s_login
, p
->p_session
->s_login
,
868 sizeof(sess
->s_login
));
869 p
->p_lflag
&= ~PL_CONTROLT
;
871 sess
= p
->p_pgrp
->pg_session
;
874 pgrp
->pg_session
= sess
;
878 LIST_INIT(&pgrp
->pg_members
);
880 if (__predict_false(pid_table
[pgid
& pid_tbl_mask
].pt_pgrp
))
881 panic("enterpgrp: pgrp table slot in use");
882 if (__predict_false(mksess
&& p
!= curp
))
883 panic("enterpgrp: mksession and p != curproc");
885 pid_table
[pgid
& pid_tbl_mask
].pt_pgrp
= pgrp
;
890 * Adjust eligibility of affected pgrps to participate in job control.
891 * Increment eligibility counts before decrementing, otherwise we
892 * could reach 0 spuriously during the first call.
895 fixjobc(p
, p
->p_pgrp
, 0);
897 /* Interlock with ttread(). */
898 mutex_spin_enter(&tty_lock
);
900 /* Move process to requested group. */
901 LIST_REMOVE(p
, p_pglist
);
902 if (LIST_EMPTY(&p
->p_pgrp
->pg_members
))
903 /* defer delete until we've dumped the lock */
904 pg_id
= p
->p_pgrp
->pg_id
;
906 LIST_INSERT_HEAD(&pgrp
->pg_members
, p
, p_pglist
);
908 /* Done with the swap; we can release the tty mutex. */
909 mutex_spin_exit(&tty_lock
);
912 if (pg_id
!= NO_PGID
) {
913 /* Releases proc_lock. */
916 mutex_exit(proc_lock
);
919 kmem_free(sess
, sizeof(*sess
));
920 if (new_pgrp
!= NULL
)
921 kmem_free(new_pgrp
, sizeof(*new_pgrp
));
923 if (__predict_false(rval
))
924 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
925 pid
, pgid
, mksess
, curp
->p_pid
, rval
);
931 * proc_leavepgrp: remove a process from its process group.
932 * => must be called with the proc_lock held, which will be released;
935 proc_leavepgrp(struct proc
*p
)
939 KASSERT(mutex_owned(proc_lock
));
941 /* Interlock with ttread() */
942 mutex_spin_enter(&tty_lock
);
944 LIST_REMOVE(p
, p_pglist
);
946 mutex_spin_exit(&tty_lock
);
948 if (LIST_EMPTY(&pgrp
->pg_members
)) {
949 /* Releases proc_lock. */
950 pg_delete(pgrp
->pg_id
);
952 mutex_exit(proc_lock
);
957 * pg_remove: remove a process group from the table.
958 * => must be called with the proc_lock held;
959 * => returns process group to free;
962 pg_remove(pid_t pg_id
)
965 struct pid_table
*pt
;
967 KASSERT(mutex_owned(proc_lock
));
969 pt
= &pid_table
[pg_id
& pid_tbl_mask
];
972 KASSERT(pgrp
!= NULL
);
973 KASSERT(pgrp
->pg_id
== pg_id
);
974 KASSERT(LIST_EMPTY(&pgrp
->pg_members
));
978 if (!P_VALID(pt
->pt_proc
)) {
979 /* Orphaned pgrp, put slot onto free list. */
980 KASSERT((P_NEXT(pt
->pt_proc
) & pid_tbl_mask
) == 0);
981 pg_id
&= pid_tbl_mask
;
982 pt
= &pid_table
[last_free_pt
];
983 pt
->pt_proc
= P_FREE(P_NEXT(pt
->pt_proc
) | pg_id
);
984 last_free_pt
= pg_id
;
991 * pg_delete: delete and free a process group.
992 * => must be called with the proc_lock held, which will be released.
995 pg_delete(pid_t pg_id
)
1001 KASSERT(mutex_owned(proc_lock
));
1003 pg
= pid_table
[pg_id
& pid_tbl_mask
].pt_pgrp
;
1004 if (pg
== NULL
|| pg
->pg_id
!= pg_id
|| !LIST_EMPTY(&pg
->pg_members
)) {
1005 mutex_exit(proc_lock
);
1009 ss
= pg
->pg_session
;
1011 /* Remove reference (if any) from tty to this process group */
1012 mutex_spin_enter(&tty_lock
);
1014 if (ttyp
!= NULL
&& ttyp
->t_pgrp
== pg
) {
1015 ttyp
->t_pgrp
= NULL
;
1016 KASSERT(ttyp
->t_session
== ss
);
1018 mutex_spin_exit(&tty_lock
);
1021 * The leading process group in a session is freed by proc_sessrele(),
1022 * if last reference. Note: proc_sessrele() releases proc_lock.
1024 pg
= (ss
->s_sid
!= pg
->pg_id
) ? pg_remove(pg_id
) : NULL
;
1028 /* Free it, if was not done by proc_sessrele(). */
1029 kmem_free(pg
, sizeof(struct pgrp
));
1034 * Adjust pgrp jobc counters when specified process changes process group.
1035 * We count the number of processes in each process group that "qualify"
1036 * the group for terminal job control (those with a parent in a different
1037 * process group of the same session). If that count reaches zero, the
1038 * process group becomes orphaned. Check both the specified process'
1039 * process group and that of its children.
1040 * entering == 0 => p is leaving specified group.
1041 * entering == 1 => p is entering specified group.
1043 * Call with proc_lock held.
1046 fixjobc(struct proc
*p
, struct pgrp
*pgrp
, int entering
)
1048 struct pgrp
*hispgrp
;
1049 struct session
*mysession
= pgrp
->pg_session
;
1052 KASSERT(mutex_owned(proc_lock
));
1055 * Check p's parent to see whether p qualifies its own process
1056 * group; if so, adjust count for p's process group.
1058 hispgrp
= p
->p_pptr
->p_pgrp
;
1059 if (hispgrp
!= pgrp
&& hispgrp
->pg_session
== mysession
) {
1062 p
->p_lflag
&= ~PL_ORPHANPG
;
1063 } else if (--pgrp
->pg_jobc
== 0)
1068 * Check this process' children to see whether they qualify
1069 * their process groups; if so, adjust counts for children's
1072 LIST_FOREACH(child
, &p
->p_children
, p_sibling
) {
1073 hispgrp
= child
->p_pgrp
;
1074 if (hispgrp
!= pgrp
&& hispgrp
->pg_session
== mysession
&&
1077 child
->p_lflag
&= ~PL_ORPHANPG
;
1079 } else if (--hispgrp
->pg_jobc
== 0)
1086 * A process group has become orphaned;
1087 * if there are any stopped processes in the group,
1088 * hang up all processes in that group.
1090 * Call with proc_lock held.
1093 orphanpg(struct pgrp
*pg
)
1097 KASSERT(mutex_owned(proc_lock
));
1099 LIST_FOREACH(p
, &pg
->pg_members
, p_pglist
) {
1100 if (p
->p_stat
== SSTOP
) {
1101 p
->p_lflag
|= PL_ORPHANPG
;
1103 psignal(p
, SIGCONT
);
1109 #include <ddb/db_output.h>
1110 void pidtbl_dump(void);
1114 struct pid_table
*pt
;
1119 db_printf("pid table %p size %x, next %x, last %x\n",
1120 pid_table
, pid_tbl_mask
+1,
1121 next_free_pt
, last_free_pt
);
1122 for (pt
= pid_table
, id
= 0; id
<= pid_tbl_mask
; id
++, pt
++) {
1124 if (!P_VALID(p
) && !pt
->pt_pgrp
)
1126 db_printf(" id %x: ", id
);
1128 db_printf("proc %p id %d (0x%x) %s\n",
1129 p
, p
->p_pid
, p
->p_pid
, p
->p_comm
);
1131 db_printf("next %x use %x\n",
1132 P_NEXT(p
) & pid_tbl_mask
,
1133 P_NEXT(p
) & ~pid_tbl_mask
);
1134 if ((pgrp
= pt
->pt_pgrp
)) {
1135 db_printf("\tsession %p, sid %d, count %d, login %s\n",
1136 pgrp
->pg_session
, pgrp
->pg_session
->s_sid
,
1137 pgrp
->pg_session
->s_count
,
1138 pgrp
->pg_session
->s_login
);
1139 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1140 pgrp
, pgrp
->pg_id
, pgrp
->pg_jobc
,
1141 LIST_FIRST(&pgrp
->pg_members
));
1142 LIST_FOREACH(p
, &pgrp
->pg_members
, p_pglist
) {
1143 db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1144 p
->p_pid
, p
, p
->p_pgrp
, p
->p_comm
);
1151 #ifdef KSTACK_CHECK_MAGIC
1153 #define KSTACK_MAGIC 0xdeadbeaf
1155 /* XXX should be per process basis? */
1156 static int kstackleftmin
= KSTACK_SIZE
;
1157 static int kstackleftthres
= KSTACK_SIZE
/ 8;
1160 kstack_setup_magic(const struct lwp
*l
)
1163 uint32_t const *end
;
1166 KASSERT(l
!= &lwp0
);
1169 * fill all the stack with magic number
1170 * so that later modification on it can be detected.
1172 ip
= (uint32_t *)KSTACK_LOWEST_ADDR(l
);
1173 end
= (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l
) + KSTACK_SIZE
);
1174 for (; ip
< end
; ip
++) {
1180 kstack_check_magic(const struct lwp
*l
)
1182 uint32_t const *ip
, *end
;
1187 /* don't check proc0 */ /*XXX*/
1191 #ifdef __MACHINE_STACK_GROWS_UP
1192 /* stack grows upwards (eg. hppa) */
1193 ip
= (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l
) + KSTACK_SIZE
);
1194 end
= (uint32_t *)KSTACK_LOWEST_ADDR(l
);
1195 for (ip
--; ip
>= end
; ip
--)
1196 if (*ip
!= KSTACK_MAGIC
)
1199 stackleft
= (void *)KSTACK_LOWEST_ADDR(l
) + KSTACK_SIZE
- (void *)ip
;
1200 #else /* __MACHINE_STACK_GROWS_UP */
1201 /* stack grows downwards (eg. i386) */
1202 ip
= (uint32_t *)KSTACK_LOWEST_ADDR(l
);
1203 end
= (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l
) + KSTACK_SIZE
);
1204 for (; ip
< end
; ip
++)
1205 if (*ip
!= KSTACK_MAGIC
)
1208 stackleft
= ((const char *)ip
) - (const char *)KSTACK_LOWEST_ADDR(l
);
1209 #endif /* __MACHINE_STACK_GROWS_UP */
1211 if (kstackleftmin
> stackleft
) {
1212 kstackleftmin
= stackleft
;
1213 if (stackleft
< kstackleftthres
)
1214 printf("warning: kernel stack left %d bytes"
1215 "(pid %u:lid %u)\n", stackleft
,
1216 (u_int
)l
->l_proc
->p_pid
, (u_int
)l
->l_lid
);
1219 if (stackleft
<= 0) {
1220 panic("magic on the top of kernel stack changed for "
1221 "pid %u, lid %u: maybe kernel stack overflow",
1222 (u_int
)l
->l_proc
->p_pid
, (u_int
)l
->l_lid
);
1225 #endif /* KSTACK_CHECK_MAGIC */
1228 proclist_foreach_call(struct proclist
*list
,
1229 int (*callback
)(struct proc
*, void *arg
), void *arg
)
1235 marker
.p_flag
= PK_MARKER
;
1236 mutex_enter(proc_lock
);
1237 for (p
= LIST_FIRST(list
); ret
== 0 && p
!= NULL
;) {
1238 if (p
->p_flag
& PK_MARKER
) {
1239 p
= LIST_NEXT(p
, p_list
);
1242 LIST_INSERT_AFTER(p
, &marker
, p_list
);
1243 ret
= (*callback
)(p
, arg
);
1244 KASSERT(mutex_owned(proc_lock
));
1245 p
= LIST_NEXT(&marker
, p_list
);
1246 LIST_REMOVE(&marker
, p_list
);
1248 mutex_exit(proc_lock
);
1254 proc_vmspace_getref(struct proc
*p
, struct vmspace
**vm
)
1257 /* XXXCDC: how should locking work here? */
1259 /* curproc exception is for coredump. */
1261 if ((p
!= curproc
&& (p
->p_sflag
& PS_WEXIT
) != 0) ||
1262 (p
->p_vmspace
->vm_refcnt
< 1)) { /* XXX */
1266 uvmspace_addref(p
->p_vmspace
);
1273 * Acquire a write lock on the process credential.
1276 proc_crmod_enter(void)
1278 struct lwp
*l
= curlwp
;
1279 struct proc
*p
= l
->l_proc
;
1284 /* Reset what needs to be reset in plimit. */
1285 if (p
->p_limit
->pl_corename
!= defcorename
) {
1286 lim_privatise(p
, false);
1288 mutex_enter(&lim
->pl_lock
);
1289 cn
= lim
->pl_corename
;
1290 lim
->pl_corename
= defcorename
;
1291 mutex_exit(&lim
->pl_lock
);
1292 if (cn
!= defcorename
)
1296 mutex_enter(p
->p_lock
);
1298 /* Ensure the LWP cached credentials are up to date. */
1299 if ((oc
= l
->l_cred
) != p
->p_cred
) {
1300 kauth_cred_hold(p
->p_cred
);
1301 l
->l_cred
= p
->p_cred
;
1302 kauth_cred_free(oc
);
1308 * Set in a new process credential, and drop the write lock. The credential
1309 * must have a reference already. Optionally, free a no-longer required
1310 * credential. The scheduler also needs to inspect p_cred, so we also
1311 * briefly acquire the sched state mutex.
1314 proc_crmod_leave(kauth_cred_t scred
, kauth_cred_t fcred
, bool sugid
)
1316 struct lwp
*l
= curlwp
, *l2
;
1317 struct proc
*p
= l
->l_proc
;
1320 KASSERT(mutex_owned(p
->p_lock
));
1322 /* Is there a new credential to set in? */
1323 if (scred
!= NULL
) {
1325 LIST_FOREACH(l2
, &p
->p_lwps
, l_sibling
) {
1327 l2
->l_prflag
|= LPR_CRMOD
;
1330 /* Ensure the LWP cached credentials are up to date. */
1331 if ((oc
= l
->l_cred
) != scred
) {
1332 kauth_cred_hold(scred
);
1336 oc
= NULL
; /* XXXgcc */
1340 * Mark process as having changed credentials, stops
1343 p
->p_flag
|= PK_SUGID
;
1346 mutex_exit(p
->p_lock
);
1348 /* If there is a credential to be released, free it now. */
1349 if (fcred
!= NULL
) {
1350 KASSERT(scred
!= NULL
);
1351 kauth_cred_free(fcred
);
1353 kauth_cred_free(oc
);
1358 * proc_specific_key_create --
1359 * Create a key for subsystem proc-specific data.
1362 proc_specific_key_create(specificdata_key_t
*keyp
, specificdata_dtor_t dtor
)
1365 return (specificdata_key_create(proc_specificdata_domain
, keyp
, dtor
));
1369 * proc_specific_key_delete --
1370 * Delete a key for subsystem proc-specific data.
1373 proc_specific_key_delete(specificdata_key_t key
)
1376 specificdata_key_delete(proc_specificdata_domain
, key
);
1380 * proc_initspecific --
1381 * Initialize a proc's specificdata container.
1384 proc_initspecific(struct proc
*p
)
1388 error
= specificdata_init(proc_specificdata_domain
, &p
->p_specdataref
);
1389 KASSERT(error
== 0);
1393 * proc_finispecific --
1394 * Finalize a proc's specificdata container.
1397 proc_finispecific(struct proc
*p
)
1400 specificdata_fini(proc_specificdata_domain
, &p
->p_specdataref
);
1404 * proc_getspecific --
1405 * Return proc-specific data corresponding to the specified key.
1408 proc_getspecific(struct proc
*p
, specificdata_key_t key
)
1411 return (specificdata_getspecific(proc_specificdata_domain
,
1412 &p
->p_specdataref
, key
));
1416 * proc_setspecific --
1417 * Set proc-specific data corresponding to the specified key.
1420 proc_setspecific(struct proc
*p
, specificdata_key_t key
, void *data
)
1423 specificdata_setspecific(proc_specificdata_domain
,
1424 &p
->p_specdataref
, key
, data
);
1428 proc_uidmatch(kauth_cred_t cred
, kauth_cred_t target
)
1432 if (kauth_cred_getuid(cred
) != kauth_cred_getuid(target
) ||
1433 kauth_cred_getuid(cred
) != kauth_cred_getsvuid(target
)) {
1435 * suid proc of ours or proc not ours
1438 } else if (kauth_cred_getgid(target
) != kauth_cred_getsvgid(target
)) {
1440 * sgid proc has sgid back to us temporarily
1445 * our rgid must be in target's group list (ie,
1446 * sub-processes started by a sgid process)
1450 if (kauth_cred_ismember_gid(cred
,
1451 kauth_cred_getgid(target
), &ismember
) != 0 ||