preprocessor cleanup: __sparc
[unleashed/tickless.git] / kernel / os / exit.c
blobc8b8caf0056d5a5b2e63d46a2af0aa2ca5deeb71
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/user.h>
35 #include <sys/errno.h>
36 #include <sys/proc.h>
37 #include <sys/ucontext.h>
38 #include <sys/procfs.h>
39 #include <sys/vnode.h>
40 #include <sys/acct.h>
41 #include <sys/var.h>
42 #include <sys/cmn_err.h>
43 #include <sys/debug.h>
44 #include <sys/wait.h>
45 #include <sys/siginfo.h>
46 #include <sys/procset.h>
47 #include <sys/class.h>
48 #include <sys/file.h>
49 #include <sys/session.h>
50 #include <sys/kmem.h>
51 #include <sys/vtrace.h>
52 #include <sys/prsystm.h>
53 #include <sys/ipc.h>
54 #include <sys/sem_impl.h>
55 #include <c2/audit.h>
56 #include <sys/aio_impl.h>
57 #include <vm/as.h>
58 #include <sys/poll.h>
59 #include <sys/door.h>
60 #include <sys/lwpchan_impl.h>
61 #include <sys/utrap.h>
62 #include <sys/task.h>
63 #include <sys/exacct.h>
64 #include <sys/cyclic.h>
65 #include <sys/schedctl.h>
66 #include <sys/rctl.h>
67 #include <sys/contract_impl.h>
68 #include <sys/contract/process_impl.h>
69 #include <sys/list.h>
70 #include <sys/dtrace.h>
71 #include <sys/pool.h>
72 #include <sys/sdt.h>
73 #include <sys/corectl.h>
74 #include <sys/brand.h>
75 #include <sys/libc_kernel.h>
78 * convert code/data pair into old style wait status
80 int
81 wstat(int code, int data)
83 int stat = (data & 0377);
85 switch (code) {
86 case CLD_EXITED:
87 stat <<= 8;
88 break;
89 case CLD_DUMPED:
90 stat |= WCOREFLG;
91 break;
92 case CLD_KILLED:
93 break;
94 case CLD_TRAPPED:
95 case CLD_STOPPED:
96 stat <<= 8;
97 stat |= WSTOPFLG;
98 break;
99 case CLD_CONTINUED:
100 stat = WCONTFLG;
101 break;
102 default:
103 cmn_err(CE_PANIC, "wstat: bad code");
104 /* NOTREACHED */
106 return (stat);
109 static char *
110 exit_reason(char *buf, size_t bufsz, int what, int why)
112 switch (why) {
113 case CLD_EXITED:
114 (void) snprintf(buf, bufsz, "exited with status %d", what);
115 break;
116 case CLD_KILLED:
117 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
118 break;
119 case CLD_DUMPED:
120 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
121 break;
122 default:
123 (void) snprintf(buf, bufsz, "encountered unknown error "
124 "(%d, %d)", why, what);
125 break;
128 return (buf);
132 * exit system call: pass back caller's arg.
134 void
135 rexit(int rval)
137 exit(CLD_EXITED, rval);
141 * Called by proc_exit() when a zone's init exits, presumably because
142 * it failed. As long as the given zone is still in the "running"
143 * state, we will re-exec() init, but first we need to reset things
144 * which are usually inherited across exec() but will break init's
145 * assumption that it is being exec()'d from a virgin process. Most
146 * importantly this includes closing all file descriptors (exec only
147 * closes those marked close-on-exec) and resetting signals (exec only
148 * resets handled signals, and we need to clear any signals which
149 * killed init). Anything else that exec(2) says would be inherited,
150 * but would affect the execution of init, needs to be reset.
152 static int
153 restart_init(int what, int why)
155 kthread_t *t = curthread;
156 klwp_t *lwp = ttolwp(t);
157 proc_t *p = ttoproc(t);
158 user_t *up = PTOU(p);
160 vnode_t *oldcd, *oldrd;
161 int i, err;
162 char reason_buf[64];
165 * Let zone admin (and global zone admin if this is for a non-global
166 * zone) know that init has failed and will be restarted.
168 zcmn_err(p->p_zone->zone_id, CE_WARN,
169 "init(1M) %s: restarting automatically",
170 exit_reason(reason_buf, sizeof (reason_buf), what, why));
172 if (!INGLOBALZONE(p)) {
173 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
174 "restarting automatically",
175 p->p_zone->zone_name, p->p_pid, reason_buf);
179 * Remove any fpollinfo_t's for this (last) thread from our file
180 * descriptors so closeall() can ASSERT() that they're all gone.
181 * Then close all open file descriptors in the process.
183 pollcleanup();
184 closeall(P_FINFO(p));
187 * Grab p_lock and begin clearing miscellaneous global process
188 * state that needs to be reset before we exec the new init(1M).
191 mutex_enter(&p->p_lock);
192 prbarrier(p);
194 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
195 up->u_cmask = CMASK;
197 sigemptyset(&t->t_hold);
198 sigemptyset(&t->t_sig);
199 sigemptyset(&t->t_extsig);
201 sigemptyset(&p->p_sig);
202 sigemptyset(&p->p_extsig);
204 sigdelq(p, t, 0);
205 sigdelq(p, NULL, 0);
207 if (p->p_killsqp) {
208 siginfofree(p->p_killsqp);
209 p->p_killsqp = NULL;
213 * Reset any signals that are ignored back to the default disposition.
214 * Other u_signal members will be cleared when exec calls sigdefault().
216 for (i = 1; i < NSIG; i++) {
217 if (up->u_signal[i - 1] == SIG_IGN) {
218 up->u_signal[i - 1] = SIG_DFL;
219 sigemptyset(&up->u_sigmask[i - 1]);
224 * Clear the current signal, any signal info associated with it, and
225 * any signal information from contracts and/or contract templates.
227 lwp->lwp_cursig = 0;
228 lwp->lwp_extsig = 0;
229 if (lwp->lwp_curinfo != NULL) {
230 siginfofree(lwp->lwp_curinfo);
231 lwp->lwp_curinfo = NULL;
233 lwp_ctmpl_clear(lwp);
236 * Reset both the process root directory and the current working
237 * directory to the root of the zone just as we do during boot.
239 VN_HOLD(p->p_zone->zone_rootvp);
240 oldrd = up->u_rdir;
241 up->u_rdir = p->p_zone->zone_rootvp;
243 VN_HOLD(p->p_zone->zone_rootvp);
244 oldcd = up->u_cdir;
245 up->u_cdir = p->p_zone->zone_rootvp;
247 if (up->u_cwd != NULL) {
248 refstr_rele(up->u_cwd);
249 up->u_cwd = NULL;
252 mutex_exit(&p->p_lock);
254 if (oldrd != NULL)
255 VN_RELE(oldrd);
256 if (oldcd != NULL)
257 VN_RELE(oldcd);
259 /* Free the controlling tty. (freectty() always assumes curproc.) */
260 ASSERT(p == curproc);
261 (void) freectty(B_TRUE);
264 * Now exec() the new init(1M) on top of the current process. If we
265 * succeed, the caller will treat this like a successful system call.
266 * If we fail, we issue messages and the caller will proceed with exit.
268 err = exec_init(p->p_zone->zone_initname, NULL);
270 if (err == 0)
271 return (0);
273 zcmn_err(p->p_zone->zone_id, CE_WARN,
274 "failed to restart init(1M) (err=%d): system reboot required", err);
276 if (!INGLOBALZONE(p)) {
277 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
278 "(pid %d, err=%d): zoneadm(1M) boot required",
279 p->p_zone->zone_name, p->p_pid, err);
282 return (-1);
286 * Release resources.
287 * Enter zombie state.
288 * Wake up parent and init processes,
289 * and dispose of children.
291 void
292 exit(int why, int what)
295 * If proc_exit() fails, then some other lwp in the process
296 * got there first. We just have to call lwp_exit() to allow
297 * the other lwp to finish exiting the process. Otherwise we're
298 * restarting init, and should return.
300 if (proc_exit(why, what) != 0) {
301 mutex_enter(&curproc->p_lock);
302 ASSERT(curproc->p_flag & SEXITLWPS);
303 lwp_exit();
304 /* NOTREACHED */
309 * Set the SEXITING flag on the process, after making sure /proc does
310 * not have it locked. This is done in more places than proc_exit(),
311 * so it is a separate function.
313 void
314 proc_is_exiting(proc_t *p)
316 mutex_enter(&p->p_lock);
317 prbarrier(p);
318 p->p_flag |= SEXITING;
319 mutex_exit(&p->p_lock);
323 * Return value:
324 * 1 - exitlwps() failed, call (or continue) lwp_exit()
325 * 0 - restarting init. Return through system call path
328 proc_exit(int why, int what)
330 kthread_t *t = curthread;
331 klwp_t *lwp = ttolwp(t);
332 proc_t *p = ttoproc(t);
333 zone_t *z = p->p_zone;
334 timeout_id_t tmp_id;
335 int rv;
336 proc_t *q;
337 task_t *tk;
338 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
339 sigqueue_t *sqp;
340 lwpdir_t *lwpdir;
341 uint_t lwpdir_sz;
342 tidhash_t *tidhash;
343 uint_t tidhash_sz;
344 ret_tidhash_t *ret_tidhash;
345 refstr_t *cwd;
346 hrtime_t hrutime, hrstime;
347 int evaporate;
350 * Stop and discard the process's lwps except for the current one,
351 * unless some other lwp beat us to it. If exitlwps() fails then
352 * return and the calling lwp will call (or continue in) lwp_exit().
354 proc_is_exiting(p);
355 if (exitlwps(0) != 0)
356 return (1);
358 mutex_enter(&p->p_lock);
359 if (p->p_ttime > 0) {
361 * Account any remaining ticks charged to this process
362 * on its way out.
364 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
365 p->p_ttime = 0;
367 mutex_exit(&p->p_lock);
369 DTRACE_PROC(lwp__exit);
370 DTRACE_PROC1(exit, int, why);
373 * Will perform any brand specific proc exit processing, since this
374 * is always the last lwp, will also perform lwp_exit and free brand
375 * data
377 if (PROC_IS_BRANDED(p)) {
378 lwp_detach_brand_hdlrs(lwp);
379 brand_clearbrand(p, B_FALSE);
383 * Don't let init exit unless zone_start_init() failed its exec, or
384 * we are shutting down the zone or the machine.
386 * Since we are single threaded, we don't need to lock the
387 * following accesses to zone_proc_initpid.
389 if (p->p_pid == z->zone_proc_initpid) {
390 if (z->zone_boot_err == 0 &&
391 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
392 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
393 if (z->zone_restart_init == B_TRUE) {
394 if (restart_init(what, why) == 0)
395 return (0);
396 } else {
397 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
398 CRED());
403 * Since we didn't or couldn't restart init, we clear
404 * the zone's init state and proceed with exit
405 * processing.
407 z->zone_proc_initpid = -1;
410 lwp_pcb_exit();
413 * Allocate a sigqueue now, before we grab locks.
414 * It will be given to sigcld(), below.
415 * Special case: If we will be making the process disappear
416 * without a trace because it is either:
417 * * an exiting SSYS process, or
418 * * a posix_spawn() vfork child who requests it,
419 * we don't bother to allocate a useless sigqueue.
421 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
422 why == CLD_EXITED && what == _EVAPORATE);
423 if (!evaporate)
424 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
427 * revoke any doors created by the process.
429 if (p->p_door_list)
430 door_exit();
433 * Release schedctl data structures.
435 if (p->p_pagep)
436 schedctl_proc_cleanup();
439 * make sure all pending kaio has completed.
441 if (p->p_aio)
442 aio_cleanup_exit();
445 * discard the lwpchan cache.
447 if (p->p_lcp != NULL)
448 lwpchan_destroy_cache(0);
451 * Clean up any DTrace helper actions or probes for the process.
453 if (p->p_dtrace_helpers != NULL) {
454 ASSERT(dtrace_helpers_cleanup != NULL);
455 (*dtrace_helpers_cleanup)(p);
459 * Clean up any signalfd state for the process.
461 if (p->p_sigfd != NULL) {
462 VERIFY(sigfd_exit_helper != NULL);
463 (*sigfd_exit_helper)();
466 /* untimeout the realtime timers */
467 if (p->p_itimer != NULL)
468 timer_exit();
470 if ((tmp_id = p->p_alarmid) != 0) {
471 p->p_alarmid = 0;
472 (void) untimeout(tmp_id);
476 * Remove any fpollinfo_t's for this (last) thread from our file
477 * descriptors so closeall() can ASSERT() that they're all gone.
479 pollcleanup();
481 if (p->p_rprof_cyclic != CYCLIC_NONE) {
482 mutex_enter(&cpu_lock);
483 cyclic_remove(p->p_rprof_cyclic);
484 mutex_exit(&cpu_lock);
487 mutex_enter(&p->p_lock);
490 * Clean up any DTrace probes associated with this process.
492 if (p->p_dtrace_probes) {
493 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
494 dtrace_fasttrap_exit_ptr(p);
497 while ((tmp_id = p->p_itimerid) != 0) {
498 p->p_itimerid = 0;
499 mutex_exit(&p->p_lock);
500 (void) untimeout(tmp_id);
501 mutex_enter(&p->p_lock);
504 lwp_cleanup();
507 * We are about to exit; prevent our resource associations from
508 * being changed.
510 pool_barrier_enter();
513 * Block the process against /proc now that we have really
514 * acquired p->p_lock (to manipulate p_tlist at least).
516 prbarrier(p);
518 sigfillset(&p->p_ignore);
519 sigemptyset(&p->p_siginfo);
520 sigemptyset(&p->p_sig);
521 sigemptyset(&p->p_extsig);
522 sigemptyset(&t->t_sig);
523 sigemptyset(&t->t_extsig);
524 sigemptyset(&p->p_sigmask);
525 sigdelq(p, t, 0);
526 lwp->lwp_cursig = 0;
527 lwp->lwp_extsig = 0;
528 p->p_flag &= ~(SKILLED | SEXTKILLED);
529 if (lwp->lwp_curinfo) {
530 siginfofree(lwp->lwp_curinfo);
531 lwp->lwp_curinfo = NULL;
534 t->t_proc_flag |= TP_LWPEXIT;
535 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
536 prlwpexit(t); /* notify /proc */
537 lwp_hash_out(p, t->t_tid);
538 prexit(p);
540 p->p_lwpcnt = 0;
541 p->p_tlist = NULL;
542 sigqfree(p);
543 term_mstate(t);
544 p->p_mterm = gethrtime();
546 exec_vp = p->p_exec;
547 execdir_vp = p->p_execdir;
548 p->p_exec = NULLVP;
549 p->p_execdir = NULLVP;
550 mutex_exit(&p->p_lock);
552 pr_free_watched_pages(p);
554 closeall(P_FINFO(p));
556 /* Free the controlling tty. (freectty() always assumes curproc.) */
557 ASSERT(p == curproc);
558 (void) freectty(B_TRUE);
560 if (p->p_semacct) /* IPC semaphore exit */
561 semexit(p);
562 rv = wstat(why, what);
564 acct(rv & 0xff);
565 exacct_commit_proc(p, rv);
568 * Release any resources associated with C2 auditing
570 if (AU_AUDITING()) {
572 * audit exit system call
574 audit_exit(why, what);
578 * Free address space.
580 relvm();
582 if (exec_vp) {
584 * Close this executable which has been opened when the process
585 * was created by getproc().
587 (void) fop_close(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
588 VN_RELE(exec_vp);
590 if (execdir_vp)
591 VN_RELE(execdir_vp);
594 * Release held contracts.
596 contract_exit(p);
599 * Depart our encapsulating process contract.
601 if ((p->p_flag & SSYS) == 0) {
602 ASSERT(p->p_ct_process);
603 contract_process_exit(p->p_ct_process, p, rv);
607 * Remove pool association, and block if requested by pool_do_bind.
609 mutex_enter(&p->p_lock);
610 ASSERT(p->p_pool->pool_ref > 0);
611 atomic_dec_32(&p->p_pool->pool_ref);
612 p->p_pool = pool_default;
614 * Now that our address space has been freed and all other threads
615 * in this process have exited, set the PEXITED pool flag. This
616 * tells the pools subsystems to ignore this process if it was
617 * requested to rebind this process to a new pool.
619 p->p_poolflag |= PEXITED;
620 pool_barrier_exit();
621 mutex_exit(&p->p_lock);
623 mutex_enter(&pidlock);
626 * Delete this process from the newstate list of its parent. We
627 * will put it in the right place in the sigcld in the end.
629 delete_ns(p->p_parent, p);
632 * Reassign the orphans to the next of kin.
633 * Don't rearrange init's orphanage.
635 if ((q = p->p_orphan) != NULL && p != proc_init) {
637 proc_t *nokp = p->p_nextofkin;
639 for (;;) {
640 q->p_nextofkin = nokp;
641 if (q->p_nextorph == NULL)
642 break;
643 q = q->p_nextorph;
645 q->p_nextorph = nokp->p_orphan;
646 nokp->p_orphan = p->p_orphan;
647 p->p_orphan = NULL;
651 * Reassign the children to init.
652 * Don't try to assign init's children to init.
654 if ((q = p->p_child) != NULL && p != proc_init) {
655 struct proc *np;
656 struct proc *initp = proc_init;
657 boolean_t setzonetop = B_FALSE;
659 if (!INGLOBALZONE(curproc))
660 setzonetop = B_TRUE;
662 pgdetach(p);
664 do {
665 np = q->p_sibling;
667 * Delete it from its current parent new state
668 * list and add it to init new state list
670 delete_ns(q->p_parent, q);
672 q->p_ppid = 1;
673 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
674 if (setzonetop) {
675 mutex_enter(&q->p_lock);
676 q->p_flag |= SZONETOP;
677 mutex_exit(&q->p_lock);
679 q->p_parent = initp;
682 * Since q will be the first child,
683 * it will not have a previous sibling.
685 q->p_psibling = NULL;
686 if (initp->p_child) {
687 initp->p_child->p_psibling = q;
689 q->p_sibling = initp->p_child;
690 initp->p_child = q;
691 if (q->p_proc_flag & P_PR_PTRACE) {
692 mutex_enter(&q->p_lock);
693 sigtoproc(q, NULL, SIGKILL);
694 mutex_exit(&q->p_lock);
697 * sigcld() will add the child to parents
698 * newstate list.
700 if (q->p_stat == SZOMB)
701 sigcld(q, NULL);
702 } while ((q = np) != NULL);
704 p->p_child = NULL;
705 ASSERT(p->p_child_ns == NULL);
708 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
710 mutex_enter(&p->p_lock);
711 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
714 * Have our task accummulate our resource usage data before they
715 * become contaminated by p_cacct etc., and before we renounce
716 * membership of the task.
718 * We do this regardless of whether or not task accounting is active.
719 * This is to avoid having nonsense data reported for this task if
720 * task accounting is subsequently enabled. The overhead is minimal;
721 * by this point, this process has accounted for the usage of all its
722 * LWPs. We nonetheless do the work here, and under the protection of
723 * pidlock, so that the movement of the process's usage to the task
724 * happens at the same time as the removal of the process from the
725 * task, from the point of view of exacct_snapshot_task_usage().
727 exacct_update_task_mstate(p);
729 hrutime = mstate_aggr_state(p, LMS_USER);
730 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
731 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
732 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
734 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
735 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
736 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
737 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
738 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
739 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
740 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
741 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
742 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
743 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
745 p->p_ru.minflt += p->p_cru.minflt;
746 p->p_ru.majflt += p->p_cru.majflt;
747 p->p_ru.nswap += p->p_cru.nswap;
748 p->p_ru.inblock += p->p_cru.inblock;
749 p->p_ru.oublock += p->p_cru.oublock;
750 p->p_ru.msgsnd += p->p_cru.msgsnd;
751 p->p_ru.msgrcv += p->p_cru.msgrcv;
752 p->p_ru.nsignals += p->p_cru.nsignals;
753 p->p_ru.nvcsw += p->p_cru.nvcsw;
754 p->p_ru.nivcsw += p->p_cru.nivcsw;
755 p->p_ru.sysc += p->p_cru.sysc;
756 p->p_ru.ioch += p->p_cru.ioch;
758 p->p_stat = SZOMB;
759 p->p_proc_flag &= ~P_PR_PTRACE;
760 p->p_wdata = what;
761 p->p_wcode = (char)why;
763 cdir = PTOU(p)->u_cdir;
764 rdir = PTOU(p)->u_rdir;
765 cwd = PTOU(p)->u_cwd;
767 ASSERT(cdir != NULL || p->p_parent == &p0);
770 * Release resource controls, as they are no longer enforceable.
772 rctl_set_free(p->p_rctls);
775 * Decrement tk_nlwps counter for our task.max-lwps resource control.
776 * An extended accounting record, if that facility is active, is
777 * scheduled to be written. We cannot give up task and project
778 * membership at this point because that would allow zombies to escape
779 * from the max-processes resource controls. Zombies stay in their
780 * current task and project until the process table slot is released
781 * in freeproc().
783 tk = p->p_task;
785 mutex_enter(&p->p_zone->zone_nlwps_lock);
786 tk->tk_nlwps--;
787 tk->tk_proj->kpj_nlwps--;
788 p->p_zone->zone_nlwps--;
789 mutex_exit(&p->p_zone->zone_nlwps_lock);
792 * Clear the lwp directory and the lwpid hash table
793 * now that /proc can't bother us any more.
794 * We free the memory below, after dropping p->p_lock.
796 lwpdir = p->p_lwpdir;
797 lwpdir_sz = p->p_lwpdir_sz;
798 tidhash = p->p_tidhash;
799 tidhash_sz = p->p_tidhash_sz;
800 ret_tidhash = p->p_ret_tidhash;
801 p->p_lwpdir = NULL;
802 p->p_lwpfree = NULL;
803 p->p_lwpdir_sz = 0;
804 p->p_tidhash = NULL;
805 p->p_tidhash_sz = 0;
806 p->p_ret_tidhash = NULL;
809 * If the process has context ops installed, call the exit routine
810 * on behalf of this last remaining thread. Normally exitpctx() is
811 * called during thread_exit() or lwp_exit(), but because this is the
812 * last thread in the process, we must call it here. By the time
813 * thread_exit() is called (below), the association with the relevant
814 * process has been lost.
816 * We also free the context here.
818 if (p->p_pctx) {
819 kpreempt_disable();
820 exitpctx(p);
821 kpreempt_enable();
823 freepctx(p, 0);
827 * curthread's proc pointer is changed to point to the 'sched'
828 * process for the corresponding zone, except in the case when
829 * the exiting process is in fact a zsched instance, in which
830 * case the proc pointer is set to p0. We do so, so that the
831 * process still points at the right zone when we call the VN_RELE()
832 * below.
834 * This is because curthread's original proc pointer can be freed as
835 * soon as the child sends a SIGCLD to its parent. We use zsched so
836 * that for user processes, even in the final moments of death, the
837 * process is still associated with its zone.
839 if (p != t->t_procp->p_zone->zone_zsched)
840 t->t_procp = t->t_procp->p_zone->zone_zsched;
841 else
842 t->t_procp = &p0;
844 mutex_exit(&p->p_lock);
845 if (!evaporate) {
846 p->p_pidflag &= ~CLDPEND;
847 sigcld(p, sqp);
848 } else {
850 * Do what sigcld() would do if the disposition
851 * of the SIGCHLD signal were set to be ignored.
853 cv_broadcast(&p->p_srwchan_cv);
854 freeproc(p);
856 mutex_exit(&pidlock);
859 * We don't release u_cdir and u_rdir until SZOMB is set.
860 * This protects us against dofusers().
862 if (cdir)
863 VN_RELE(cdir);
864 if (rdir)
865 VN_RELE(rdir);
866 if (cwd)
867 refstr_rele(cwd);
870 * task_rele() may ultimately cause the zone to go away (or
871 * may cause the last user process in a zone to go away, which
872 * signals zsched to go away). So prior to this call, we must
873 * no longer point at zsched.
875 t->t_procp = &p0;
877 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
878 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
879 while (ret_tidhash != NULL) {
880 ret_tidhash_t *next = ret_tidhash->rth_next;
881 kmem_free(ret_tidhash->rth_tidhash,
882 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
883 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
884 ret_tidhash = next;
887 thread_exit();
888 /* NOTREACHED */
892 * Format siginfo structure for wait system calls.
894 void
895 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
897 ASSERT(MUTEX_HELD(&pidlock));
899 bzero(ip, sizeof (k_siginfo_t));
900 ip->si_signo = SIGCLD;
901 ip->si_code = pp->p_wcode;
902 ip->si_pid = pp->p_pid;
903 ip->si_ctid = PRCTID(pp);
904 ip->si_zoneid = pp->p_zone->zone_id;
905 ip->si_status = pp->p_wdata;
906 ip->si_stime = pp->p_stime;
907 ip->si_utime = pp->p_utime;
909 if (waitflag) {
910 pp->p_wcode = 0;
911 pp->p_wdata = 0;
912 pp->p_pidflag &= ~CLDPEND;
917 * Wait system call.
918 * Search for a terminated (zombie) child,
919 * finally lay it to rest, and collect its status.
920 * Look also for stopped children,
921 * and pass back status from them.
924 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
926 int found;
927 proc_t *cp, *pp;
928 int proc_gone;
929 int waitflag = !(options & WNOWAIT);
932 * Obsolete flag, defined here only for binary compatibility
933 * with old statically linked executables. Delete this when
934 * we no longer care about these old and broken applications.
936 #define _WNOCHLD 0400
937 options &= ~_WNOCHLD;
939 if (options == 0 || (options & ~WOPTMASK))
940 return (EINVAL);
942 switch (idtype) {
943 case P_PID:
944 case P_PGID:
945 if (id < 0 || id >= maxpid)
946 return (EINVAL);
947 /* FALLTHROUGH */
948 case P_ALL:
949 break;
950 default:
951 return (EINVAL);
954 pp = ttoproc(curthread);
957 * lock parent mutex so that sibling chain can be searched.
959 mutex_enter(&pidlock);
962 * if we are only looking for exited processes and child_ns list
963 * is empty no reason to look at all children.
965 if (idtype == P_ALL &&
966 (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
967 pp->p_child_ns == NULL) {
968 if (pp->p_child) {
969 mutex_exit(&pidlock);
970 bzero(ip, sizeof (k_siginfo_t));
971 return (0);
973 mutex_exit(&pidlock);
974 return (ECHILD);
977 while (pp->p_child != NULL) {
979 proc_gone = 0;
981 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
982 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
983 continue;
984 if (idtype == P_PID && id != cp->p_pid)
985 continue;
986 if (idtype == P_PGID && id != cp->p_pgrp)
987 continue;
989 switch (cp->p_wcode) {
991 case CLD_TRAPPED:
992 case CLD_STOPPED:
993 case CLD_CONTINUED:
994 cmn_err(CE_PANIC,
995 "waitid: wrong state %d on the p_newstate"
996 " list", cp->p_wcode);
997 break;
999 case CLD_EXITED:
1000 case CLD_DUMPED:
1001 case CLD_KILLED:
1002 if (!(options & WEXITED)) {
1004 * Count how many are already gone
1005 * for good.
1007 proc_gone++;
1008 break;
1010 if (!waitflag) {
1011 winfo(cp, ip, 0);
1012 } else {
1013 winfo(cp, ip, 1);
1014 freeproc(cp);
1016 mutex_exit(&pidlock);
1017 if (waitflag) { /* accept SIGCLD */
1018 sigcld_delete(ip);
1019 sigcld_repost();
1021 return (0);
1024 if (idtype == P_PID)
1025 break;
1029 * Wow! None of the threads on the p_sibling_ns list were
1030 * interesting threads. Check all the kids!
1032 found = 0;
1033 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1034 if (idtype == P_PID && id != cp->p_pid)
1035 continue;
1036 if (idtype == P_PGID && id != cp->p_pgrp)
1037 continue;
1039 switch (cp->p_wcode) {
1040 case CLD_TRAPPED:
1041 if (!(options & WTRAPPED))
1042 break;
1043 winfo(cp, ip, waitflag);
1044 mutex_exit(&pidlock);
1045 if (waitflag) { /* accept SIGCLD */
1046 sigcld_delete(ip);
1047 sigcld_repost();
1049 return (0);
1051 case CLD_STOPPED:
1052 if (!(options & WSTOPPED))
1053 break;
1054 /* Is it still stopped? */
1055 mutex_enter(&cp->p_lock);
1056 if (!jobstopped(cp)) {
1057 mutex_exit(&cp->p_lock);
1058 break;
1060 mutex_exit(&cp->p_lock);
1061 winfo(cp, ip, waitflag);
1062 mutex_exit(&pidlock);
1063 if (waitflag) { /* accept SIGCLD */
1064 sigcld_delete(ip);
1065 sigcld_repost();
1067 return (0);
1069 case CLD_CONTINUED:
1070 if (!(options & WCONTINUED))
1071 break;
1072 winfo(cp, ip, waitflag);
1073 mutex_exit(&pidlock);
1074 if (waitflag) { /* accept SIGCLD */
1075 sigcld_delete(ip);
1076 sigcld_repost();
1078 return (0);
1080 case CLD_EXITED:
1081 case CLD_DUMPED:
1082 case CLD_KILLED:
1083 if (idtype != P_PID &&
1084 (cp->p_pidflag & CLDWAITPID))
1085 continue;
1087 * Don't complain if a process was found in
1088 * the first loop but we broke out of the loop
1089 * because of the arguments passed to us.
1091 if (proc_gone == 0) {
1092 cmn_err(CE_PANIC,
1093 "waitid: wrong state on the"
1094 " p_child list");
1095 } else {
1096 break;
1100 found++;
1102 if (idtype == P_PID)
1103 break;
1107 * If we found no interesting processes at all,
1108 * break out and return ECHILD.
1110 if (found + proc_gone == 0)
1111 break;
1113 if (options & WNOHANG) {
1114 mutex_exit(&pidlock);
1115 bzero(ip, sizeof (k_siginfo_t));
1116 /* XXX: should set ip->si_signo = SIGCLD? */
1117 return (0);
1121 * If we found no processes of interest that could
1122 * change state while we wait, we don't wait at all.
1123 * Get out with ECHILD according to SVID.
1125 if (found == proc_gone)
1126 break;
1128 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1129 mutex_exit(&pidlock);
1130 return (EINTR);
1133 mutex_exit(&pidlock);
1134 return (ECHILD);
1138 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1140 int error;
1141 k_siginfo_t info;
1143 if (error = waitid(idtype, id, &info, options))
1144 return (set_errno(error));
1145 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1146 return (set_errno(EFAULT));
1147 return (0);
#ifdef _SYSCALL32_IMPL

/*
 * Variant of waitsys() built only under _SYSCALL32_IMPL: the result
 * of waitid() is converted with siginfo_kto32() and the resulting
 * siginfo32_t is what gets copied out to infop.
 */
int
waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
{
	k_siginfo_t info;
	siginfo32_t info32;
	int error;

	error = waitid(idtype, id, &info, options);
	if (error != 0)
		return (set_errno(error));

	siginfo_kto32(&info, &info32);
	if (copyout(&info32, infop, sizeof (info32)) != 0)
		return (set_errno(EFAULT));

	return (0);
}

#endif	/* _SYSCALL32_IMPL */
1169 void
1170 proc_detach(proc_t *p)
1172 proc_t *q;
1174 ASSERT(MUTEX_HELD(&pidlock));
1176 q = p->p_parent;
1177 ASSERT(q != NULL);
1180 * Take it off the newstate list of its parent
1182 delete_ns(q, p);
1184 if (q->p_child == p) {
1185 q->p_child = p->p_sibling;
1187 * If the parent has no children, it better not
1188 * have any with new states either!
1190 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1193 if (p->p_sibling) {
1194 p->p_sibling->p_psibling = p->p_psibling;
1197 if (p->p_psibling) {
1198 p->p_psibling->p_sibling = p->p_sibling;
1203 * Remove zombie children from the process table.
1205 void
1206 freeproc(proc_t *p)
1208 proc_t *q;
1209 task_t *tk;
1211 ASSERT(p->p_stat == SZOMB);
1212 ASSERT(p->p_tlist == NULL);
1213 ASSERT(MUTEX_HELD(&pidlock));
1215 sigdelq(p, NULL, 0);
1216 if (p->p_killsqp) {
1217 siginfofree(p->p_killsqp);
1218 p->p_killsqp = NULL;
1221 prfree(p); /* inform /proc */
1224 * Don't free the init processes.
1225 * Other dying processes will access it.
1227 if (p == proc_init)
1228 return;
1232 * We wait until now to free the cred structure because a
1233 * zombie process's credentials may be examined by /proc.
1234 * No cred locking needed because there are no threads at this point.
1236 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1237 crfree(p->p_cred);
1238 if (p->p_corefile != NULL) {
1239 corectl_path_rele(p->p_corefile);
1240 p->p_corefile = NULL;
1242 if (p->p_content != NULL) {
1243 corectl_content_rele(p->p_content);
1244 p->p_content = NULL;
1247 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1248 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1250 * This should still do the right thing since p_utime/stime
1251 * get set to the correct value on process exit, so it
1252 * should get properly updated
1254 p->p_nextofkin->p_cutime += p->p_utime;
1255 p->p_nextofkin->p_cstime += p->p_stime;
1257 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1258 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1259 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1260 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1261 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1262 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1263 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1264 += p->p_acct[LMS_USER_LOCK];
1265 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1266 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1267 += p->p_acct[LMS_WAIT_CPU];
1268 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1270 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
1271 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
1272 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
1273 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
1274 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
1275 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
1276 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
1277 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
1278 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
1279 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
1280 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
1281 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;
1285 q = p->p_nextofkin;
1286 if (q && q->p_orphan == p)
1287 q->p_orphan = p->p_nextorph;
1288 else if (q) {
1289 for (q = q->p_orphan; q; q = q->p_nextorph)
1290 if (q->p_nextorph == p)
1291 break;
1292 ASSERT(q && q->p_nextorph == p);
1293 q->p_nextorph = p->p_nextorph;
1297 * The process table slot is being freed, so it is now safe to give up
1298 * task and project membership.
1300 mutex_enter(&p->p_lock);
1301 tk = p->p_task;
1302 task_detach(p);
1303 mutex_exit(&p->p_lock);
1305 proc_detach(p);
1306 pid_exit(p, tk); /* frees pid and proc structure */
1308 task_rele(tk);
1312 * Delete process "child" from the newstate list of process "parent"
1314 void
1315 delete_ns(proc_t *parent, proc_t *child)
1317 proc_t **ns;
1319 ASSERT(MUTEX_HELD(&pidlock));
1320 ASSERT(child->p_parent == parent);
1321 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1322 if (*ns == child) {
1324 ASSERT((*ns)->p_parent == parent);
1326 *ns = child->p_sibling_ns;
1327 child->p_sibling_ns = NULL;
1328 return;
1334 * Add process "child" to the new state list of process "parent"
1336 void
1337 add_ns(proc_t *parent, proc_t *child)
1339 ASSERT(child->p_sibling_ns == NULL);
1340 child->p_sibling_ns = parent->p_child_ns;
1341 parent->p_child_ns = child;