8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / uts / common / os / pid.c
blobb555bb82b71c2f67fe639dd298ba3185fd947a24
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/proc.h>
33 #include <sys/kmem.h>
34 #include <sys/tuneable.h>
35 #include <sys/var.h>
36 #include <sys/cred.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/vnode.h>
40 #include <sys/session.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cmn_err.h>
43 #include <sys/bitmap.h>
44 #include <sys/debug.h>
45 #include <c2/audit.h>
46 #include <sys/project.h>
47 #include <sys/task.h>
48 #include <sys/zone.h>
50 /* directory entries for /proc */
51 union procent {
52 proc_t *pe_proc;
53 union procent *pe_next;
56 struct pid pid0 = {
57 0, /* pid_prinactive */
58 1, /* pid_pgorphaned */
59 0, /* pid_padding */
60 0, /* pid_prslot */
61 0, /* pid_id */
62 NULL, /* pid_pglink */
63 NULL, /* pid_pgtail */
64 NULL, /* pid_link */
65 3 /* pid_ref */
68 static int pid_hashlen = 4; /* desired average hash chain length */
69 static int pid_hashsz; /* number of buckets in the hash table */
71 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))])
73 extern uint_t nproc;
74 extern struct kmem_cache *process_cache;
75 static void upcount_init(void);
77 kmutex_t pidlock; /* global process lock */
78 kmutex_t pr_pidlock; /* /proc global process lock */
79 kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */
80 struct plock *proc_lock; /* persistent array of p_lock's */
83 * See the comment above pid_getlockslot() for a detailed explanation of this
84 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
85 * granularity; if the coherence granularity is ever changed, this constant
86 * should be modified to reflect the change to minimize proc_lock false
87 * sharing (correctness, however, is guaranteed regardless of the coherence
88 * granularity).
90 #define PLOCK_SHIFT 3
92 static kmutex_t pidlinklock;
93 static struct pid **pidhash;
94 static pid_t minpid;
95 static pid_t mpid = FAMOUS_PIDS; /* one more than the last famous pid */
96 static union procent *procdir;
97 static union procent *procentfree;
99 static struct pid *
100 pid_lookup(pid_t pid)
102 struct pid *pidp;
104 ASSERT(MUTEX_HELD(&pidlinklock));
106 for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
107 if (pidp->pid_id == pid) {
108 ASSERT(pidp->pid_ref > 0);
109 break;
112 return (pidp);
115 void
116 pid_setmin(void)
118 if (jump_pid && jump_pid > mpid)
119 minpid = mpid = jump_pid;
120 else
121 minpid = mpid;
125 * When prslots are simply used as an index to determine a process' p_lock,
126 * adjacent prslots share adjacent p_locks. On machines where the size
127 * of a mutex is smaller than that of a cache line (which, as of this writing,
128 * is true for all machines on which Solaris runs), this can potentially
129 * induce false sharing. The standard solution for false sharing is to pad
130 * out one's data structures (in this case, struct plock). However,
131 * given the size and (generally) sparse use of the proc_lock array, this
132 * is suboptimal. We therefore stride through the proc_lock array with
133 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as:
135 * log_2 (coherence_granularity / sizeof (kmutex_t))
137 * Under this scheme, false sharing is still possible -- but only when
138 * the number of active processes is very large. Note that the one-to-one
139 * mapping between prslots and lockslots is maintained.
141 static int
142 pid_getlockslot(int prslot)
144 int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
145 int perlap = even >> PLOCK_SHIFT;
147 if (prslot >= even)
148 return (prslot);
150 return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
154 * This function allocates a pid structure, a free pid, and optionally a
155 * slot in the proc table for it.
157 * pid_allocate() returns the new pid on success, -1 on failure.
159 pid_t
160 pid_allocate(proc_t *prp, pid_t pid, int flags)
162 struct pid *pidp;
163 union procent *pep;
164 pid_t newpid, startpid;
166 pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
168 mutex_enter(&pidlinklock);
169 if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
171 * ran out of /proc directory entries
173 goto failed;
176 if (pid != 0) {
177 VERIFY(minpid == 0);
178 VERIFY3P(pid, <, mpid);
179 VERIFY3P(pid_lookup(pid), ==, NULL);
180 newpid = pid;
181 } else {
183 * Allocate a pid
185 ASSERT(minpid <= mpid && mpid < maxpid);
187 startpid = mpid;
188 for (;;) {
189 newpid = mpid;
190 if (++mpid == maxpid)
191 mpid = minpid;
193 if (pid_lookup(newpid) == NULL)
194 break;
196 if (mpid == startpid)
197 goto failed;
202 * Put pid into the pid hash table.
204 pidp->pid_link = HASHPID(newpid);
205 HASHPID(newpid) = pidp;
206 pidp->pid_ref = 1;
207 pidp->pid_id = newpid;
209 if (flags & PID_ALLOC_PROC) {
210 procentfree = pep->pe_next;
211 pidp->pid_prslot = pep - procdir;
212 pep->pe_proc = prp;
213 prp->p_pidp = pidp;
214 prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
215 } else {
216 pidp->pid_prslot = 0;
219 mutex_exit(&pidlinklock);
221 return (newpid);
223 failed:
224 mutex_exit(&pidlinklock);
225 kmem_free(pidp, sizeof (struct pid));
226 return (-1);
230 * decrement the reference count for pid
233 pid_rele(struct pid *pidp)
235 struct pid **pidpp;
237 mutex_enter(&pidlinklock);
238 ASSERT(pidp != &pid0);
240 pidpp = &HASHPID(pidp->pid_id);
241 for (;;) {
242 ASSERT(*pidpp != NULL);
243 if (*pidpp == pidp)
244 break;
245 pidpp = &(*pidpp)->pid_link;
248 *pidpp = pidp->pid_link;
249 mutex_exit(&pidlinklock);
251 kmem_free(pidp, sizeof (*pidp));
252 return (0);
255 void
256 proc_entry_free(struct pid *pidp)
258 mutex_enter(&pidlinklock);
259 pidp->pid_prinactive = 1;
260 procdir[pidp->pid_prslot].pe_next = procentfree;
261 procentfree = &procdir[pidp->pid_prslot];
262 mutex_exit(&pidlinklock);
266 * The original task needs to be passed in since the process has already been
267 * detached from the task at this point in time.
269 void
270 pid_exit(proc_t *prp, struct task *tk)
272 struct pid *pidp;
273 zone_t *zone = prp->p_zone;
275 ASSERT(MUTEX_HELD(&pidlock));
278 * Exit process group. If it is NULL, it's because fork failed
279 * before calling pgjoin().
281 ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
282 if (prp->p_pgidp != NULL)
283 pgexit(prp);
285 sess_rele(prp->p_sessp, B_TRUE);
287 pidp = prp->p_pidp;
289 proc_entry_free(pidp);
291 if (audit_active)
292 audit_pfree(prp);
294 if (practive == prp) {
295 practive = prp->p_next;
298 if (prp->p_next) {
299 prp->p_next->p_prev = prp->p_prev;
301 if (prp->p_prev) {
302 prp->p_prev->p_next = prp->p_next;
305 PID_RELE(pidp);
307 mutex_destroy(&prp->p_crlock);
308 kmem_cache_free(process_cache, prp);
309 nproc--;
312 * Decrement the process counts of the original task, project and zone.
314 mutex_enter(&zone->zone_nlwps_lock);
315 tk->tk_nprocs--;
316 tk->tk_proj->kpj_nprocs--;
317 zone->zone_nprocs--;
318 mutex_exit(&zone->zone_nlwps_lock);
322 * Find a process visible from the specified zone given its process ID.
324 proc_t *
325 prfind_zone(pid_t pid, zoneid_t zoneid)
327 struct pid *pidp;
328 proc_t *p;
330 ASSERT(MUTEX_HELD(&pidlock));
332 mutex_enter(&pidlinklock);
333 pidp = pid_lookup(pid);
334 mutex_exit(&pidlinklock);
335 if (pidp != NULL && pidp->pid_prinactive == 0) {
336 p = procdir[pidp->pid_prslot].pe_proc;
337 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
338 return (p);
340 return (NULL);
344 * Find a process given its process ID. This obeys zone restrictions,
345 * so if the caller is in a non-global zone it won't find processes
346 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to
347 * bypass this restriction.
349 proc_t *
350 prfind(pid_t pid)
352 zoneid_t zoneid;
354 if (INGLOBALZONE(curproc))
355 zoneid = ALL_ZONES;
356 else
357 zoneid = getzoneid();
358 return (prfind_zone(pid, zoneid));
361 proc_t *
362 pgfind_zone(pid_t pgid, zoneid_t zoneid)
364 struct pid *pidp;
366 ASSERT(MUTEX_HELD(&pidlock));
368 mutex_enter(&pidlinklock);
369 pidp = pid_lookup(pgid);
370 mutex_exit(&pidlinklock);
371 if (pidp != NULL) {
372 proc_t *p = pidp->pid_pglink;
374 if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
375 p->p_zone->zone_id == zoneid)
376 return (p);
378 return (NULL);
382 * return the head of the list of processes whose process group ID is 'pgid',
383 * or NULL, if no such process group
385 proc_t *
386 pgfind(pid_t pgid)
388 zoneid_t zoneid;
390 if (INGLOBALZONE(curproc))
391 zoneid = ALL_ZONES;
392 else
393 zoneid = getzoneid();
394 return (pgfind_zone(pgid, zoneid));
398 * Sets P_PR_LOCK on a non-system process. Process must be fully created
399 * and not exiting to succeed.
401 * Returns 0 on success.
402 * Returns 1 if P_PR_LOCK is set.
403 * Returns -1 if proc is in invalid state.
406 sprtrylock_proc(proc_t *p)
408 ASSERT(MUTEX_HELD(&p->p_lock));
410 /* skip system and incomplete processes */
411 if (p->p_stat == SIDL || p->p_stat == SZOMB ||
412 (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
413 return (-1);
416 if (p->p_proc_flag & P_PR_LOCK)
417 return (1);
419 p->p_proc_flag |= P_PR_LOCK;
420 THREAD_KPRI_REQUEST();
422 return (0);
426 * Wait for P_PR_LOCK to become clear. Returns with p_lock dropped,
427 * and the proc pointer no longer valid, as the proc may have exited.
429 void
430 sprwaitlock_proc(proc_t *p)
432 kmutex_t *mp;
434 ASSERT(MUTEX_HELD(&p->p_lock));
435 ASSERT(p->p_proc_flag & P_PR_LOCK);
438 * p_lock is persistent, but p itself is not -- it could
439 * vanish during cv_wait(). Load p->p_lock now so we can
440 * drop it after cv_wait() without referencing p.
442 mp = &p->p_lock;
443 cv_wait(&pr_pid_cv[p->p_slot], mp);
444 mutex_exit(mp);
448 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
449 * Returns the proc pointer on success, NULL on failure. sprlock() is
450 * really just a stripped-down version of pr_p_lock() to allow practive
451 * walkers like dofusers() and dumpsys() to synchronize with /proc.
453 proc_t *
454 sprlock_zone(pid_t pid, zoneid_t zoneid)
456 proc_t *p;
457 int ret;
459 for (;;) {
460 mutex_enter(&pidlock);
461 if ((p = prfind_zone(pid, zoneid)) == NULL) {
462 mutex_exit(&pidlock);
463 return (NULL);
465 mutex_enter(&p->p_lock);
466 mutex_exit(&pidlock);
468 if (panicstr)
469 return (p);
471 ret = sprtrylock_proc(p);
472 if (ret == -1) {
473 mutex_exit(&p->p_lock);
474 return (NULL);
475 } else if (ret == 0) {
476 break;
478 sprwaitlock_proc(p);
480 return (p);
483 proc_t *
484 sprlock(pid_t pid)
486 zoneid_t zoneid;
488 if (INGLOBALZONE(curproc))
489 zoneid = ALL_ZONES;
490 else
491 zoneid = getzoneid();
492 return (sprlock_zone(pid, zoneid));
495 void
496 sprlock_proc(proc_t *p)
498 ASSERT(MUTEX_HELD(&p->p_lock));
500 while (p->p_proc_flag & P_PR_LOCK) {
501 cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
504 p->p_proc_flag |= P_PR_LOCK;
505 THREAD_KPRI_REQUEST();
508 void
509 sprunlock(proc_t *p)
511 if (panicstr) {
512 mutex_exit(&p->p_lock);
513 return;
516 ASSERT(p->p_proc_flag & P_PR_LOCK);
517 ASSERT(MUTEX_HELD(&p->p_lock));
519 cv_signal(&pr_pid_cv[p->p_slot]);
520 p->p_proc_flag &= ~P_PR_LOCK;
521 mutex_exit(&p->p_lock);
522 THREAD_KPRI_RELEASE();
525 void
526 pid_init(void)
528 int i;
530 pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);
532 pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
533 procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
534 pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
535 proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);
537 nproc = 1;
538 practive = proc_sched;
539 proc_sched->p_next = NULL;
540 procdir[0].pe_proc = proc_sched;
542 procentfree = &procdir[1];
543 for (i = 1; i < v.v_proc - 1; i++)
544 procdir[i].pe_next = &procdir[i+1];
545 procdir[i].pe_next = NULL;
547 HASHPID(0) = &pid0;
549 upcount_init();
552 proc_t *
553 pid_entry(int slot)
555 union procent *pep;
556 proc_t *prp;
558 ASSERT(MUTEX_HELD(&pidlock));
559 ASSERT(slot >= 0 && slot < v.v_proc);
561 pep = procdir[slot].pe_next;
562 if (pep >= procdir && pep < &procdir[v.v_proc])
563 return (NULL);
564 prp = procdir[slot].pe_proc;
565 if (prp != 0 && prp->p_stat == SIDL)
566 return (NULL);
567 return (prp);
571 * Send the specified signal to all processes whose process group ID is
572 * equal to 'pgid'
575 void
576 signal(pid_t pgid, int sig)
578 struct pid *pidp;
579 proc_t *prp;
581 mutex_enter(&pidlock);
582 mutex_enter(&pidlinklock);
583 if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
584 mutex_exit(&pidlinklock);
585 mutex_exit(&pidlock);
586 return;
588 mutex_exit(&pidlinklock);
589 for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
590 mutex_enter(&prp->p_lock);
591 sigtoproc(prp, NULL, sig);
592 mutex_exit(&prp->p_lock);
594 mutex_exit(&pidlock);
598 * Send the specified signal to the specified process
601 void
602 prsignal(struct pid *pidp, int sig)
604 if (!(pidp->pid_prinactive))
605 psignal(procdir[pidp->pid_prslot].pe_proc, sig);
608 #include <sys/sunddi.h>
611 * DDI/DKI interfaces for drivers to send signals to processes
615 * obtain an opaque reference to a process for signaling
617 void *
618 proc_ref(void)
620 struct pid *pidp;
622 mutex_enter(&pidlock);
623 pidp = curproc->p_pidp;
624 PID_HOLD(pidp);
625 mutex_exit(&pidlock);
627 return (pidp);
631 * release a reference to a process
632 * - a process can exit even if a driver has a reference to it
633 * - one proc_unref for every proc_ref
635 void
636 proc_unref(void *pref)
638 mutex_enter(&pidlock);
639 PID_RELE((struct pid *)pref);
640 mutex_exit(&pidlock);
644 * send a signal to a process
646 * - send the process the signal
647 * - if the process went away, return a -1
648 * - if the process is still there return 0
651 proc_signal(void *pref, int sig)
653 struct pid *pidp = pref;
655 prsignal(pidp, sig);
656 return (pidp->pid_prinactive ? -1 : 0);
660 static struct upcount **upc_hash; /* a boot time allocated array */
661 static ulong_t upc_hashmask;
662 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask)
665 * Get us off the ground. Called once at boot.
667 void
668 upcount_init(void)
670 ulong_t upc_hashsize;
673 * An entry per MB of memory is our current guess
676 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
677 * converts pages to megs (without overflowing a u_int
678 * if you have more than 4G of memory, like ptob(physmem)/1M
679 * would).
681 upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
682 upc_hashmask = upc_hashsize - 1;
683 upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
684 KM_SLEEP);
688 * Increment the number of processes associated with a given uid and zoneid.
690 void
691 upcount_inc(uid_t uid, zoneid_t zoneid)
693 struct upcount **upc, **hupc;
694 struct upcount *new;
696 ASSERT(MUTEX_HELD(&pidlock));
697 new = NULL;
698 hupc = &upc_hash[UPC_HASH(uid, zoneid)];
699 top:
700 upc = hupc;
701 while ((*upc) != NULL) {
702 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
703 (*upc)->up_count++;
704 if (new) {
706 * did not need `new' afterall.
708 kmem_free(new, sizeof (*new));
710 return;
712 upc = &(*upc)->up_next;
716 * There is no entry for this <uid,zoneid> pair.
717 * Allocate one. If we have to drop pidlock, check
718 * again.
720 if (new == NULL) {
721 new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
722 if (new == NULL) {
723 mutex_exit(&pidlock);
724 new = (struct upcount *)kmem_alloc(sizeof (*new),
725 KM_SLEEP);
726 mutex_enter(&pidlock);
727 goto top;
733 * On the assumption that a new user is going to do some
734 * more forks, put the new upcount structure on the front.
736 upc = hupc;
738 new->up_uid = uid;
739 new->up_zoneid = zoneid;
740 new->up_count = 1;
741 new->up_next = *upc;
743 *upc = new;
747 * Decrement the number of processes a given uid and zoneid has.
749 void
750 upcount_dec(uid_t uid, zoneid_t zoneid)
752 struct upcount **upc;
753 struct upcount *done;
755 ASSERT(MUTEX_HELD(&pidlock));
757 upc = &upc_hash[UPC_HASH(uid, zoneid)];
758 while ((*upc) != NULL) {
759 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
760 (*upc)->up_count--;
761 if ((*upc)->up_count == 0) {
762 done = *upc;
763 *upc = (*upc)->up_next;
764 kmem_free(done, sizeof (*done));
766 return;
768 upc = &(*upc)->up_next;
770 cmn_err(CE_PANIC, "decr_upcount-off the end");
774 * Returns the number of processes a uid has.
775 * Non-existent uid's are assumed to have no processes.
778 upcount_get(uid_t uid, zoneid_t zoneid)
780 struct upcount *upc;
782 ASSERT(MUTEX_HELD(&pidlock));
784 upc = upc_hash[UPC_HASH(uid, zoneid)];
785 while (upc != NULL) {
786 if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
787 return (upc->up_count);
789 upc = upc->up_next;
791 return (0);