4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
34 #include <sys/tuneable.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/vnode.h>
40 #include <sys/session.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cmn_err.h>
43 #include <sys/bitmap.h>
44 #include <sys/debug.h>
46 #include <sys/project.h>
/*
 * NOTE(review): extraction-garbled fragment of the file-scope
 * declarations: the /proc directory-entry union, the pid0 template
 * initializer, the pid hash parameters, and the global process locks.
 * Several declaration lines are missing from this view -- confirm
 * against the full source before editing.
 */
50 /* directory entries for /proc */
53 union procent
*pe_next
;
/* Fragment of the static initializer for pid0 (the template pid). */
57 0, /* pid_prinactive */
58 1, /* pid_pgorphaned */
62 NULL
, /* pid_pglink */
63 NULL
, /* pid_pgtail */
68 static int pid_hashlen
= 4; /* desired average hash chain length */
69 static int pid_hashsz
; /* number of buckets in the hash table */
/* Hash a pid into pidhash[]; relies on pid_hashsz being a power of two. */
71 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))])
74 extern struct kmem_cache
*process_cache
;
75 static void upcount_init(void);
77 kmutex_t pidlock
; /* global process lock */
78 kmutex_t pr_pidlock
; /* /proc global process lock */
79 kcondvar_t
*pr_pid_cv
; /* for /proc, one per process slot */
80 struct plock
*proc_lock
; /* persistent array of p_lock's */
83 * See the comment above pid_getlockslot() for a detailed explanation of this
84 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
85 * granularity; if the coherence granularity is ever changed, this constant
86 * should be modified to reflect the change to minimize proc_lock false
87 * sharing (correctness, however, is guaranteed regardless of the coherence
/* pidlinklock protects the pid hash chains and the procdir free list. */
92 static kmutex_t pidlinklock
;
93 static struct pid
**pidhash
;
95 static pid_t mpid
= FAMOUS_PIDS
; /* one more than the last famous pid */
96 static union procent
*procdir
;
97 static union procent
*procentfree
;
/*
 * pid_lookup() -- look up `pid' in the pid hash table and return its
 * struct pid.  Caller must hold pidlinklock (asserted below).
 * NOTE(review): garbled fragment -- the return type, local
 * declarations, and return statements are missing from this view.
 */
100 pid_lookup(pid_t pid
)
104 ASSERT(MUTEX_HELD(&pidlinklock
));
/* Walk the hash chain for this pid. */
106 for (pidp
= HASHPID(pid
); pidp
; pidp
= pidp
->pid_link
) {
107 if (pidp
->pid_id
== pid
) {
/* A pid still on a hash chain must be held. */
108 ASSERT(pidp
->pid_ref
> 0);
/*
 * NOTE(review): orphaned fragment whose enclosing function signature is
 * not visible here -- presumably the body of pid_setmin(), which starts
 * pid allocation at a boot-configured jump_pid when that lies beyond
 * the famous pids.  Confirm against the full source.
 */
118 if (jump_pid
&& jump_pid
> mpid
)
119 minpid
= mpid
= jump_pid
;
125 * When prslots are simply used as an index to determine a process' p_lock,
126 * adjacent prslots share adjacent p_locks. On machines where the size
127 * of a mutex is smaller than that of a cache line (which, as of this writing,
128 * is true for all machines on which Solaris runs), this can potentially
129 * induce false sharing. The standard solution for false sharing is to pad
130 * out one's data structures (in this case, struct plock). However,
131 * given the size and (generally) sparse use of the proc_lock array, this
132 * is suboptimal. We therefore stride through the proc_lock array with
133 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as:
135 * log_2 (coherence_granularity / sizeof (kmutex_t))
137 * Under this scheme, false sharing is still possible -- but only when
138 * the number of active processes is very large. Note that the one-to-one
139 * mapping between prslots and lockslots is maintained.
/*
 * pid_getlockslot() -- map a prslot to a lockslot using the stride
 * described above.  NOTE(review): garbled fragment -- the return type
 * and braces are missing from this view.
 */
142 pid_getlockslot(int prslot
)
/* Largest multiple of 2^PLOCK_SHIFT that does not exceed v.v_proc. */
144 int even
= (v
.v_proc
>> PLOCK_SHIFT
) << PLOCK_SHIFT
;
/* Number of lockslots per "lap" through the strided array. */
145 int perlap
= even
>> PLOCK_SHIFT
;
/* Stride through proc_lock[]; the prslot<->lockslot map stays one-to-one. */
150 return (((prslot
% perlap
) << PLOCK_SHIFT
) + (prslot
/ perlap
));
154 * This function allocates a pid structure, a free pid, and optionally a
155 * slot in the proc table for it.
157 * pid_allocate() returns the new pid on success, -1 on failure.
/*
 * NOTE(review): garbled fragment -- local declarations, braces, the
 * pid-search retry/wraparound lines and the return statements are
 * missing from this view.
 */
160 pid_allocate(proc_t
*prp
, pid_t pid
, int flags
)
164 pid_t newpid
, startpid
;
/* Allocate the pid structure up front; KM_SLEEP may block. */
166 pidp
= kmem_zalloc(sizeof (struct pid
), KM_SLEEP
);
168 mutex_enter(&pidlinklock
);
/* A proc-table slot was requested but the /proc free list is empty. */
169 if ((flags
& PID_ALLOC_PROC
) && (pep
= procentfree
) == NULL
) {
171 * ran out of /proc directory entries
/* Caller supplied an explicit pid: it must be famous and unused. */
178 VERIFY3P(pid
, <, mpid
);
179 VERIFY3P(pid_lookup(pid
), ==, NULL
);
/* Search for an unused pid starting at mpid (wrap/retry lines not shown). */
185 ASSERT(minpid
<= mpid
&& mpid
< maxpid
);
190 if (++mpid
== maxpid
)
193 if (pid_lookup(newpid
) == NULL
)
/* Wrapped all the way around without finding a free pid: give up. */
196 if (mpid
== startpid
)
202 * Put pid into the pid hash table.
204 pidp
->pid_link
= HASHPID(newpid
);
205 HASHPID(newpid
) = pidp
;
207 pidp
->pid_id
= newpid
;
/* Claim a /proc directory entry and point the proc at its plock. */
209 if (flags
& PID_ALLOC_PROC
) {
210 procentfree
= pep
->pe_next
;
211 pidp
->pid_prslot
= pep
- procdir
;
214 prp
->p_lockp
= &proc_lock
[pid_getlockslot(pidp
->pid_prslot
)];
216 pidp
->pid_prslot
= 0;
219 mutex_exit(&pidlinklock
);
/* Failure path: undo the allocation. */
224 mutex_exit(&pidlinklock
);
225 kmem_free(pidp
, sizeof (struct pid
));
230 * decrement the reference count for pid
/*
 * pid_rele() -- unhash and free a struct pid.  NOTE(review): garbled
 * fragment -- the return type, local declaration of pidpp, and the
 * control lines of the unhash search loop are missing from this view.
 */
233 pid_rele(struct pid
*pidp
)
237 mutex_enter(&pidlinklock
);
/* pid0 is static and must never be freed. */
238 ASSERT(pidp
!= &pid0
);
/* Find the hash-chain link that points at pidp, then splice it out. */
240 pidpp
= &HASHPID(pidp
->pid_id
);
242 ASSERT(*pidpp
!= NULL
);
245 pidpp
= &(*pidpp
)->pid_link
;
248 *pidpp
= pidp
->pid_link
;
249 mutex_exit(&pidlinklock
);
251 kmem_free(pidp
, sizeof (*pidp
));
256 proc_entry_free(struct pid
*pidp
)
258 mutex_enter(&pidlinklock
);
259 pidp
->pid_prinactive
= 1;
260 procdir
[pidp
->pid_prslot
].pe_next
= procentfree
;
261 procentfree
= &procdir
[pidp
->pid_prslot
];
262 mutex_exit(&pidlinklock
);
266 * The original task needs to be passed in since the process has already been
267 * detached from the task at this point in time.
/*
 * pid_exit() -- final teardown of an exiting proc: leave the process
 * group and session, free its /proc slot, unlink it from the practive
 * list, and free the proc structure.  Caller holds pidlock (asserted).
 * NOTE(review): garbled fragment -- braces, several statements (pgexit,
 * pid_rele calls, list-unlink conditionals) are missing from this view.
 */
270 pid_exit(proc_t
*prp
, struct task
*tk
)
273 zone_t
*zone
= prp
->p_zone
;
275 ASSERT(MUTEX_HELD(&pidlock
));
278 * Exit process group. If it is NULL, it's because fork failed
279 * before calling pgjoin().
281 ASSERT(prp
->p_pgidp
!= NULL
|| prp
->p_stat
== SIDL
);
282 if (prp
->p_pgidp
!= NULL
)
285 sess_rele(prp
->p_sessp
, B_TRUE
);
289 proc_entry_free(pidp
);
/* Unlink prp from the doubly-linked practive list. */
294 if (practive
== prp
) {
295 practive
= prp
->p_next
;
299 prp
->p_next
->p_prev
= prp
->p_prev
;
302 prp
->p_prev
->p_next
= prp
->p_next
;
307 mutex_destroy(&prp
->p_crlock
);
308 kmem_cache_free(process_cache
, prp
);
312 * Decrement the process counts of the original task, project and zone.
314 mutex_enter(&zone
->zone_nlwps_lock
);
316 tk
->tk_proj
->kpj_nprocs
--;
318 mutex_exit(&zone
->zone_nlwps_lock
);
322 * Find a process visible from the specified zone given its process ID.
/*
 * prfind_zone() -- return the proc for `pid' if it is visible from
 * `zoneid' (or from any zone when ALL_ZONES).  Caller holds pidlock.
 * NOTE(review): garbled fragment -- return type, local declarations,
 * and the return statements are missing from this view.
 */
325 prfind_zone(pid_t pid
, zoneid_t zoneid
)
330 ASSERT(MUTEX_HELD(&pidlock
));
/* pid hash lookup is protected by pidlinklock, not pidlock. */
332 mutex_enter(&pidlinklock
);
333 pidp
= pid_lookup(pid
);
334 mutex_exit(&pidlinklock
);
/* An inactive pid has no live proc behind its /proc slot. */
335 if (pidp
!= NULL
&& pidp
->pid_prinactive
== 0) {
336 p
= procdir
[pidp
->pid_prslot
].pe_proc
;
337 if (zoneid
== ALL_ZONES
|| p
->p_zone
->zone_id
== zoneid
)
344 * Find a process given its process ID. This obeys zone restrictions,
345 * so if the caller is in a non-global zone it won't find processes
346 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to
347 * bypass this restriction.
/*
 * NOTE(review): garbled fragment of prfind() -- the signature and the
 * global-zone branch body are missing from this view.
 */
354 if (INGLOBALZONE(curproc
))
357 zoneid
= getzoneid();
358 return (prfind_zone(pid
, zoneid
));
/*
 * pgfind_zone() -- return the head of the list of processes in process
 * group `pgid' visible from `zoneid'.  Caller holds pidlock (asserted).
 * NOTE(review): garbled fragment -- return type, braces, and return
 * statements are missing from this view.
 */
362 pgfind_zone(pid_t pgid
, zoneid_t zoneid
)
366 ASSERT(MUTEX_HELD(&pidlock
));
368 mutex_enter(&pidlinklock
);
369 pidp
= pid_lookup(pgid
);
370 mutex_exit(&pidlinklock
);
/* pid_pglink heads the chain of group members for this pgid. */
372 proc_t
*p
= pidp
->pid_pglink
;
374 if (zoneid
== ALL_ZONES
|| pgid
== 0 || p
== NULL
||
375 p
->p_zone
->zone_id
== zoneid
)
382 * return the head of the list of processes whose process group ID is 'pgid',
383 * or NULL, if no such process group
/*
 * NOTE(review): garbled fragment of pgfind() -- the signature and the
 * global-zone branch body are missing from this view.
 */
390 if (INGLOBALZONE(curproc
))
393 zoneid
= getzoneid();
394 return (pgfind_zone(pgid
, zoneid
));
398 * Sets P_PR_LOCK on a non-system process. Process must be fully created
399 * and not exiting to succeed.
401 * Returns 0 on success.
402 * Returns 1 if P_PR_LOCK is set.
403 * Returns -1 if proc is in invalid state.
/*
 * NOTE(review): garbled fragment -- the return type, braces, and the
 * three return statements are missing from this view.
 */
406 sprtrylock_proc(proc_t
*p
)
408 ASSERT(MUTEX_HELD(&p
->p_lock
));
410 /* skip system and incomplete processes */
411 if (p
->p_stat
== SIDL
|| p
->p_stat
== SZOMB
||
412 (p
->p_flag
& (SSYS
| SEXITING
| SEXITLWPS
))) {
/* Someone else already holds the /proc lock on this process. */
416 if (p
->p_proc_flag
& P_PR_LOCK
)
419 p
->p_proc_flag
|= P_PR_LOCK
;
420 THREAD_KPRI_REQUEST();
426 * Wait for P_PR_LOCK to become clear. Returns with p_lock dropped,
427 * and the proc pointer no longer valid, as the proc may have exited.
/*
 * NOTE(review): garbled fragment -- the declaration of `mp' (the saved
 * &p->p_lock pointer used below) and the mutex_exit(mp) after the wait
 * are missing from this view.
 */
430 sprwaitlock_proc(proc_t
*p
)
434 ASSERT(MUTEX_HELD(&p
->p_lock
));
435 ASSERT(p
->p_proc_flag
& P_PR_LOCK
);
438 * p_lock is persistent, but p itself is not -- it could
439 * vanish during cv_wait(). Load p->p_lock now so we can
440 * drop it after cv_wait() without referencing p.
443 cv_wait(&pr_pid_cv
[p
->p_slot
], mp
);
448 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
449 * Returns the proc pointer on success, NULL on failure. sprlock() is
450 * really just a stripped-down version of pr_p_lock() to allow practive
451 * walkers like dofusers() and dumpsys() to synchronize with /proc.
/*
 * NOTE(review): garbled fragment -- the return type, local
 * declarations, the retry loop around sprtrylock_proc(), and the
 * return statements are missing from this view.
 */
454 sprlock_zone(pid_t pid
, zoneid_t zoneid
)
460 mutex_enter(&pidlock
);
461 if ((p
= prfind_zone(pid
, zoneid
)) == NULL
) {
462 mutex_exit(&pidlock
);
/* Take p_lock before dropping pidlock so p cannot disappear. */
465 mutex_enter(&p
->p_lock
);
466 mutex_exit(&pidlock
);
471 ret
= sprtrylock_proc(p
);
/* ret < 0: invalid state; ret == 0: locked; otherwise wait and retry. */
473 mutex_exit(&p
->p_lock
);
475 } else if (ret
== 0) {
/*
 * NOTE(review): garbled fragment -- presumably sprlock(), the
 * zone-restricted wrapper around sprlock_zone(); its signature and the
 * global-zone branch body are missing from this view.
 */
488 if (INGLOBALZONE(curproc
))
491 zoneid
= getzoneid();
492 return (sprlock_zone(pid
, zoneid
));
496 sprlock_proc(proc_t
*p
)
498 ASSERT(MUTEX_HELD(&p
->p_lock
));
500 while (p
->p_proc_flag
& P_PR_LOCK
) {
501 cv_wait(&pr_pid_cv
[p
->p_slot
], &p
->p_lock
);
504 p
->p_proc_flag
|= P_PR_LOCK
;
505 THREAD_KPRI_REQUEST();
/*
 * NOTE(review): garbled fragment -- presumably sprunlock(): release
 * P_PR_LOCK, wake one waiter, and drop p_lock.  The signature and the
 * enclosing conditional around the first mutex_exit (it looks like a
 * panic-time early-out branch) are missing from this view -- confirm
 * against the full source.
 */
512 mutex_exit(&p
->p_lock
);
516 ASSERT(p
->p_proc_flag
& P_PR_LOCK
);
517 ASSERT(MUTEX_HELD(&p
->p_lock
));
/* Wake one thread blocked in sprlock_proc()/sprwaitlock_proc(). */
519 cv_signal(&pr_pid_cv
[p
->p_slot
]);
520 p
->p_proc_flag
&= ~P_PR_LOCK
;
521 mutex_exit(&p
->p_lock
);
522 THREAD_KPRI_RELEASE();
/*
 * NOTE(review): garbled fragment -- presumably the body of pid_init(),
 * the boot-time setup of the pid hash table, the /proc directory, and
 * the per-slot condvar and plock arrays; the function signature and
 * local declarations are missing from this view.
 */
/* Size the hash table for pid_hashlen average chain length (power of 2). */
530 pid_hashsz
= 1 << highbit(v
.v_proc
/ pid_hashlen
);
532 pidhash
= kmem_zalloc(sizeof (struct pid
*) * pid_hashsz
, KM_SLEEP
);
533 procdir
= kmem_alloc(sizeof (union procent
) * v
.v_proc
, KM_SLEEP
);
534 pr_pid_cv
= kmem_zalloc(sizeof (kcondvar_t
) * v
.v_proc
, KM_SLEEP
);
535 proc_lock
= kmem_zalloc(sizeof (struct plock
) * v
.v_proc
, KM_SLEEP
);
/* Slot 0 is reserved for proc_sched, which heads the practive list. */
538 practive
= proc_sched
;
539 proc_sched
->p_next
= NULL
;
540 procdir
[0].pe_proc
= proc_sched
;
/* Chain the remaining procdir slots onto the free list. */
542 procentfree
= &procdir
[1];
543 for (i
= 1; i
< v
.v_proc
- 1; i
++)
544 procdir
[i
].pe_next
= &procdir
[i
+1];
545 procdir
[i
].pe_next
= NULL
;
/*
 * NOTE(review): garbled fragment -- presumably pid_entry(slot): return
 * the proc occupying a procdir slot, or NULL if the slot is free or
 * the proc is still being created.  The signature, local declarations,
 * and return statements are missing from this view.
 */
558 ASSERT(MUTEX_HELD(&pidlock
));
559 ASSERT(slot
>= 0 && slot
< v
.v_proc
);
/*
 * A pe_next pointing back into procdir[] means this entry is on the
 * free list (free entries hold links, occupied ones hold a proc_t *).
 */
561 pep
= procdir
[slot
].pe_next
;
562 if (pep
>= procdir
&& pep
< &procdir
[v
.v_proc
])
/* SIDL: proc exists but is not fully created yet. */
564 prp
= procdir
[slot
].pe_proc
;
565 if (prp
!= 0 && prp
->p_stat
== SIDL
)
571 * Send the specified signal to all processes whose process group ID is
/*
 * NOTE(review): garbled fragment -- the return type, local
 * declarations, braces, and the return statements (failure and
 * success) are missing from this view.
 */
576 signal(pid_t pgid
, int sig
)
581 mutex_enter(&pidlock
);
582 mutex_enter(&pidlinklock
);
/* No such process group: back out of both locks. */
583 if (pgid
== 0 || (pidp
= pid_lookup(pgid
)) == NULL
) {
584 mutex_exit(&pidlinklock
);
585 mutex_exit(&pidlock
);
588 mutex_exit(&pidlinklock
);
/* Post the signal to every member of the group, one p_lock at a time. */
589 for (prp
= pidp
->pid_pglink
; prp
; prp
= prp
->p_pglink
) {
590 mutex_enter(&prp
->p_lock
);
591 sigtoproc(prp
, NULL
, sig
);
592 mutex_exit(&prp
->p_lock
);
594 mutex_exit(&pidlock
);
598 * Send the specified signal to the specified process
602 prsignal(struct pid
*pidp
, int sig
)
604 if (!(pidp
->pid_prinactive
))
605 psignal(procdir
[pidp
->pid_prslot
].pe_proc
, sig
);
608 #include <sys/sunddi.h>
611 * DDI/DKI interfaces for drivers to send signals to processes
615 * obtain an opaque reference to a process for signaling
/*
 * NOTE(review): garbled fragment of proc_ref() -- the signature, the
 * hold on the pid (presumably PID_HOLD), and the return statement are
 * missing from this view.
 */
622 mutex_enter(&pidlock
);
623 pidp
= curproc
->p_pidp
;
625 mutex_exit(&pidlock
);
631 * release a reference to a process
632 * - a process can exit even if a driver has a reference to it
633 * - one proc_unref for every proc_ref
636 proc_unref(void *pref
)
638 mutex_enter(&pidlock
);
639 PID_RELE((struct pid
*)pref
);
640 mutex_exit(&pidlock
);
644 * send a signal to a process
646 * - send the process the signal
647 * - if the process went away, return a -1
648 * - if the process is still there return 0
/*
 * NOTE(review): garbled fragment -- the return type, braces, and the
 * actual signal-posting call (presumably psignal() on the /proc slot's
 * proc) are missing from this view.
 */
651 proc_signal(void *pref
, int sig
)
653 struct pid
*pidp
= pref
;
656 return (pidp
->pid_prinactive
? -1 : 0);
/* Per-<uid,zone> process-count hash table, sized at boot. */
660 static struct upcount
**upc_hash
; /* a boot time allocated array */
661 static ulong_t upc_hashmask
;
662 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask)
665 * Get us off the ground. Called once at boot.
/*
 * NOTE(review): garbled fragment of upcount_init() (declared static
 * near the top of the file) -- the signature, braces, and the tail of
 * the kmem_zalloc call are missing from this view.
 */
670 ulong_t upc_hashsize
;
673 * An entry per MB of memory is our current guess
676 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
677 * converts pages to megs (without overflowing a u_int
678 * if you have more than 4G of memory, like ptob(physmem)/1M
681 upc_hashsize
= (1 << highbit(physmem
>> (20 - PAGESHIFT
)));
682 upc_hashmask
= upc_hashsize
- 1;
683 upc_hash
= kmem_zalloc(upc_hashsize
* sizeof (struct upcount
*),
688 * Increment the number of processes associated with a given uid and zoneid.
/*
 * NOTE(review): garbled fragment -- the return type, braces, the
 * up_count increment, the KM_NOSLEEP failure branch, the re-check
 * after re-acquiring pidlock, and the list insertion of `new' are
 * missing from this view.  Caller holds pidlock (asserted).
 */
691 upcount_inc(uid_t uid
, zoneid_t zoneid
)
693 struct upcount
**upc
, **hupc
;
696 ASSERT(MUTEX_HELD(&pidlock
));
698 hupc
= &upc_hash
[UPC_HASH(uid
, zoneid
)];
/* Search the chain for an existing <uid,zoneid> entry. */
701 while ((*upc
) != NULL
) {
702 if ((*upc
)->up_uid
== uid
&& (*upc
)->up_zoneid
== zoneid
) {
706 * did not need `new' afterall.
708 kmem_free(new, sizeof (*new));
712 upc
= &(*upc
)->up_next
;
716 * There is no entry for this <uid,zoneid> pair.
717 * Allocate one. If we have to drop pidlock, check
/* Try KM_NOSLEEP first; only drop pidlock if a sleeping alloc is needed. */
721 new = (struct upcount
*)kmem_alloc(sizeof (*new), KM_NOSLEEP
);
723 mutex_exit(&pidlock
);
724 new = (struct upcount
*)kmem_alloc(sizeof (*new),
726 mutex_enter(&pidlock
);
733 * On the assumption that a new user is going to do some
734 * more forks, put the new upcount structure on the front.
739 new->up_zoneid
= zoneid
;
747 * Decrement the number of processes a given uid and zoneid has.
750 upcount_dec(uid_t uid
, zoneid_t zoneid
)
752 struct upcount
**upc
;
753 struct upcount
*done
;
755 ASSERT(MUTEX_HELD(&pidlock
));
757 upc
= &upc_hash
[UPC_HASH(uid
, zoneid
)];
758 while ((*upc
) != NULL
) {
759 if ((*upc
)->up_uid
== uid
&& (*upc
)->up_zoneid
== zoneid
) {
761 if ((*upc
)->up_count
== 0) {
763 *upc
= (*upc
)->up_next
;
764 kmem_free(done
, sizeof (*done
));
768 upc
= &(*upc
)->up_next
;
770 cmn_err(CE_PANIC
, "decr_upcount-off the end");
774 * Returns the number of processes a uid has.
775 * Non-existent uid's are assumed to have no processes.
/*
 * NOTE(review): garbled fragment of upcount_get() -- the return type,
 * braces, and the trailing not-found return (presumably 0) run past or
 * are missing from this view.  Caller holds pidlock (asserted).
 */
778 upcount_get(uid_t uid
, zoneid_t zoneid
)
782 ASSERT(MUTEX_HELD(&pidlock
));
784 upc
= upc_hash
[UPC_HASH(uid
, zoneid
)];
785 while (upc
!= NULL
) {
786 if (upc
->up_uid
== uid
&& upc
->up_zoneid
== zoneid
) {
787 return (upc
->up_count
);