sys/kern/kern_proc.c

   1 /*      $NetBSD: kern_proc.c,v 1.158 2009/11/26 00:19:11 matt Exp $     */
   2
   3 /*-
   4  * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software contributed to The NetBSD Foundation
   8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
   9  * NASA Ames Research Center, and by Andrew Doran.
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30  * POSSIBILITY OF SUCH DAMAGE.
  31  */
  32
  33 /*
  34  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  35  *      The Regents of the University of California.  All rights reserved.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. Neither the name of the University nor the names of its contributors
  46  *    may be used to endorse or promote products derived from this software
  47  *    without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  59  * SUCH DAMAGE.
  60  *
  61  *      @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
  62  */
  63
  64 #include <sys/cdefs.h>
  65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.158 2009/11/26 00:19:11 matt Exp $");
  66
  67 #include "opt_kstack.h"
  68 #include "opt_maxuprc.h"
  69
  70 #include <sys/param.h>
  71 #include <sys/systm.h>
  72 #include <sys/kernel.h>
  73 #include <sys/proc.h>
  74 #include <sys/resourcevar.h>
  75 #include <sys/buf.h>
  76 #include <sys/acct.h>
  77 #include <sys/wait.h>
  78 #include <sys/file.h>
  79 #include <ufs/ufs/quota.h>
  80 #include <sys/uio.h>
  81 #include <sys/pool.h>
  82 #include <sys/pset.h>
  83 #include <sys/mbuf.h>
  84 #include <sys/ioctl.h>
  85 #include <sys/tty.h>
  86 #include <sys/signalvar.h>
  87 #include <sys/ras.h>
  88 #include <sys/sa.h>
  89 #include <sys/savar.h>
  90 #include <sys/filedesc.h>
  91 #include "sys/syscall_stats.h"
  92 #include <sys/kauth.h>
  93 #include <sys/sleepq.h>
  94 #include <sys/atomic.h>
  95 #include <sys/kmem.h>
  96
  97 #include <uvm/uvm.h>
  98 #include <uvm/uvm_extern.h>
  99
 100 /*
 101  * Other process lists
 102  */
 103
 104 struct proclist allproc;
 105 struct proclist zombproc;       /* resources have been freed */
 106
 107 kmutex_t        *proc_lock;
 108
 109 /*
 110  * pid to proc lookup is done by indexing the pid_table array.
 111  * Since pid numbers are only allocated when an empty slot
 112  * has been found, there is no need to search any lists ever.
 113  * (an orphaned pgrp will lock the slot, a session will lock
 114  * the pgrp with the same number.)
 115  * If the table is too small it is reallocated with twice the
 116  * previous size and the entries 'unzipped' into the two halves.
 117  * A linked list of free entries is passed through the pt_proc
 118  * field of 'free' items - set odd to be an invalid ptr.
 119  */
 120
 121 struct pid_table {
 122         struct proc     *pt_proc;
 123         struct pgrp     *pt_pgrp;
 124 };
 125 #if 1   /* strongly typed cast - should be a noop */
 126 static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
 127 #else
 128 #define p2u(p) ((uint)p)
 129 #endif
 130 #define P_VALID(p) (!(p2u(p) & 1))
 131 #define P_NEXT(p) (p2u(p) >> 1)
 132 #define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
 133
 134 #define INITIAL_PID_TABLE_SIZE  (1 << 5)
 135 static struct pid_table *pid_table;
 136 static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
 137 static uint pid_alloc_lim;      /* max we allocate before growing table */
 138 static uint pid_alloc_cnt;      /* number of allocated pids */
 139
 140 /* links through free slots - never empty! */
 141 static uint next_free_pt, last_free_pt;
 142 static pid_t pid_max = PID_MAX;         /* largest value we allocate */
 143
 144 /* Components of the first process -- never freed. */
 145
 146 extern struct emul emul_netbsd; /* defined in kern_exec.c */
 147
 148 struct session session0 = {
 149         .s_count = 1,
 150         .s_sid = 0,
 151 };
 152 struct pgrp pgrp0 = {
 153         .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
 154         .pg_session = &session0,
 155 };
 156 filedesc_t filedesc0;
 157 struct cwdinfo cwdi0 = {
 158         .cwdi_cmask = CMASK,            /* see cmask below */
 159         .cwdi_refcnt = 1,
 160 };
 161 struct plimit limit0;
 162 struct pstats pstat0;
 163 struct vmspace vmspace0;
 164 struct sigacts sigacts0;
 165 struct turnstile turnstile0;
 166 struct proc proc0 = {
 167         .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
 168         .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
 169         .p_nlwps = 1,
 170         .p_nrlwps = 1,
 171         .p_nlwpid = 1,          /* must match lwp0.l_lid */
 172         .p_pgrp = &pgrp0,
 173         .p_comm = "system",
 174         /*
 175          * Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
 176          * when they exit.  init(8) can easily wait them out for us.
 177          */
 178         .p_flag = PK_SYSTEM | PK_NOCLDWAIT,
 179         .p_stat = SACTIVE,
 180         .p_nice = NZERO,
 181         .p_emul = &emul_netbsd,
 182         .p_cwdi = &cwdi0,
 183         .p_limit = &limit0,
 184         .p_fd = &filedesc0,
 185         .p_vmspace = &vmspace0,
 186         .p_stats = &pstat0,
 187         .p_sigacts = &sigacts0,
 188 };
 189 struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
 190 #ifdef LWP0_CPU_INFO
 191         .l_cpu = LWP0_CPU_INFO,
 192 #endif
 193         .l_proc = &proc0,
 194         .l_lid = 1,
 195         .l_flag = LW_SYSTEM,
 196         .l_stat = LSONPROC,
 197         .l_ts = &turnstile0,
 198         .l_syncobj = &sched_syncobj,
 199         .l_refcnt = 1,
 200         .l_priority = PRI_USER + NPRI_USER - 1,
 201         .l_inheritedprio = -1,
 202         .l_class = SCHED_OTHER,
 203         .l_psid = PS_NONE,
 204         .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders),
 205         .l_name = __UNCONST("swapper"),
 206         .l_fd = &filedesc0,
 207 };
 208 kauth_cred_t cred0;
 209
 210 int nofile = NOFILE;
 211 int maxuprc = MAXUPRC;
 212 int cmask = CMASK;
 213
 214 MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
 215 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
 216
 217 /*
 218  * The process list descriptors, used during pid allocation and
 219  * by sysctl.  No locking on this data structure is needed since
 220  * it is completely static.
 221  */
 222 const struct proclist_desc proclists[] = {
 223         { &allproc      },
 224         { &zombproc     },
 225         { NULL          },
 226 };
 227
 228 static struct pgrp *    pg_remove(pid_t);
 229 static void             pg_delete(pid_t);
 230 static void             orphanpg(struct pgrp *);
 231
 232 static specificdata_domain_t proc_specificdata_domain;
 233
 234 static pool_cache_t proc_cache;
 235
 236 static kauth_listener_t proc_listener;
 237
 238 static int
 239 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
 240     void *arg0, void *arg1, void *arg2, void *arg3)
 241 {
 242         struct proc *p;
 243         int result;
 244
 245         result = KAUTH_RESULT_DEFER;
 246         p = arg0;
 247
 248         switch (action) {
 249         case KAUTH_PROCESS_CANSEE: {
 250                 enum kauth_process_req req;
 251
 252                 req = (enum kauth_process_req)arg1;
 253
 254                 switch (req) {
 255                 case KAUTH_REQ_PROCESS_CANSEE_ARGS:
 256                 case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
 257                 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
 258                         result = KAUTH_RESULT_ALLOW;
 259
 260                         break;
 261
 262                 case KAUTH_REQ_PROCESS_CANSEE_ENV:
 263                         if (kauth_cred_getuid(cred) !=
 264                             kauth_cred_getuid(p->p_cred) ||
 265                             kauth_cred_getuid(cred) !=
 266                             kauth_cred_getsvuid(p->p_cred))
 267                                 break;
 268
 269                         result = KAUTH_RESULT_ALLOW;
 270
 271                         break;
 272
 273                 default:
 274                         break;
 275                 }
 276
 277                 break;
 278                 }
 279
 280         case KAUTH_PROCESS_FORK: {
 281                 int lnprocs = (int)(unsigned long)arg2;
 282
 283                 /*
 284                  * Don't allow a nonprivileged user to use the last few
 285                  * processes. The variable lnprocs is the current number of
 286                  * processes, maxproc is the limit.
 287                  */
 288                 if (__predict_false((lnprocs >= maxproc - 5)))
 289                         break;
 290
 291                 result = KAUTH_RESULT_ALLOW;
 292
 293                 break;
 294                 }
 295
 296         case KAUTH_PROCESS_CORENAME:
 297         case KAUTH_PROCESS_STOPFLAG:
 298                 if (proc_uidmatch(cred, p->p_cred) == 0)
 299                         result = KAUTH_RESULT_ALLOW;
 300
 301                 break;
 302
 303         default:
 304                 break;
 305         }
 306
 307         return result;
 308 }
 309
 310 /*
 311  * Initialize global process hashing structures.
 312  */
 313 void
 314 procinit(void)
 315 {
 316         const struct proclist_desc *pd;
 317         u_int i;
 318 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
 319
 320         for (pd = proclists; pd->pd_list != NULL; pd++)
 321                 LIST_INIT(pd->pd_list);
 322
 323         proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 324         pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
 325             * sizeof(struct pid_table), KM_SLEEP);
 326
 327         /* Set free list running through table...
 328            Preset 'use count' above PID_MAX so we allocate pid 1 next. */
 329         for (i = 0; i <= pid_tbl_mask; i++) {
 330                 pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
 331                 pid_table[i].pt_pgrp = 0;
 332         }
 333         /* slot 0 is just grabbed */
 334         next_free_pt = 1;
 335         /* Need to fix last entry. */
 336         last_free_pt = pid_tbl_mask;
 337         pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
 338         /* point at which we grow table - to avoid reusing pids too often */
 339         pid_alloc_lim = pid_tbl_mask - 1;
 340 #undef LINK_EMPTY
 341
 342         proc_specificdata_domain = specificdata_domain_create();
 343         KASSERT(proc_specificdata_domain != NULL);
 344
 345         proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0,
 346             "procpl", NULL, IPL_NONE, NULL, NULL, NULL);
 347
 348         proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
 349             proc_listener_cb, NULL);
 350 }
 351
 352 /*
 353  * Initialize process 0.
 354  */
 355 void
 356 proc0_init(void)
 357 {
 358         struct proc *p;
 359         struct pgrp *pg;
 360         struct lwp *l;
 361         rlim_t lim;
 362         int i;
 363
 364         p = &proc0;
 365         pg = &pgrp0;
 366         l = &lwp0;
 367
 368         KASSERT((void *)uvm_lwp_getuarea(l) != NULL);
 369         KASSERT(l->l_lid == p->p_nlwpid);
 370
 371         mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
 372         mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
 373         p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 374
 375         rw_init(&p->p_reflock);
 376         cv_init(&p->p_waitcv, "wait");
 377         cv_init(&p->p_lwpcv, "lwpwait");
 378
 379         LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
 380
 381         pid_table[0].pt_proc = p;
 382         LIST_INSERT_HEAD(&allproc, p, p_list);
 383         LIST_INSERT_HEAD(&alllwp, l, l_list);
 384
 385         pid_table[0].pt_pgrp = pg;
 386         LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
 387
 388 #ifdef __HAVE_SYSCALL_INTERN
 389         (*p->p_emul->e_syscall_intern)(p);
 390 #endif
 391
 392         callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
 393         callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
 394         cv_init(&l->l_sigcv, "sigwait");
 395
 396         /* Create credentials. */
 397         cred0 = kauth_cred_alloc();
 398         p->p_cred = cred0;
 399         kauth_cred_hold(cred0);
 400         l->l_cred = cred0;
 401
 402         /* Create the CWD info. */
 403         rw_init(&cwdi0.cwdi_lock);
 404
 405         /* Create the limits structures. */
 406         mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
 407         for (i = 0; i < __arraycount(limit0.pl_rlimit); i++)
 408                 limit0.pl_rlimit[i].rlim_cur =
 409                     limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
 410
 411         limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
 412         limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
 413             maxfiles < nofile ? maxfiles : nofile;
 414
 415         limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
 416         limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
 417             maxproc < maxuprc ? maxproc : maxuprc;
 418
 419         lim = ptoa(uvmexp.free);
 420         limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
 421         limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
 422         limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
 423         limit0.pl_corename = defcorename;
 424         limit0.pl_refcnt = 1;
 425         limit0.pl_sv_limit = NULL;
 426
 427         /* Configure virtual memory system, set vm rlimits. */
 428         uvm_init_limits(p);
 429
 430         /* Initialize file descriptor table for proc0. */
 431         fd_init(&filedesc0);
 432
 433         /*
 434          * Initialize proc0's vmspace, which uses the kernel pmap.
 435          * All kernel processes (which never have user space mappings)
 436          * share proc0's vmspace, and thus, the kernel pmap.
 437          */
 438         uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
 439             trunc_page(VM_MAX_ADDRESS));
 440
 441         /* Initialize signal state for proc0. XXX IPL_SCHED */
 442         mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
 443         siginit(p);
 444
 445         proc_initspecific(p);
 446         lwp_initspecific(l);
 447
 448         SYSCALL_TIME_LWP_INIT(l);
 449 }
 450
 451 /*
 452  * Session reference counting.
 453  */
 454
 455 void
 456 proc_sesshold(struct session *ss)
 457 {
 458
 459         KASSERT(mutex_owned(proc_lock));
 460         ss->s_count++;
 461 }
 462
 463 void
 464 proc_sessrele(struct session *ss)
 465 {
 466
 467         KASSERT(mutex_owned(proc_lock));
 468         /*
 469          * We keep the pgrp with the same id as the session in order to
 470          * stop a process being given the same pid.  Since the pgrp holds
 471          * a reference to the session, it must be a 'zombie' pgrp by now.
 472          */
 473         if (--ss->s_count == 0) {
 474                 struct pgrp *pg;
 475
 476                 pg = pg_remove(ss->s_sid);
 477                 mutex_exit(proc_lock);
 478
 479                 kmem_free(pg, sizeof(struct pgrp));
 480                 kmem_free(ss, sizeof(struct session));
 481         } else {
 482                 mutex_exit(proc_lock);
 483         }
 484 }
 485
 486 /*
 487  * Check that the specified process group is in the session of the
 488  * specified process.
 489  * Treats -ve ids as process ids.
 490  * Used to validate TIOCSPGRP requests.
 491  */
 492 int
 493 pgid_in_session(struct proc *p, pid_t pg_id)
 494 {
 495         struct pgrp *pgrp;
 496         struct session *session;
 497         int error;
 498
 499         mutex_enter(proc_lock);
 500         if (pg_id < 0) {
 501                 struct proc *p1 = p_find(-pg_id, PFIND_LOCKED | PFIND_UNLOCK_FAIL);
 502                 if (p1 == NULL)
 503                         return EINVAL;
 504                 pgrp = p1->p_pgrp;
 505         } else {
 506                 pgrp = pg_find(pg_id, PFIND_LOCKED | PFIND_UNLOCK_FAIL);
 507                 if (pgrp == NULL)
 508                         return EINVAL;
 509         }
 510         session = pgrp->pg_session;
 511         if (session != p->p_pgrp->pg_session)
 512                 error = EPERM;
 513         else
 514                 error = 0;
 515         mutex_exit(proc_lock);
 516
 517         return error;
 518 }
 519
 520 /*
 521  * p_inferior: is p an inferior of q?
 522  */
 523 static inline bool
 524 p_inferior(struct proc *p, struct proc *q)
 525 {
 526
 527         KASSERT(mutex_owned(proc_lock));
 528
 529         for (; p != q; p = p->p_pptr)
 530                 if (p->p_pid == 0)
 531                         return false;
 532         return true;
 533 }
 534
 535 /*
 536  * Locate a process by number
 537  */
 538 struct proc *
 539 p_find(pid_t pid, uint flags)
 540 {
 541         struct proc *p;
 542         char stat;
 543
 544         if (!(flags & PFIND_LOCKED))
 545                 mutex_enter(proc_lock);
 546
 547         p = pid_table[pid & pid_tbl_mask].pt_proc;
 548
 549         /* Only allow live processes to be found by pid. */
 550         /* XXXSMP p_stat */
 551         if (P_VALID(p) && p->p_pid == pid && ((stat = p->p_stat) == SACTIVE ||
 552             stat == SSTOP || ((flags & PFIND_ZOMBIE) &&
 553             (stat == SZOMB || stat == SDEAD || stat == SDYING)))) {
 554                 if (flags & PFIND_UNLOCK_OK)
 555                          mutex_exit(proc_lock);
 556                 return p;
 557         }
 558         if (flags & PFIND_UNLOCK_FAIL)
 559                 mutex_exit(proc_lock);
 560         return NULL;
 561 }
 562
 563
 564 /*
 565  * Locate a process group by number
 566  */
 567 struct pgrp *
 568 pg_find(pid_t pgid, uint flags)
 569 {
 570         struct pgrp *pg;
 571
 572         if (!(flags & PFIND_LOCKED))
 573                 mutex_enter(proc_lock);
 574         pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
 575         /*
 576          * Can't look up a pgrp that only exists because the session
 577          * hasn't died yet (traditional)
 578          */
 579         if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
 580                 if (flags & PFIND_UNLOCK_FAIL)
 581                          mutex_exit(proc_lock);
 582                 return NULL;
 583         }
 584
 585         if (flags & PFIND_UNLOCK_OK)
 586                 mutex_exit(proc_lock);
 587         return pg;
 588 }
 589
 590 static void
 591 expand_pid_table(void)
 592 {
 593         size_t pt_size, tsz;
 594         struct pid_table *n_pt, *new_pt;
 595         struct proc *proc;
 596         struct pgrp *pgrp;
 597         pid_t pid;
 598         u_int i;
 599
 600         pt_size = pid_tbl_mask + 1;
 601         tsz = pt_size * 2 * sizeof(struct pid_table);
 602         new_pt = kmem_alloc(tsz, KM_SLEEP);
 603
 604         mutex_enter(proc_lock);
 605         if (pt_size != pid_tbl_mask + 1) {
 606                 /* Another process beat us to it... */
 607                 mutex_exit(proc_lock);
 608                 kmem_free(new_pt, tsz);
 609                 return;
 610         }
 611
 612         /*
 613          * Copy entries from old table into new one.
 614          * If 'pid' is 'odd' we need to place in the upper half,
 615          * even pid's to the lower half.
 616          * Free items stay in the low half so we don't have to
 617          * fixup the reference to them.
 618          * We stuff free items on the front of the freelist
 619          * because we can't write to unmodified entries.
 620          * Processing the table backwards maintains a semblance
 621          * of issueing pid numbers that increase with time.
 622          */
 623         i = pt_size - 1;
 624         n_pt = new_pt + i;
 625         for (; ; i--, n_pt--) {
 626                 proc = pid_table[i].pt_proc;
 627                 pgrp = pid_table[i].pt_pgrp;
 628                 if (!P_VALID(proc)) {
 629                         /* Up 'use count' so that link is valid */
 630                         pid = (P_NEXT(proc) + pt_size) & ~pt_size;
 631                         proc = P_FREE(pid);
 632                         if (pgrp)
 633                                 pid = pgrp->pg_id;
 634                 } else
 635                         pid = proc->p_pid;
 636
 637                 /* Save entry in appropriate half of table */
 638                 n_pt[pid & pt_size].pt_proc = proc;
 639                 n_pt[pid & pt_size].pt_pgrp = pgrp;
 640
 641                 /* Put other piece on start of free list */
 642                 pid = (pid ^ pt_size) & ~pid_tbl_mask;
 643                 n_pt[pid & pt_size].pt_proc =
 644                                     P_FREE((pid & ~pt_size) | next_free_pt);
 645                 n_pt[pid & pt_size].pt_pgrp = 0;
 646                 next_free_pt = i | (pid & pt_size);
 647                 if (i == 0)
 648                         break;
 649         }
 650
 651         /* Save old table size and switch tables */
 652         tsz = pt_size * sizeof(struct pid_table);
 653         n_pt = pid_table;
 654         pid_table = new_pt;
 655         pid_tbl_mask = pt_size * 2 - 1;
 656
 657         /*
 658          * pid_max starts as PID_MAX (= 30000), once we have 16384
 659          * allocated pids we need it to be larger!
 660          */
 661         if (pid_tbl_mask > PID_MAX) {
 662                 pid_max = pid_tbl_mask * 2 + 1;
 663                 pid_alloc_lim |= pid_alloc_lim << 1;
 664         } else
 665                 pid_alloc_lim <<= 1;    /* doubles number of free slots... */
 666
 667         mutex_exit(proc_lock);
 668         kmem_free(n_pt, tsz);
 669 }
 670
 671 struct proc *
 672 proc_alloc(void)
 673 {
 674         struct proc *p;
 675         int nxt;
 676         pid_t pid;
 677         struct pid_table *pt;
 678
 679         p = pool_cache_get(proc_cache, PR_WAITOK);
 680         p->p_stat = SIDL;                       /* protect against others */
 681
 682         proc_initspecific(p);
 683         /* allocate next free pid */
 684
 685         for (;;expand_pid_table()) {
 686                 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
 687                         /* ensure pids cycle through 2000+ values */
 688                         continue;
 689                 mutex_enter(proc_lock);
 690                 pt = &pid_table[next_free_pt];
 691 #ifdef DIAGNOSTIC
 692                 if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
 693                         panic("proc_alloc: slot busy");
 694 #endif
 695                 nxt = P_NEXT(pt->pt_proc);
 696                 if (nxt & pid_tbl_mask)
 697                         break;
 698                 /* Table full - expand (NB last entry not used....) */
 699                 mutex_exit(proc_lock);
 700         }
 701
 702         /* pid is 'saved use count' + 'size' + entry */
 703         pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
 704         if ((uint)pid > (uint)pid_max)
 705                 pid &= pid_tbl_mask;
 706         p->p_pid = pid;
 707         next_free_pt = nxt & pid_tbl_mask;
 708
 709         /* Grab table slot */
 710         pt->pt_proc = p;
 711         pid_alloc_cnt++;
 712
 713         mutex_exit(proc_lock);
 714
 715         return p;
 716 }
 717
 718 /*
 719  * Free a process id - called from proc_free (in kern_exit.c)
 720  *
 721  * Called with the proc_lock held.
 722  */
 723 void
 724 proc_free_pid(struct proc *p)
 725 {
 726         pid_t pid = p->p_pid;
 727         struct pid_table *pt;
 728
 729         KASSERT(mutex_owned(proc_lock));
 730
 731         pt = &pid_table[pid & pid_tbl_mask];
 732 #ifdef DIAGNOSTIC
 733         if (__predict_false(pt->pt_proc != p))
 734                 panic("proc_free: pid_table mismatch, pid %x, proc %p",
 735                         pid, p);
 736 #endif
 737         /* save pid use count in slot */
 738         pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
 739
 740         if (pt->pt_pgrp == NULL) {
 741                 /* link last freed entry onto ours */
 742                 pid &= pid_tbl_mask;
 743                 pt = &pid_table[last_free_pt];
 744                 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
 745                 last_free_pt = pid;
 746                 pid_alloc_cnt--;
 747         }
 748
 749         atomic_dec_uint(&nprocs);
 750 }
 751
 752 void
 753 proc_free_mem(struct proc *p)
 754 {
 755
 756         pool_cache_put(proc_cache, p);
 757 }
 758
 759 /*
 760  * proc_enterpgrp: move p to a new or existing process group (and session).
 761  *
 762  * If we are creating a new pgrp, the pgid should equal
 763  * the calling process' pid.
  764  * It is only valid to enter a process group that is in the session
 765  * of the process.
 766  * Also mksess should only be set if we are creating a process group
 767  *
 768  * Only called from sys_setsid and sys_setpgid.
 769  */
 770 int
 771 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
 772 {
 773         struct pgrp *new_pgrp, *pgrp;
 774         struct session *sess;
 775         struct proc *p;
 776         int rval;
 777         pid_t pg_id = NO_PGID;
 778
 779         sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
 780
 781         /* Allocate data areas we might need before doing any validity checks */
 782         mutex_enter(proc_lock);         /* Because pid_table might change */
 783         if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
 784                 mutex_exit(proc_lock);
 785                 new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
 786                 mutex_enter(proc_lock);
 787         } else
 788                 new_pgrp = NULL;
 789         rval = EPERM;   /* most common error (to save typing) */
 790
 791         /* Check pgrp exists or can be created */
 792         pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
 793         if (pgrp != NULL && pgrp->pg_id != pgid)
 794                 goto done;
 795
 796         /* Can only set another process under restricted circumstances. */
 797         if (pid != curp->p_pid) {
 798                 /* must exist and be one of our children... */
 799                 if ((p = p_find(pid, PFIND_LOCKED)) == NULL ||
 800                     !p_inferior(p, curp)) {
 801                         rval = ESRCH;
 802                         goto done;
 803                 }
 804                 /* ... in the same session... */
 805                 if (sess != NULL || p->p_session != curp->p_session)
 806                         goto done;
 807                 /* ... existing pgid must be in same session ... */
 808                 if (pgrp != NULL && pgrp->pg_session != p->p_session)
 809                         goto done;
 810                 /* ... and not done an exec. */
 811                 if (p->p_flag & PK_EXEC) {
 812                         rval = EACCES;
 813                         goto done;
 814                 }
 815         } else {
 816                 /* ... setsid() cannot re-enter a pgrp */
 817                 if (mksess && (curp->p_pgid == curp->p_pid ||
 818                     pg_find(curp->p_pid, PFIND_LOCKED)))
 819                         goto done;
 820                 p = curp;
 821         }
 822
 823         /* Changing the process group/session of a session
 824            leader is definitely off limits. */
 825         if (SESS_LEADER(p)) {
 826                 if (sess == NULL && p->p_pgrp == pgrp)
 827                         /* unless it's a definite noop */
 828                         rval = 0;
 829                 goto done;
 830         }
 831
 832         /* Can only create a process group with id of process */
 833         if (pgrp == NULL && pgid != pid)
 834                 goto done;
 835
 836         /* Can only create a session if creating pgrp */
 837         if (sess != NULL && pgrp != NULL)
 838                 goto done;
 839
 840         /* Check we allocated memory for a pgrp... */
 841         if (pgrp == NULL && new_pgrp == NULL)
 842                 goto done;
 843
 844         /* Don't attach to 'zombie' pgrp */
 845         if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
 846                 goto done;
 847
 848         /* Expect to succeed now */
 849         rval = 0;
 850
 851         if (pgrp == p->p_pgrp)
 852                 /* nothing to do */
 853                 goto done;
 854
 855         /* Ok all setup, link up required structures */
 856
 857         if (pgrp == NULL) {
 858                 pgrp = new_pgrp;
 859                 new_pgrp = NULL;
 860                 if (sess != NULL) {
 861                         sess->s_sid = p->p_pid;
 862                         sess->s_leader = p;
 863                         sess->s_count = 1;
 864                         sess->s_ttyvp = NULL;
 865                         sess->s_ttyp = NULL;
 866                         sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
 867                         memcpy(sess->s_login, p->p_session->s_login,
 868                             sizeof(sess->s_login));
 869                         p->p_lflag &= ~PL_CONTROLT;
 870                 } else {
 871                         sess = p->p_pgrp->pg_session;
 872                         proc_sesshold(sess);
 873                 }
 874                 pgrp->pg_session = sess;
 875                 sess = NULL;
 876
 877                 pgrp->pg_id = pgid;
 878                 LIST_INIT(&pgrp->pg_members);
 879 #ifdef DIAGNOSTIC
 880                 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
 881                         panic("enterpgrp: pgrp table slot in use");
 882                 if (__predict_false(mksess && p != curp))
 883                         panic("enterpgrp: mksession and p != curproc");
 884 #endif
 885                 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
 886                 pgrp->pg_jobc = 0;
 887         }
 888
 889         /*
 890          * Adjust eligibility of affected pgrps to participate in job control.
 891          * Increment eligibility counts before decrementing, otherwise we
 892          * could reach 0 spuriously during the first call.
 893          */
 894         fixjobc(p, pgrp, 1);
 895         fixjobc(p, p->p_pgrp, 0);
 896
 897         /* Interlock with ttread(). */
 898         mutex_spin_enter(&tty_lock);
 899
 900         /* Move process to requested group. */
 901         LIST_REMOVE(p, p_pglist);
 902         if (LIST_EMPTY(&p->p_pgrp->pg_members))
 903                 /* defer delete until we've dumped the lock */
 904                 pg_id = p->p_pgrp->pg_id;
 905         p->p_pgrp = pgrp;
 906         LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
 907
 908         /* Done with the swap; we can release the tty mutex. */
 909         mutex_spin_exit(&tty_lock);
 910
 911     done:
 912         if (pg_id != NO_PGID) {
 913                 /* Releases proc_lock. */
 914                 pg_delete(pg_id);
 915         } else {
 916                 mutex_exit(proc_lock);
 917         }
 918         if (sess != NULL)
 919                 kmem_free(sess, sizeof(*sess));
 920         if (new_pgrp != NULL)
 921                 kmem_free(new_pgrp, sizeof(*new_pgrp));
 922 #ifdef DEBUG_PGRP
 923         if (__predict_false(rval))
 924                 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
 925                         pid, pgid, mksess, curp->p_pid, rval);
 926 #endif
 927         return rval;
 928 }
 929
 930 /*
 931  * proc_leavepgrp: remove a process from its process group.
 932  *  => must be called with the proc_lock held, which will be released;
 933  */
 934 void
 935 proc_leavepgrp(struct proc *p)
 936 {
 937         struct pgrp *pgrp;
 938
 939         KASSERT(mutex_owned(proc_lock));
 940
 941         /* Interlock with ttread() */
 942         mutex_spin_enter(&tty_lock);
 943         pgrp = p->p_pgrp;
 944         LIST_REMOVE(p, p_pglist);
 945         p->p_pgrp = NULL;
 946         mutex_spin_exit(&tty_lock);
 947
 948         if (LIST_EMPTY(&pgrp->pg_members)) {
 949                 /* Releases proc_lock. */
 950                 pg_delete(pgrp->pg_id);
 951         } else {
 952                 mutex_exit(proc_lock);
 953         }
 954 }
 955
 956 /*
 957  * pg_remove: remove a process group from the table.
 958  *  => must be called with the proc_lock held;
 959  *  => returns process group to free;
 960  */
 961 static struct pgrp *
 962 pg_remove(pid_t pg_id)
 963 {
 964         struct pgrp *pgrp;
 965         struct pid_table *pt;
 966
 967         KASSERT(mutex_owned(proc_lock));
 968
 969         pt = &pid_table[pg_id & pid_tbl_mask];
 970         pgrp = pt->pt_pgrp;
 971
 972         KASSERT(pgrp != NULL);
 973         KASSERT(pgrp->pg_id == pg_id);
 974         KASSERT(LIST_EMPTY(&pgrp->pg_members));
 975
 976         pt->pt_pgrp = NULL;
 977
 978         if (!P_VALID(pt->pt_proc)) {
 979                 /* Orphaned pgrp, put slot onto free list. */
 980                 KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
 981                 pg_id &= pid_tbl_mask;
 982                 pt = &pid_table[last_free_pt];
 983                 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
 984                 last_free_pt = pg_id;
 985                 pid_alloc_cnt--;
 986         }
 987         return pgrp;
 988 }
 989
 990 /*
 991  * pg_delete: delete and free a process group.
 992  *  => must be called with the proc_lock held, which will be released.
 993  */
 994 static void
 995 pg_delete(pid_t pg_id)
 996 {
 997         struct pgrp *pg;
 998         struct tty *ttyp;
 999         struct session *ss;
1000
1001         KASSERT(mutex_owned(proc_lock));
1002
1003         pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
1004         if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
1005                 mutex_exit(proc_lock);
1006                 return;
1007         }
1008
1009         ss = pg->pg_session;
1010
1011         /* Remove reference (if any) from tty to this process group */
1012         mutex_spin_enter(&tty_lock);
1013         ttyp = ss->s_ttyp;
1014         if (ttyp != NULL && ttyp->t_pgrp == pg) {
1015                 ttyp->t_pgrp = NULL;
1016                 KASSERT(ttyp->t_session == ss);
1017         }
1018         mutex_spin_exit(&tty_lock);
1019
1020         /*
1021          * The leading process group in a session is freed by proc_sessrele(),
1022          * if last reference.  Note: proc_sessrele() releases proc_lock.
1023          */
1024         pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
1025         proc_sessrele(ss);
1026
1027         if (pg != NULL) {
1028                 /* Free it, if was not done by proc_sessrele(). */
1029                 kmem_free(pg, sizeof(struct pgrp));
1030         }
1031 }
1032
1033 /*
1034  * Adjust pgrp jobc counters when specified process changes process group.
1035  * We count the number of processes in each process group that "qualify"
1036  * the group for terminal job control (those with a parent in a different
1037  * process group of the same session).  If that count reaches zero, the
1038  * process group becomes orphaned.  Check both the specified process'
1039  * process group and that of its children.
1040  * entering == 0 => p is leaving specified group.
1041  * entering == 1 => p is entering specified group.
1042  *
1043  * Call with proc_lock held.
1044  */
1045 void
1046 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
1047 {
1048         struct pgrp *hispgrp;
1049         struct session *mysession = pgrp->pg_session;
1050         struct proc *child;
1051
1052         KASSERT(mutex_owned(proc_lock));
1053
1054         /*
1055          * Check p's parent to see whether p qualifies its own process
1056          * group; if so, adjust count for p's process group.
1057          */
1058         hispgrp = p->p_pptr->p_pgrp;
1059         if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
1060                 if (entering) {
1061                         pgrp->pg_jobc++;
1062                         p->p_lflag &= ~PL_ORPHANPG;
1063                 } else if (--pgrp->pg_jobc == 0)
1064                         orphanpg(pgrp);
1065         }
1066
1067         /*
1068          * Check this process' children to see whether they qualify
1069          * their process groups; if so, adjust counts for children's
1070          * process groups.
1071          */
1072         LIST_FOREACH(child, &p->p_children, p_sibling) {
1073                 hispgrp = child->p_pgrp;
1074                 if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
1075                     !P_ZOMBIE(child)) {
1076                         if (entering) {
1077                                 child->p_lflag &= ~PL_ORPHANPG;
1078                                 hispgrp->pg_jobc++;
1079                         } else if (--hispgrp->pg_jobc == 0)
1080                                 orphanpg(hispgrp);
1081                 }
1082         }
1083 }
1084
1085 /*
1086  * A process group has become orphaned;
1087  * if there are any stopped processes in the group,
1088  * hang-up all process in that group.
1089  *
1090  * Call with proc_lock held.
1091  */
1092 static void
1093 orphanpg(struct pgrp *pg)
1094 {
1095         struct proc *p;
1096
1097         KASSERT(mutex_owned(proc_lock));
1098
1099         LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1100                 if (p->p_stat == SSTOP) {
1101                         p->p_lflag |= PL_ORPHANPG;
1102                         psignal(p, SIGHUP);
1103                         psignal(p, SIGCONT);
1104                 }
1105         }
1106 }
1107
1108 #ifdef DDB
1109 #include <ddb/db_output.h>
1110 void pidtbl_dump(void);
1111 void
1112 pidtbl_dump(void)
1113 {
1114         struct pid_table *pt;
1115         struct proc *p;
1116         struct pgrp *pgrp;
1117         int id;
1118
1119         db_printf("pid table %p size %x, next %x, last %x\n",
1120                 pid_table, pid_tbl_mask+1,
1121                 next_free_pt, last_free_pt);
1122         for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
1123                 p = pt->pt_proc;
1124                 if (!P_VALID(p) && !pt->pt_pgrp)
1125                         continue;
1126                 db_printf("  id %x: ", id);
1127                 if (P_VALID(p))
1128                         db_printf("proc %p id %d (0x%x) %s\n",
1129                                 p, p->p_pid, p->p_pid, p->p_comm);
1130                 else
1131                         db_printf("next %x use %x\n",
1132                                 P_NEXT(p) & pid_tbl_mask,
1133                                 P_NEXT(p) & ~pid_tbl_mask);
1134                 if ((pgrp = pt->pt_pgrp)) {
1135                         db_printf("\tsession %p, sid %d, count %d, login %s\n",
1136                             pgrp->pg_session, pgrp->pg_session->s_sid,
1137                             pgrp->pg_session->s_count,
1138                             pgrp->pg_session->s_login);
1139                         db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1140                             pgrp, pgrp->pg_id, pgrp->pg_jobc,
1141                             LIST_FIRST(&pgrp->pg_members));
1142                         LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
1143                                 db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1144                                     p->p_pid, p, p->p_pgrp, p->p_comm);
1145                         }
1146                 }
1147         }
1148 }
1149 #endif /* DDB */
1150
1151 #ifdef KSTACK_CHECK_MAGIC
1152
1153 #define KSTACK_MAGIC    0xdeadbeaf
1154
1155 /* XXX should be per process basis? */
1156 static int      kstackleftmin = KSTACK_SIZE;
1157 static int      kstackleftthres = KSTACK_SIZE / 8;
1158
1159 void
1160 kstack_setup_magic(const struct lwp *l)
1161 {
1162         uint32_t *ip;
1163         uint32_t const *end;
1164
1165         KASSERT(l != NULL);
1166         KASSERT(l != &lwp0);
1167
1168         /*
1169          * fill all the stack with magic number
1170          * so that later modification on it can be detected.
1171          */
1172         ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1173         end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1174         for (; ip < end; ip++) {
1175                 *ip = KSTACK_MAGIC;
1176         }
1177 }
1178
1179 void
1180 kstack_check_magic(const struct lwp *l)
1181 {
1182         uint32_t const *ip, *end;
1183         int stackleft;
1184
1185         KASSERT(l != NULL);
1186
1187         /* don't check proc0 */ /*XXX*/
1188         if (l == &lwp0)
1189                 return;
1190
1191 #ifdef __MACHINE_STACK_GROWS_UP
1192         /* stack grows upwards (eg. hppa) */
1193         ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1194         end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1195         for (ip--; ip >= end; ip--)
1196                 if (*ip != KSTACK_MAGIC)
1197                         break;
1198
1199         stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
1200 #else /* __MACHINE_STACK_GROWS_UP */
1201         /* stack grows downwards (eg. i386) */
1202         ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1203         end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1204         for (; ip < end; ip++)
1205                 if (*ip != KSTACK_MAGIC)
1206                         break;
1207
1208         stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
1209 #endif /* __MACHINE_STACK_GROWS_UP */
1210
1211         if (kstackleftmin > stackleft) {
1212                 kstackleftmin = stackleft;
1213                 if (stackleft < kstackleftthres)
1214                         printf("warning: kernel stack left %d bytes"
1215                             "(pid %u:lid %u)\n", stackleft,
1216                             (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1217         }
1218
1219         if (stackleft <= 0) {
1220                 panic("magic on the top of kernel stack changed for "
1221                     "pid %u, lid %u: maybe kernel stack overflow",
1222                     (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1223         }
1224 }
1225 #endif /* KSTACK_CHECK_MAGIC */
1226
1227 int
1228 proclist_foreach_call(struct proclist *list,
1229     int (*callback)(struct proc *, void *arg), void *arg)
1230 {
1231         struct proc marker;
1232         struct proc *p;
1233         int ret = 0;
1234
1235         marker.p_flag = PK_MARKER;
1236         mutex_enter(proc_lock);
1237         for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
1238                 if (p->p_flag & PK_MARKER) {
1239                         p = LIST_NEXT(p, p_list);
1240                         continue;
1241                 }
1242                 LIST_INSERT_AFTER(p, &marker, p_list);
1243                 ret = (*callback)(p, arg);
1244                 KASSERT(mutex_owned(proc_lock));
1245                 p = LIST_NEXT(&marker, p_list);
1246                 LIST_REMOVE(&marker, p_list);
1247         }
1248         mutex_exit(proc_lock);
1249
1250         return ret;
1251 }
1252
1253 int
1254 proc_vmspace_getref(struct proc *p, struct vmspace **vm)
1255 {
1256
1257         /* XXXCDC: how should locking work here? */
1258
1259         /* curproc exception is for coredump. */
1260
1261         if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
1262             (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
1263                 return EFAULT;
1264         }
1265
1266         uvmspace_addref(p->p_vmspace);
1267         *vm = p->p_vmspace;
1268
1269         return 0;
1270 }
1271
1272 /*
1273  * Acquire a write lock on the process credential.
1274  */
1275 void
1276 proc_crmod_enter(void)
1277 {
1278         struct lwp *l = curlwp;
1279         struct proc *p = l->l_proc;
1280         struct plimit *lim;
1281         kauth_cred_t oc;
1282         char *cn;
1283
1284         /* Reset what needs to be reset in plimit. */
1285         if (p->p_limit->pl_corename != defcorename) {
1286                 lim_privatise(p, false);
1287                 lim = p->p_limit;
1288                 mutex_enter(&lim->pl_lock);
1289                 cn = lim->pl_corename;
1290                 lim->pl_corename = defcorename;
1291                 mutex_exit(&lim->pl_lock);
1292                 if (cn != defcorename)
1293                         free(cn, M_TEMP);
1294         }
1295
1296         mutex_enter(p->p_lock);
1297
1298         /* Ensure the LWP cached credentials are up to date. */
1299         if ((oc = l->l_cred) != p->p_cred) {
1300                 kauth_cred_hold(p->p_cred);
1301                 l->l_cred = p->p_cred;
1302                 kauth_cred_free(oc);
1303         }
1304
1305 }
1306
1307 /*
1308  * Set in a new process credential, and drop the write lock.  The credential
1309  * must have a reference already.  Optionally, free a no-longer required
1310  * credential.  The scheduler also needs to inspect p_cred, so we also
1311  * briefly acquire the sched state mutex.
1312  */
1313 void
1314 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
1315 {
1316         struct lwp *l = curlwp, *l2;
1317         struct proc *p = l->l_proc;
1318         kauth_cred_t oc;
1319
1320         KASSERT(mutex_owned(p->p_lock));
1321
1322         /* Is there a new credential to set in? */
1323         if (scred != NULL) {
1324                 p->p_cred = scred;
1325                 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1326                         if (l2 != l)
1327                                 l2->l_prflag |= LPR_CRMOD;
1328                 }
1329
1330                 /* Ensure the LWP cached credentials are up to date. */
1331                 if ((oc = l->l_cred) != scred) {
1332                         kauth_cred_hold(scred);
1333                         l->l_cred = scred;
1334                 }
1335         } else
1336                 oc = NULL;      /* XXXgcc */
1337
1338         if (sugid) {
1339                 /*
1340                  * Mark process as having changed credentials, stops
1341                  * tracing etc.
1342                  */
1343                 p->p_flag |= PK_SUGID;
1344         }
1345
1346         mutex_exit(p->p_lock);
1347
1348         /* If there is a credential to be released, free it now. */
1349         if (fcred != NULL) {
1350                 KASSERT(scred != NULL);
1351                 kauth_cred_free(fcred);
1352                 if (oc != scred)
1353                         kauth_cred_free(oc);
1354         }
1355 }
1356
1357 /*
1358  * proc_specific_key_create --
1359  *      Create a key for subsystem proc-specific data.
1360  */
1361 int
1362 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1363 {
1364
1365         return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
1366 }
1367
1368 /*
1369  * proc_specific_key_delete --
1370  *      Delete a key for subsystem proc-specific data.
1371  */
1372 void
1373 proc_specific_key_delete(specificdata_key_t key)
1374 {
1375
1376         specificdata_key_delete(proc_specificdata_domain, key);
1377 }
1378
1379 /*
1380  * proc_initspecific --
1381  *      Initialize a proc's specificdata container.
1382  */
1383 void
1384 proc_initspecific(struct proc *p)
1385 {
1386         int error;
1387
1388         error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
1389         KASSERT(error == 0);
1390 }
1391
1392 /*
1393  * proc_finispecific --
1394  *      Finalize a proc's specificdata container.
1395  */
1396 void
1397 proc_finispecific(struct proc *p)
1398 {
1399
1400         specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
1401 }
1402
1403 /*
1404  * proc_getspecific --
1405  *      Return proc-specific data corresponding to the specified key.
1406  */
1407 void *
1408 proc_getspecific(struct proc *p, specificdata_key_t key)
1409 {
1410
1411         return (specificdata_getspecific(proc_specificdata_domain,
1412                                          &p->p_specdataref, key));
1413 }
1414
1415 /*
1416  * proc_setspecific --
1417  *      Set proc-specific data corresponding to the specified key.
1418  */
1419 void
1420 proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
1421 {
1422
1423         specificdata_setspecific(proc_specificdata_domain,
1424                                  &p->p_specdataref, key, data);
1425 }
1426
1427 int
1428 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
1429 {
1430         int r = 0;
1431
1432         if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
1433             kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
1434                 /*
1435                  * suid proc of ours or proc not ours
1436                  */
1437                 r = EPERM;
1438         } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
1439                 /*
1440                  * sgid proc has sgid back to us temporarily
1441                  */
1442                 r = EPERM;
1443         } else {
1444                 /*
1445                  * our rgid must be in target's group list (ie,
1446                  * sub-processes started by a sgid process)
1447                  */
1448                 int ismember = 0;
1449
1450                 if (kauth_cred_ismember_gid(cred,
1451                     kauth_cred_getgid(target), &ismember) != 0 ||
1452                     !ismember)
1453                         r = EPERM;
1454         }
1455
1456         return (r);
1457 }
1458