kernel/exit.c

   1 /*
   2  *  linux/kernel/exit.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/mm.h>
   8 #include <linux/slab.h>
   9 #include <linux/interrupt.h>
  10 #include <linux/module.h>
  11 #include <linux/capability.h>
  12 #include <linux/completion.h>
  13 #include <linux/personality.h>
  14 #include <linux/tty.h>
  15 #include <linux/mnt_namespace.h>
  16 #include <linux/key.h>
  17 #include <linux/security.h>
  18 #include <linux/cpu.h>
  19 #include <linux/acct.h>
  20 #include <linux/tsacct_kern.h>
  21 #include <linux/file.h>
  22 #include <linux/binfmts.h>
  23 #include <linux/nsproxy.h>
  24 #include <linux/pid_namespace.h>
  25 #include <linux/ptrace.h>
  26 #include <linux/profile.h>
  27 #include <linux/mount.h>
  28 #include <linux/proc_fs.h>
  29 #include <linux/kthread.h>
  30 #include <linux/mempolicy.h>
  31 #include <linux/taskstats_kern.h>
  32 #include <linux/delayacct.h>
  33 #include <linux/freezer.h>
  34 #include <linux/cgroup.h>
  35 #include <linux/syscalls.h>
  36 #include <linux/signal.h>
  37 #include <linux/posix-timers.h>
  38 #include <linux/cn_proc.h>
  39 #include <linux/mutex.h>
  40 #include <linux/futex.h>
  41 #include <linux/compat.h>
  42 #include <linux/pipe_fs_i.h>
  43 #include <linux/audit.h> /* for audit_free() */
  44 #include <linux/resource.h>
  45 #include <linux/blkdev.h>
  46 #include <linux/task_io_accounting_ops.h>
  47
  48 #include <asm/uaccess.h>
  49 #include <asm/unistd.h>
  50 #include <asm/pgtable.h>
  51 #include <asm/mmu_context.h>
  52
  53 static void exit_mm(struct task_struct * tsk);
  54
  55 static void __unhash_process(struct task_struct *p)
  56 {
  57         nr_threads--;
  58         detach_pid(p, PIDTYPE_PID);
  59         if (thread_group_leader(p)) {
  60                 detach_pid(p, PIDTYPE_PGID);
  61                 detach_pid(p, PIDTYPE_SID);
  62
  63                 list_del_rcu(&p->tasks);
  64                 __get_cpu_var(process_counts)--;
  65         }
  66         list_del_rcu(&p->thread_group);
  67         remove_parent(p);
  68 }
  69
  70 /*
  71  * This function expects the tasklist_lock write-locked.
  72  */
  73 static void __exit_signal(struct task_struct *tsk)
  74 {
  75         struct signal_struct *sig = tsk->signal;
  76         struct sighand_struct *sighand;
  77
  78         BUG_ON(!sig);
  79         BUG_ON(!atomic_read(&sig->count));
  80
  81         rcu_read_lock();
  82         sighand = rcu_dereference(tsk->sighand);
  83         spin_lock(&sighand->siglock);
  84
  85         posix_cpu_timers_exit(tsk);
  86         if (atomic_dec_and_test(&sig->count))
  87                 posix_cpu_timers_exit_group(tsk);
  88         else {
  89                 /*
  90                  * If there is any task waiting for the group exit
  91                  * then notify it:
  92                  */
  93                 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
  94                         wake_up_process(sig->group_exit_task);
  95
  96                 if (tsk == sig->curr_target)
  97                         sig->curr_target = next_thread(tsk);
  98                 /*
  99                  * Accumulate here the counters for all threads but the
 100                  * group leader as they die, so they can be added into
 101                  * the process-wide totals when those are taken.
 102                  * The group leader stays around as a zombie as long
 103                  * as there are other threads.  When it gets reaped,
 104                  * the exit.c code will add its counts into these totals.
 105                  * We won't ever get here for the group leader, since it
 106                  * will have been the last reference on the signal_struct.
 107                  */
 108                 sig->utime = cputime_add(sig->utime, tsk->utime);
 109                 sig->stime = cputime_add(sig->stime, tsk->stime);
 110                 sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 111                 sig->min_flt += tsk->min_flt;
 112                 sig->maj_flt += tsk->maj_flt;
 113                 sig->nvcsw += tsk->nvcsw;
 114                 sig->nivcsw += tsk->nivcsw;
 115                 sig->inblock += task_io_get_inblock(tsk);
 116                 sig->oublock += task_io_get_oublock(tsk);
 117                 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 118                 sig = NULL; /* Marker for below. */
 119         }
 120
 121         __unhash_process(tsk);
 122
 123         tsk->signal = NULL;
 124         tsk->sighand = NULL;
 125         spin_unlock(&sighand->siglock);
 126         rcu_read_unlock();
 127
 128         __cleanup_sighand(sighand);
 129         clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
 130         flush_sigqueue(&tsk->pending);
 131         if (sig) {
 132                 flush_sigqueue(&sig->shared_pending);
 133                 taskstats_tgid_free(sig);
 134                 __cleanup_signal(sig);
 135         }
 136 }
 137
 138 static void delayed_put_task_struct(struct rcu_head *rhp)
 139 {
 140         put_task_struct(container_of(rhp, struct task_struct, rcu));
 141 }
 142
 143 void release_task(struct task_struct * p)
 144 {
 145         struct task_struct *leader;
 146         int zap_leader;
 147 repeat:
 148         atomic_dec(&p->user->processes);
 149         proc_flush_task(p);
 150         write_lock_irq(&tasklist_lock);
 151         ptrace_unlink(p);
 152         BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
 153         __exit_signal(p);
 154
 155         /*
 156          * If we are the last non-leader member of the thread
 157          * group, and the leader is zombie, then notify the
 158          * group leader's parent process. (if it wants notification.)
 159          */
 160         zap_leader = 0;
 161         leader = p->group_leader;
 162         if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
 163                 BUG_ON(leader->exit_signal == -1);
 164                 do_notify_parent(leader, leader->exit_signal);
 165                 /*
 166                  * If we were the last child thread and the leader has
 167                  * exited already, and the leader's parent ignores SIGCHLD,
 168                  * then we are the one who should release the leader.
 169                  *
 170                  * do_notify_parent() will have marked it self-reaping in
 171                  * that case.
 172                  */
 173                 zap_leader = (leader->exit_signal == -1);
 174         }
 175
 176         write_unlock_irq(&tasklist_lock);
 177         release_thread(p);
 178         call_rcu(&p->rcu, delayed_put_task_struct);
 179
 180         p = leader;
 181         if (unlikely(zap_leader))
 182                 goto repeat;
 183 }
 184
 185 /*
 186  * This checks not only the pgrp, but falls back on the pid if no
 187  * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 188  * without this...
 189  *
 190  * The caller must hold rcu lock or the tasklist lock.
 191  */
 192 struct pid *session_of_pgrp(struct pid *pgrp)
 193 {
 194         struct task_struct *p;
 195         struct pid *sid = NULL;
 196
 197         p = pid_task(pgrp, PIDTYPE_PGID);
 198         if (p == NULL)
 199                 p = pid_task(pgrp, PIDTYPE_PID);
 200         if (p != NULL)
 201                 sid = task_session(p);
 202
 203         return sid;
 204 }
 205
 206 /*
 207  * Determine if a process group is "orphaned", according to the POSIX
 208  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 209  * by terminal-generated stop signals.  Newly orphaned process groups are
 210  * to receive a SIGHUP and a SIGCONT.
 211  *
 212  * "I ask you, have you ever known what it is to be an orphan?"
 213  */
 214 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
 215 {
 216         struct task_struct *p;
 217 <<<<<<< HEAD:kernel/exit.c
 218         int ret = 1;
 219 =======
 220 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 221
 222         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 223 <<<<<<< HEAD:kernel/exit.c
 224                 if (p == ignored_task
 225                                 || p->exit_state
 226                                 || is_global_init(p->real_parent))
 227 =======
 228                 if ((p == ignored_task) ||
 229                     (p->exit_state && thread_group_empty(p)) ||
 230                     is_global_init(p->real_parent))
 231 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 232                         continue;
 233 <<<<<<< HEAD:kernel/exit.c
 234 =======
 235
 236 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 237                 if (task_pgrp(p->real_parent) != pgrp &&
 238 <<<<<<< HEAD:kernel/exit.c
 239                     task_session(p->real_parent) == task_session(p)) {
 240                         ret = 0;
 241                         break;
 242                 }
 243 =======
 244                     task_session(p->real_parent) == task_session(p))
 245                         return 0;
 246 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 247         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 248 <<<<<<< HEAD:kernel/exit.c
 249         return ret;     /* (sighing) "Often!" */
 250 =======
 251
 252         return 1;
 253 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 254 }
 255
 256 int is_current_pgrp_orphaned(void)
 257 {
 258         int retval;
 259
 260         read_lock(&tasklist_lock);
 261         retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
 262         read_unlock(&tasklist_lock);
 263
 264         return retval;
 265 }
 266
 267 static int has_stopped_jobs(struct pid *pgrp)
 268 {
 269         int retval = 0;
 270         struct task_struct *p;
 271
 272         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 273                 if (!task_is_stopped(p))
 274                         continue;
 275                 retval = 1;
 276                 break;
 277         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 278         return retval;
 279 }
 280
 281 <<<<<<< HEAD:kernel/exit.c
 282 =======
 283 /*
 284  * Check to see if any process groups have become orphaned as
 285  * a result of our exiting, and if they have any stopped jobs,
 286  * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
 287  */
 288 static void
 289 kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 290 {
 291         struct pid *pgrp = task_pgrp(tsk);
 292         struct task_struct *ignored_task = tsk;
 293
 294         if (!parent)
 295                  /* exit: our father is in a different pgrp than
 296                   * we are and we were the only connection outside.
 297                   */
 298                 parent = tsk->real_parent;
 299         else
 300                 /* reparent: our child is in a different pgrp than
 301                  * we are, and it was the only connection outside.
 302                  */
 303                 ignored_task = NULL;
 304
 305         if (task_pgrp(parent) != pgrp &&
 306             task_session(parent) == task_session(tsk) &&
 307             will_become_orphaned_pgrp(pgrp, ignored_task) &&
 308             has_stopped_jobs(pgrp)) {
 309                 __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 310                 __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 311         }
 312 }
 313
 314 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 315 /**
 316  * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
 317  *
 318  * If a kernel thread is launched as a result of a system call, or if
 319  * it ever exits, it should generally reparent itself to kthreadd so it
 320  * isn't in the way of other processes and is correctly cleaned up on exit.
 321  *
 322  * The various task state such as scheduling policy and priority may have
 323  * been inherited from a user process, so we reset them to sane values here.
 324  *
 325  * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
 326  */
 327 static void reparent_to_kthreadd(void)
 328 {
 329         write_lock_irq(&tasklist_lock);
 330
 331         ptrace_unlink(current);
 332         /* Reparent to init */
 333         remove_parent(current);
 334         current->real_parent = current->parent = kthreadd_task;
 335         add_parent(current);
 336
 337         /* Set the exit signal to SIGCHLD so we signal init on exit */
 338         current->exit_signal = SIGCHLD;
 339
 340         if (task_nice(current) < 0)
 341                 set_user_nice(current, 0);
 342         /* cpus_allowed? */
 343         /* rt_priority? */
 344         /* signals? */
 345         security_task_reparent_to_init(current);
 346         memcpy(current->signal->rlim, init_task.signal->rlim,
 347                sizeof(current->signal->rlim));
 348         atomic_inc(&(INIT_USER->__count));
 349         write_unlock_irq(&tasklist_lock);
 350         switch_uid(INIT_USER);
 351 }
 352
 353 void __set_special_pids(struct pid *pid)
 354 {
 355         struct task_struct *curr = current->group_leader;
 356         pid_t nr = pid_nr(pid);
 357
 358         if (task_session(curr) != pid) {
 359                 detach_pid(curr, PIDTYPE_SID);
 360                 attach_pid(curr, PIDTYPE_SID, pid);
 361                 set_task_session(curr, nr);
 362         }
 363         if (task_pgrp(curr) != pid) {
 364                 detach_pid(curr, PIDTYPE_PGID);
 365                 attach_pid(curr, PIDTYPE_PGID, pid);
 366                 set_task_pgrp(curr, nr);
 367         }
 368 }
 369
 370 static void set_special_pids(struct pid *pid)
 371 {
 372         write_lock_irq(&tasklist_lock);
 373         __set_special_pids(pid);
 374         write_unlock_irq(&tasklist_lock);
 375 }
 376
 377 /*
 378  * Let kernel threads use this to say that they
 379  * allow a certain signal (since daemonize() will
 380  * have disabled all of them by default).
 381  */
 382 int allow_signal(int sig)
 383 {
 384         if (!valid_signal(sig) || sig < 1)
 385                 return -EINVAL;
 386
 387         spin_lock_irq(&current->sighand->siglock);
 388         sigdelset(&current->blocked, sig);
 389         if (!current->mm) {
 390                 /* Kernel threads handle their own signals.
 391                    Let the signal code know it'll be handled, so
 392                    that they don't get converted to SIGKILL or
 393                    just silently dropped */
 394                 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
 395         }
 396         recalc_sigpending();
 397         spin_unlock_irq(&current->sighand->siglock);
 398         return 0;
 399 }
 400
 401 EXPORT_SYMBOL(allow_signal);
 402
 403 int disallow_signal(int sig)
 404 {
 405         if (!valid_signal(sig) || sig < 1)
 406                 return -EINVAL;
 407
 408         spin_lock_irq(&current->sighand->siglock);
 409         current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
 410         recalc_sigpending();
 411         spin_unlock_irq(&current->sighand->siglock);
 412         return 0;
 413 }
 414
 415 EXPORT_SYMBOL(disallow_signal);
 416
 417 /*
 418  *      Put all the gunge required to become a kernel thread without
 419  *      attached user resources in one place where it belongs.
 420  */
 421
 422 void daemonize(const char *name, ...)
 423 {
 424         va_list args;
 425         struct fs_struct *fs;
 426         sigset_t blocked;
 427
 428         va_start(args, name);
 429         vsnprintf(current->comm, sizeof(current->comm), name, args);
 430         va_end(args);
 431
 432         /*
 433          * If we were started as result of loading a module, close all of the
 434          * user space pages.  We don't need them, and if we didn't close them
 435          * they would be locked into memory.
 436          */
 437         exit_mm(current);
 438         /*
 439          * We don't want to have TIF_FREEZE set if the system-wide hibernation
 440          * or suspend transition begins right now.
 441          */
 442         current->flags |= PF_NOFREEZE;
 443
 444         if (current->nsproxy != &init_nsproxy) {
 445                 get_nsproxy(&init_nsproxy);
 446                 switch_task_namespaces(current, &init_nsproxy);
 447         }
 448         set_special_pids(&init_struct_pid);
 449         proc_clear_tty(current);
 450
 451         /* Block and flush all signals */
 452         sigfillset(&blocked);
 453         sigprocmask(SIG_BLOCK, &blocked, NULL);
 454         flush_signals(current);
 455
 456         /* Become as one with the init task */
 457
 458         exit_fs(current);       /* current->fs->count--; */
 459         fs = init_task.fs;
 460         current->fs = fs;
 461         atomic_inc(&fs->count);
 462
 463         exit_files(current);
 464         current->files = init_task.files;
 465         atomic_inc(&current->files->count);
 466
 467         reparent_to_kthreadd();
 468 }
 469
 470 EXPORT_SYMBOL(daemonize);
 471
 472 static void close_files(struct files_struct * files)
 473 {
 474         int i, j;
 475         struct fdtable *fdt;
 476
 477         j = 0;
 478
 479         /*
 480          * It is safe to dereference the fd table without RCU or
 481          * ->file_lock because this is the last reference to the
 482          * files structure.
 483          */
 484         fdt = files_fdtable(files);
 485         for (;;) {
 486                 unsigned long set;
 487                 i = j * __NFDBITS;
 488                 if (i >= fdt->max_fds)
 489                         break;
 490                 set = fdt->open_fds->fds_bits[j++];
 491                 while (set) {
 492                         if (set & 1) {
 493                                 struct file * file = xchg(&fdt->fd[i], NULL);
 494                                 if (file) {
 495                                         filp_close(file, files);
 496                                         cond_resched();
 497                                 }
 498                         }
 499                         i++;
 500                         set >>= 1;
 501                 }
 502         }
 503 }
 504
 505 struct files_struct *get_files_struct(struct task_struct *task)
 506 {
 507         struct files_struct *files;
 508
 509         task_lock(task);
 510         files = task->files;
 511         if (files)
 512                 atomic_inc(&files->count);
 513         task_unlock(task);
 514
 515         return files;
 516 }
 517
 518 void put_files_struct(struct files_struct *files)
 519 {
 520         struct fdtable *fdt;
 521
 522         if (atomic_dec_and_test(&files->count)) {
 523                 close_files(files);
 524                 /*
 525                  * Free the fd and fdset arrays if we expanded them.
 526                  * If the fdtable was embedded, pass files for freeing
 527                  * at the end of the RCU grace period. Otherwise,
 528                  * you can free files immediately.
 529                  */
 530                 fdt = files_fdtable(files);
 531                 if (fdt != &files->fdtab)
 532                         kmem_cache_free(files_cachep, files);
 533                 free_fdtable(fdt);
 534         }
 535 }
 536
 537 EXPORT_SYMBOL(put_files_struct);
 538
 539 void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
 540 {
 541         struct files_struct *old;
 542
 543         old = tsk->files;
 544         task_lock(tsk);
 545         tsk->files = files;
 546         task_unlock(tsk);
 547         put_files_struct(old);
 548 }
 549 EXPORT_SYMBOL(reset_files_struct);
 550
 551 static void __exit_files(struct task_struct *tsk)
 552 {
 553         struct files_struct * files = tsk->files;
 554
 555         if (files) {
 556                 task_lock(tsk);
 557                 tsk->files = NULL;
 558                 task_unlock(tsk);
 559                 put_files_struct(files);
 560         }
 561 }
 562
 563 void exit_files(struct task_struct *tsk)
 564 {
 565         __exit_files(tsk);
 566 }
 567
 568 static void __put_fs_struct(struct fs_struct *fs)
 569 {
 570         /* No need to hold fs->lock if we are killing it */
 571         if (atomic_dec_and_test(&fs->count)) {
 572                 path_put(&fs->root);
 573                 path_put(&fs->pwd);
 574                 if (fs->altroot.dentry)
 575                         path_put(&fs->altroot);
 576                 kmem_cache_free(fs_cachep, fs);
 577         }
 578 }
 579
 580 void put_fs_struct(struct fs_struct *fs)
 581 {
 582         __put_fs_struct(fs);
 583 }
 584
 585 static void __exit_fs(struct task_struct *tsk)
 586 {
 587         struct fs_struct * fs = tsk->fs;
 588
 589         if (fs) {
 590                 task_lock(tsk);
 591                 tsk->fs = NULL;
 592                 task_unlock(tsk);
 593                 __put_fs_struct(fs);
 594         }
 595 }
 596
 597 void exit_fs(struct task_struct *tsk)
 598 {
 599         __exit_fs(tsk);
 600 }
 601
 602 EXPORT_SYMBOL_GPL(exit_fs);
 603
 604 /*
 605  * Turn us into a lazy TLB process if we
 606  * aren't already..
 607  */
 608 static void exit_mm(struct task_struct * tsk)
 609 {
 610         struct mm_struct *mm = tsk->mm;
 611
 612         mm_release(tsk, mm);
 613         if (!mm)
 614                 return;
 615         /*
 616          * Serialize with any possible pending coredump.
 617          * We must hold mmap_sem around checking core_waiters
 618          * and clearing tsk->mm.  The core-inducing thread
 619          * will increment core_waiters for each thread in the
 620          * group with ->mm != NULL.
 621          */
 622         down_read(&mm->mmap_sem);
 623         if (mm->core_waiters) {
 624                 up_read(&mm->mmap_sem);
 625                 down_write(&mm->mmap_sem);
 626                 if (!--mm->core_waiters)
 627                         complete(mm->core_startup_done);
 628                 up_write(&mm->mmap_sem);
 629
 630                 wait_for_completion(&mm->core_done);
 631                 down_read(&mm->mmap_sem);
 632         }
 633         atomic_inc(&mm->mm_count);
 634         BUG_ON(mm != tsk->active_mm);
 635         /* more a memory barrier than a real lock */
 636         task_lock(tsk);
 637         tsk->mm = NULL;
 638         up_read(&mm->mmap_sem);
 639         enter_lazy_tlb(mm, current);
 640         /* We don't want this task to be frozen prematurely */
 641         clear_freeze_flag(tsk);
 642         task_unlock(tsk);
 643         mmput(mm);
 644 }
 645
 646 static void
 647 reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 648 {
 649         if (p->pdeath_signal)
 650                 /* We already hold the tasklist_lock here.  */
 651                 group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
 652
 653         /* Move the child from its dying parent to the new one.  */
 654         if (unlikely(traced)) {
 655                 /* Preserve ptrace links if someone else is tracing this child.  */
 656                 list_del_init(&p->ptrace_list);
 657                 if (p->parent != p->real_parent)
 658                         list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
 659         } else {
 660                 /* If this child is being traced, then we're the one tracing it
 661                  * anyway, so let go of it.
 662                  */
 663                 p->ptrace = 0;
 664                 remove_parent(p);
 665                 p->parent = p->real_parent;
 666                 add_parent(p);
 667
 668                 if (task_is_traced(p)) {
 669                         /*
 670                          * If it was at a trace stop, turn it into
 671                          * a normal stop since it's no longer being
 672                          * traced.
 673                          */
 674                         ptrace_untrace(p);
 675                 }
 676         }
 677
 678         /* If this is a threaded reparent there is no need to
 679          * notify anyone anything has happened.
 680          */
 681         if (p->real_parent->group_leader == father->group_leader)
 682                 return;
 683
 684         /* We don't want people slaying init.  */
 685         if (p->exit_signal != -1)
 686                 p->exit_signal = SIGCHLD;
 687
 688         /* If we'd notified the old parent about this child's death,
 689          * also notify the new parent.
 690          */
 691         if (!traced && p->exit_state == EXIT_ZOMBIE &&
 692             p->exit_signal != -1 && thread_group_empty(p))
 693                 do_notify_parent(p, p->exit_signal);
 694
 695 <<<<<<< HEAD:kernel/exit.c
 696         /*
 697          * process group orphan check
 698          * Case ii: Our child is in a different pgrp
 699          * than we are, and it was the only connection
 700          * outside, so the child pgrp is now orphaned.
 701          */
 702         if ((task_pgrp(p) != task_pgrp(father)) &&
 703             (task_session(p) == task_session(father))) {
 704                 struct pid *pgrp = task_pgrp(p);
 705
 706                 if (will_become_orphaned_pgrp(pgrp, NULL) &&
 707                     has_stopped_jobs(pgrp)) {
 708                         __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 709                         __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 710                 }
 711         }
 712 =======
 713         kill_orphaned_pgrp(p, father);
 714 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 715 }
 716
 717 /*
 718  * When we die, we re-parent all our children.
 719  * Try to give them to another thread in our thread
 720  * group, and if no such member exists, give it to
 721  * the child reaper process (ie "init") in our pid
 722  * space.
 723  */
 724 static void forget_original_parent(struct task_struct *father)
 725 {
 726         struct task_struct *p, *n, *reaper = father;
 727         struct list_head ptrace_dead;
 728
 729         INIT_LIST_HEAD(&ptrace_dead);
 730
 731         write_lock_irq(&tasklist_lock);
 732
 733         do {
 734                 reaper = next_thread(reaper);
 735                 if (reaper == father) {
 736                         reaper = task_child_reaper(father);
 737                         break;
 738                 }
 739         } while (reaper->flags & PF_EXITING);
 740
 741         /*
 742          * There are only two places where our children can be:
 743          *
 744          * - in our child list
 745          * - in our ptraced child list
 746          *
 747          * Search them and reparent children.
 748          */
 749         list_for_each_entry_safe(p, n, &father->children, sibling) {
 750                 int ptrace;
 751
 752                 ptrace = p->ptrace;
 753
 754                 /* if father isn't the real parent, then ptrace must be enabled */
 755                 BUG_ON(father != p->real_parent && !ptrace);
 756
 757                 if (father == p->real_parent) {
 758                         /* reparent with a reaper, real father it's us */
 759                         p->real_parent = reaper;
 760                         reparent_thread(p, father, 0);
 761                 } else {
 762                         /* reparent ptraced task to its real parent */
 763                         __ptrace_unlink (p);
 764                         if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
 765                             thread_group_empty(p))
 766                                 do_notify_parent(p, p->exit_signal);
 767                 }
 768
 769                 /*
 770                  * if the ptraced child is a zombie with exit_signal == -1
 771                  * we must collect it before we exit, or it will remain
 772                  * zombie forever since we prevented it from self-reap itself
 773                  * while it was being traced by us, to be able to see it in wait4.
 774                  */
 775                 if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
 776                         list_add(&p->ptrace_list, &ptrace_dead);
 777         }
 778
 779         list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
 780                 p->real_parent = reaper;
 781                 reparent_thread(p, father, 1);
 782         }
 783
 784         write_unlock_irq(&tasklist_lock);
 785         BUG_ON(!list_empty(&father->children));
 786         BUG_ON(!list_empty(&father->ptrace_children));
 787
 788         list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
 789                 list_del_init(&p->ptrace_list);
 790                 release_task(p);
 791         }
 792
 793 }
 794
 795 /*
 796  * Send signals to all our closest relatives so that they know
 797  * to properly mourn us..
 798  */
 799 <<<<<<< HEAD:kernel/exit.c
 800 static void exit_notify(struct task_struct *tsk)
 801 =======
 802 static void exit_notify(struct task_struct *tsk, int group_dead)
 803 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 804 {
 805         int state;
 806 <<<<<<< HEAD:kernel/exit.c
 807         struct task_struct *t;
 808         struct pid *pgrp;
 809 =======
 810 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 811
 812         /*
 813          * This does two things:
 814          *
 815          * A.  Make init inherit all the child processes
 816          * B.  Check to see if any process groups have become orphaned
 817          *      as a result of our exiting, and if they have any stopped
 818          *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 819          */
 820         forget_original_parent(tsk);
 821         exit_task_namespaces(tsk);
 822
 823         write_lock_irq(&tasklist_lock);
 824 <<<<<<< HEAD:kernel/exit.c
 825         /*
 826          * Check to see if any process groups have become orphaned
 827          * as a result of our exiting, and if they have any stopped
 828          * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 829          *
 830          * Case i: Our father is in a different pgrp than we are
 831          * and we were the only connection outside, so our pgrp
 832          * is about to become orphaned.
 833          */
 834         t = tsk->real_parent;
 835
 836         pgrp = task_pgrp(tsk);
 837         if ((task_pgrp(t) != pgrp) &&
 838             (task_session(t) == task_session(tsk)) &&
 839             will_become_orphaned_pgrp(pgrp, tsk) &&
 840             has_stopped_jobs(pgrp)) {
 841                 __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 842                 __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 843         }
 844 =======
 845         if (group_dead)
 846                 kill_orphaned_pgrp(tsk->group_leader, NULL);
 847 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 848
 849         /* Let father know we died
 850          *
 851          * Thread signals are configurable, but you aren't going to use
 852          * that to send signals to arbitary processes.
 853          * That stops right now.
 854          *
 855          * If the parent exec id doesn't match the exec id we saved
 856          * when we started then we know the parent has changed security
 857          * domain.
 858          *
 859          * If our self_exec id doesn't match our parent_exec_id then
 860          * we have changed execution domain as these two values started
 861          * the same after a fork.
 862          */
 863         if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
 864 <<<<<<< HEAD:kernel/exit.c
 865             ( tsk->parent_exec_id != t->self_exec_id  ||
 866               tsk->self_exec_id != tsk->parent_exec_id)
 867 =======
 868             (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
 869              tsk->self_exec_id != tsk->parent_exec_id)
 870 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
 871             && !capable(CAP_KILL))
 872                 tsk->exit_signal = SIGCHLD;
 873
 874
 875         /* If something other than our normal parent is ptracing us, then
 876          * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
 877          * only has special meaning to our real parent.
 878          */
 879         if (tsk->exit_signal != -1 && thread_group_empty(tsk)) {
 880                 int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD;
 881                 do_notify_parent(tsk, signal);
 882         } else if (tsk->ptrace) {
 883                 do_notify_parent(tsk, SIGCHLD);
 884         }
 885
 886         state = EXIT_ZOMBIE;
 887         if (tsk->exit_signal == -1 && likely(!tsk->ptrace))
 888                 state = EXIT_DEAD;
 889         tsk->exit_state = state;
 890
 891         if (thread_group_leader(tsk) &&
 892             tsk->signal->notify_count < 0 &&
 893             tsk->signal->group_exit_task)
 894                 wake_up_process(tsk->signal->group_exit_task);
 895
 896         write_unlock_irq(&tasklist_lock);
 897
 898         /* If the process is dead, release it - nobody will wait for it */
 899         if (state == EXIT_DEAD)
 900                 release_task(tsk);
 901 }
 902
 903 #ifdef CONFIG_DEBUG_STACK_USAGE
 904 static void check_stack_usage(void)
 905 {
 906         static DEFINE_SPINLOCK(low_water_lock);
 907         static int lowest_to_date = THREAD_SIZE;
 908         unsigned long *n = end_of_stack(current);
 909         unsigned long free;
 910
 911         while (*n == 0)
 912                 n++;
 913         free = (unsigned long)n - (unsigned long)end_of_stack(current);
 914
 915         if (free >= lowest_to_date)
 916                 return;
 917
 918         spin_lock(&low_water_lock);
 919         if (free < lowest_to_date) {
 920                 printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
 921                                 "left\n",
 922                                 current->comm, free);
 923                 lowest_to_date = free;
 924         }
 925         spin_unlock(&low_water_lock);
 926 }
 927 #else
 928 static inline void check_stack_usage(void) {}
 929 #endif
 930
 931 static inline void exit_child_reaper(struct task_struct *tsk)
 932 {
 933         if (likely(tsk->group_leader != task_child_reaper(tsk)))
 934                 return;
 935
 936         if (tsk->nsproxy->pid_ns == &init_pid_ns)
 937                 panic("Attempted to kill init!");
 938
 939         /*
 940          * @tsk is the last thread in the 'cgroup-init' and is exiting.
 941          * Terminate all remaining processes in the namespace and reap them
 942          * before exiting @tsk.
 943          *
 944          * Note that @tsk (last thread of cgroup-init) may not necessarily
 945          * be the child-reaper (i.e main thread of cgroup-init) of the
 946          * namespace i.e the child_reaper may have already exited.
 947          *
 948          * Even after a child_reaper exits, we let it inherit orphaned children,
 949          * because, pid_ns->child_reaper remains valid as long as there is
 950          * at least one living sub-thread in the cgroup init.
 951
 952          * This living sub-thread of the cgroup-init will be notified when
 953          * a child inherited by the 'child-reaper' exits (do_notify_parent()
 954          * uses __group_send_sig_info()). Further, when reaping child processes,
 955          * do_wait() iterates over children of all living sub threads.
 956
 957          * i.e even though 'child_reaper' thread is listed as the parent of the
 958          * orphaned children, any living sub-thread in the cgroup-init can
 959          * perform the role of the child_reaper.
 960          */
 961         zap_pid_ns_processes(tsk->nsproxy->pid_ns);
 962 }
 963
 964 NORET_TYPE void do_exit(long code)
 965 {
 966         struct task_struct *tsk = current;
 967         int group_dead;
 968
 969         profile_task_exit(tsk);
 970
 971         WARN_ON(atomic_read(&tsk->fs_excl));
 972
 973         if (unlikely(in_interrupt()))
 974                 panic("Aiee, killing interrupt handler!");
 975         if (unlikely(!tsk->pid))
 976                 panic("Attempted to kill the idle task!");
 977
 978         if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
 979                 current->ptrace_message = code;
 980                 ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
 981         }
 982
 983         /*
 984          * We're taking recursive faults here in do_exit. Safest is to just
 985          * leave this task alone and wait for reboot.
 986          */
 987         if (unlikely(tsk->flags & PF_EXITING)) {
 988                 printk(KERN_ALERT
 989                         "Fixing recursive fault but reboot is needed!\n");
 990                 /*
 991                  * We can do this unlocked here. The futex code uses
 992                  * this flag just to verify whether the pi state
 993                  * cleanup has been done or not. In the worst case it
 994                  * loops once more. We pretend that the cleanup was
 995                  * done as there is no way to return. Either the
 996                  * OWNER_DIED bit is set by now or we push the blocked
 997                  * task into the wait for ever nirwana as well.
 998                  */
 999                 tsk->flags |= PF_EXITPIDONE;
1000                 if (tsk->io_context)
1001                         exit_io_context();
1002                 set_current_state(TASK_UNINTERRUPTIBLE);
1003                 schedule();
1004         }
1005
1006         exit_signals(tsk);  /* sets PF_EXITING */
1007         /*
1008          * tsk->flags are checked in the futex code to protect against
1009          * an exiting task cleaning up the robust pi futexes.
1010          */
1011         smp_mb();
1012         spin_unlock_wait(&tsk->pi_lock);
1013
1014         if (unlikely(in_atomic()))
1015                 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
1016                                 current->comm, task_pid_nr(current),
1017                                 preempt_count());
1018
1019         acct_update_integrals(tsk);
1020         if (tsk->mm) {
1021                 update_hiwater_rss(tsk->mm);
1022                 update_hiwater_vm(tsk->mm);
1023         }
1024         group_dead = atomic_dec_and_test(&tsk->signal->live);
1025         if (group_dead) {
1026                 exit_child_reaper(tsk);
1027                 hrtimer_cancel(&tsk->signal->real_timer);
1028                 exit_itimers(tsk->signal);
1029         }
1030         acct_collect(code, group_dead);
1031 #ifdef CONFIG_FUTEX
1032         if (unlikely(tsk->robust_list))
1033                 exit_robust_list(tsk);
1034 #ifdef CONFIG_COMPAT
1035         if (unlikely(tsk->compat_robust_list))
1036                 compat_exit_robust_list(tsk);
1037 #endif
1038 #endif
1039         if (group_dead)
1040                 tty_audit_exit();
1041         if (unlikely(tsk->audit_context))
1042                 audit_free(tsk);
1043
1044         tsk->exit_code = code;
1045         taskstats_exit(tsk, group_dead);
1046
1047         exit_mm(tsk);
1048
1049         if (group_dead)
1050                 acct_process();
1051         exit_sem(tsk);
1052         __exit_files(tsk);
1053         __exit_fs(tsk);
1054         check_stack_usage();
1055         exit_thread();
1056         cgroup_exit(tsk, 1);
1057         exit_keys(tsk);
1058
1059         if (group_dead && tsk->signal->leader)
1060                 disassociate_ctty(1);
1061
1062         module_put(task_thread_info(tsk)->exec_domain->module);
1063         if (tsk->binfmt)
1064                 module_put(tsk->binfmt->module);
1065
1066         proc_exit_connector(tsk);
1067 <<<<<<< HEAD:kernel/exit.c
1068         exit_notify(tsk);
1069 =======
1070         exit_notify(tsk, group_dead);
1071 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
1072 #ifdef CONFIG_NUMA
1073         mpol_free(tsk->mempolicy);
1074         tsk->mempolicy = NULL;
1075 #endif
1076 #ifdef CONFIG_FUTEX
1077         /*
1078          * This must happen late, after the PID is not
1079          * hashed anymore:
1080          */
1081         if (unlikely(!list_empty(&tsk->pi_state_list)))
1082                 exit_pi_state_list(tsk);
1083         if (unlikely(current->pi_state_cache))
1084                 kfree(current->pi_state_cache);
1085 #endif
1086         /*
1087          * Make sure we are holding no locks:
1088          */
1089         debug_check_no_locks_held(tsk);
1090         /*
1091          * We can do this unlocked here. The futex code uses this flag
1092          * just to verify whether the pi state cleanup has been done
1093          * or not. In the worst case it loops once more.
1094          */
1095         tsk->flags |= PF_EXITPIDONE;
1096
1097         if (tsk->io_context)
1098                 exit_io_context();
1099
1100         if (tsk->splice_pipe)
1101                 __free_pipe_info(tsk->splice_pipe);
1102
1103         preempt_disable();
1104         /* causes final put_task_struct in finish_task_switch(). */
1105         tsk->state = TASK_DEAD;
1106
1107         schedule();
1108         BUG();
1109         /* Avoid "noreturn function does return".  */
1110         for (;;)
1111                 cpu_relax();    /* For when BUG is null */
1112 }
1113
1114 EXPORT_SYMBOL_GPL(do_exit);
1115
1116 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
1117 {
1118         if (comp)
1119                 complete(comp);
1120
1121         do_exit(code);
1122 }
1123
1124 EXPORT_SYMBOL(complete_and_exit);
1125
1126 asmlinkage long sys_exit(int error_code)
1127 {
1128         do_exit((error_code&0xff)<<8);
1129 }
1130
1131 /*
1132  * Take down every thread in the group.  This is called by fatal signals
1133  * as well as by sys_exit_group (below).
1134  */
1135 NORET_TYPE void
1136 do_group_exit(int exit_code)
1137 {
1138         BUG_ON(exit_code & 0x80); /* core dumps don't get here */
1139
1140         if (current->signal->flags & SIGNAL_GROUP_EXIT)
1141                 exit_code = current->signal->group_exit_code;
1142         else if (!thread_group_empty(current)) {
1143                 struct signal_struct *const sig = current->signal;
1144                 struct sighand_struct *const sighand = current->sighand;
1145                 spin_lock_irq(&sighand->siglock);
1146                 if (signal_group_exit(sig))
1147                         /* Another thread got here before we took the lock.  */
1148                         exit_code = sig->group_exit_code;
1149                 else {
1150                         sig->group_exit_code = exit_code;
1151                         sig->flags = SIGNAL_GROUP_EXIT;
1152                         zap_other_threads(current);
1153                 }
1154                 spin_unlock_irq(&sighand->siglock);
1155         }
1156
1157         do_exit(exit_code);
1158         /* NOTREACHED */
1159 }
1160
1161 /*
1162  * this kills every thread in the thread group. Note that any externally
1163  * wait4()-ing process will get the correct exit code - even if this
1164  * thread is not the thread group leader.
1165  */
1166 asmlinkage void sys_exit_group(int error_code)
1167 {
1168         do_group_exit((error_code & 0xff) << 8);
1169 }
1170
1171 static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1172 {
1173         struct pid *pid = NULL;
1174         if (type == PIDTYPE_PID)
1175                 pid = task->pids[type].pid;
1176         else if (type < PIDTYPE_MAX)
1177                 pid = task->group_leader->pids[type].pid;
1178         return pid;
1179 }
1180
1181 static int eligible_child(enum pid_type type, struct pid *pid, int options,
1182                           struct task_struct *p)
1183 {
1184         int err;
1185
1186         if (type < PIDTYPE_MAX) {
1187                 if (task_pid_type(p, type) != pid)
1188                         return 0;
1189         }
1190
1191         /*
1192          * Do not consider detached threads that are
1193          * not ptraced:
1194          */
1195         if (p->exit_signal == -1 && !p->ptrace)
1196                 return 0;
1197
1198         /* Wait for all children (clone and not) if __WALL is set;
1199          * otherwise, wait for clone children *only* if __WCLONE is
1200          * set; otherwise, wait for non-clone children *only*.  (Note:
1201          * A "clone" child here is one that reports to its parent
1202          * using a signal other than SIGCHLD.) */
1203         if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
1204             && !(options & __WALL))
1205                 return 0;
1206
1207         err = security_task_wait(p);
1208         if (likely(!err))
1209                 return 1;
1210
1211         if (type != PIDTYPE_PID)
1212                 return 0;
1213         /* This child was explicitly requested, abort */
1214         read_unlock(&tasklist_lock);
1215         return err;
1216 }
1217
1218 static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1219                                int why, int status,
1220                                struct siginfo __user *infop,
1221                                struct rusage __user *rusagep)
1222 {
1223         int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
1224
1225         put_task_struct(p);
1226         if (!retval)
1227                 retval = put_user(SIGCHLD, &infop->si_signo);
1228         if (!retval)
1229                 retval = put_user(0, &infop->si_errno);
1230         if (!retval)
1231                 retval = put_user((short)why, &infop->si_code);
1232         if (!retval)
1233                 retval = put_user(pid, &infop->si_pid);
1234         if (!retval)
1235                 retval = put_user(uid, &infop->si_uid);
1236         if (!retval)
1237                 retval = put_user(status, &infop->si_status);
1238         if (!retval)
1239                 retval = pid;
1240         return retval;
1241 }
1242
/*
 * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 *
 * @p:         the zombie child being examined.
 * @noreap:    non-zero to only report the child's status and leave it
 *             in place (the caller passes options & WNOWAIT).
 * @infop:     user siginfo to fill in; may be NULL on the reaping path.
 * @stat_addr: user int receiving the wait status; may be NULL.
 * @ru:        user rusage to fill in; may be NULL.
 *
 * A nonzero return is the child's pid (as seen from its pid namespace
 * via task_pid_vnr()) on success, or the first error returned by
 * getrusage()/put_user().
 */
static int wait_task_zombie(struct task_struct *p, int noreap,
                            struct siginfo __user *infop,
                            int __user *stat_addr, struct rusage __user *ru)
{
        unsigned long state;
        int retval, status, traced;
        pid_t pid = task_pid_vnr(p);

        if (unlikely(noreap)) {
                /* Snapshot what we need while tasklist_lock is still held. */
                uid_t uid = p->uid;
                int exit_code = p->exit_code;
                /* NOTE: this 'status' shadows the function-level one. */
                int why, status;

                /* Pin the task; wait_noreap_copyout() drops this reference. */
                get_task_struct(p);
                read_unlock(&tasklist_lock);
                /* Low 7 bits clear: normal exit, status is in bits 8 and up. */
                if ((exit_code & 0x7f) == 0) {
                        why = CLD_EXITED;
                        status = exit_code >> 8;
                } else {
                        /* Otherwise bit 0x80 distinguishes dumped from killed. */
                        why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
                        status = exit_code & 0x7f;
                }
                return wait_noreap_copyout(p, pid, uid, why,
                                           status, infop, ru);
        }

        /*
         * Try to move the task's state to DEAD
         * only one thread is allowed to do this:
         */
        state = xchg(&p->exit_state, EXIT_DEAD);
        if (state != EXIT_ZOMBIE) {
                /* Somebody else won the race; they must have set EXIT_DEAD. */
                BUG_ON(state != EXIT_DEAD);
                return 0;
        }

        /* traced means p->ptrace, but not vice versa */
        traced = (p->real_parent != p->parent);

        if (likely(!traced)) {
                struct signal_struct *psig;
                struct signal_struct *sig;

                /*
                 * The resource counters for the group leader are in its
                 * own task_struct.  Those for dead threads in the group
                 * are in its signal_struct, as are those for the child
                 * processes it has previously reaped.  All these
                 * accumulate in the parent's signal_struct c* fields.
                 *
                 * We don't bother to take a lock here to protect these
                 * p->signal fields, because they are only touched by
                 * __exit_signal, which runs with tasklist_lock
                 * write-locked anyway, and so is excluded here.  We do
                 * need to protect the access to p->parent->signal fields,
                 * as other threads in the parent group can be right
                 * here reaping other children at the same time.
                 */
                spin_lock_irq(&p->parent->sighand->siglock);
                psig = p->parent->signal;
                sig = p->signal;
                psig->cutime =
                        cputime_add(psig->cutime,
                        cputime_add(p->utime,
                        cputime_add(sig->utime,
                                    sig->cutime)));
                psig->cstime =
                        cputime_add(psig->cstime,
                        cputime_add(p->stime,
                        cputime_add(sig->stime,
                                    sig->cstime)));
                psig->cgtime =
                        cputime_add(psig->cgtime,
                        cputime_add(p->gtime,
                        cputime_add(sig->gtime,
                                    sig->cgtime)));
                psig->cmin_flt +=
                        p->min_flt + sig->min_flt + sig->cmin_flt;
                psig->cmaj_flt +=
                        p->maj_flt + sig->maj_flt + sig->cmaj_flt;
                psig->cnvcsw +=
                        p->nvcsw + sig->nvcsw + sig->cnvcsw;
                psig->cnivcsw +=
                        p->nivcsw + sig->nivcsw + sig->cnivcsw;
                psig->cinblock +=
                        task_io_get_inblock(p) +
                        sig->inblock + sig->cinblock;
                psig->coublock +=
                        task_io_get_oublock(p) +
                        sig->oublock + sig->coublock;
                spin_unlock_irq(&p->parent->sighand->siglock);
        }

        /*
         * Now we are sure this task is interesting, and no other
         * thread can reap it because we set its state to EXIT_DEAD.
         */
        read_unlock(&tasklist_lock);

        /* Copy results out; stop at the first failure and keep its error. */
        retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
        /* A group exit reports the group's code rather than this task's. */
        status = (p->signal->flags & SIGNAL_GROUP_EXIT)
                ? p->signal->group_exit_code : p->exit_code;
        if (!retval && stat_addr)
                retval = put_user(status, stat_addr);
        if (!retval && infop)
                retval = put_user(SIGCHLD, &infop->si_signo);
        if (!retval && infop)
                retval = put_user(0, &infop->si_errno);
        if (!retval && infop) {
                int why;

                /* Same decoding of the status word as on the noreap path. */
                if ((status & 0x7f) == 0) {
                        why = CLD_EXITED;
                        status >>= 8;
                } else {
                        why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
                        status &= 0x7f;
                }
                retval = put_user((short)why, &infop->si_code);
                if (!retval)
                        retval = put_user(status, &infop->si_status);
        }
        if (!retval && infop)
                retval = put_user(pid, &infop->si_pid);
        if (!retval && infop)
                retval = put_user(p->uid, &infop->si_uid);
        if (!retval)
                retval = pid;

        if (traced) {
                write_lock_irq(&tasklist_lock);
                /* We dropped tasklist, ptracer could die and untrace */
                ptrace_unlink(p);
                /*
                 * If this is not a detached task, notify the parent.
                 * If it's still not detached after that, don't release
                 * it now.
                 */
                if (p->exit_signal != -1) {
                        do_notify_parent(p, p->exit_signal);
                        if (p->exit_signal != -1) {
                                /* Back to zombie; p = NULL skips release_task(). */
                                p->exit_state = EXIT_ZOMBIE;
                                p = NULL;
                        }
                }
                write_unlock_irq(&tasklist_lock);
        }
        if (p != NULL)
                release_task(p);

        return retval;
}
1401
1402 /*
1403  * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
1404  * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
1405  * the lock and this task is uninteresting.  If we return nonzero, we have
1406  * released the lock and the system call should return.
1407  */
1408 static int wait_task_stopped(struct task_struct *p,
1409                              int noreap, struct siginfo __user *infop,
1410                              int __user *stat_addr, struct rusage __user *ru)
1411 {
1412         int retval, exit_code, why;
1413         uid_t uid = 0; /* unneeded, required by compiler */
1414         pid_t pid;
1415
1416         exit_code = 0;
1417         spin_lock_irq(&p->sighand->siglock);
1418
1419         if (unlikely(!task_is_stopped_or_traced(p)))
1420                 goto unlock_sig;
1421
1422         if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0)
1423                 /*
1424                  * A group stop is in progress and this is the group leader.
1425                  * We won't report until all threads have stopped.
1426                  */
1427                 goto unlock_sig;
1428
1429         exit_code = p->exit_code;
1430         if (!exit_code)
1431                 goto unlock_sig;
1432
1433         if (!noreap)
1434                 p->exit_code = 0;
1435
1436         uid = p->uid;
1437 unlock_sig:
1438         spin_unlock_irq(&p->sighand->siglock);
1439         if (!exit_code)
1440                 return 0;
1441
1442         /*
1443          * Now we are pretty sure this task is interesting.
1444          * Make sure it doesn't get reaped out from under us while we
1445          * give up the lock and then examine it below.  We don't want to
1446          * keep holding onto the tasklist_lock while we call getrusage and
1447          * possibly take page faults for user memory.
1448          */
1449         get_task_struct(p);
1450         pid = task_pid_vnr(p);
1451         why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
1452         read_unlock(&tasklist_lock);
1453
1454         if (unlikely(noreap))
1455                 return wait_noreap_copyout(p, pid, uid,
1456                                            why, exit_code,
1457                                            infop, ru);
1458
1459         retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
1460         if (!retval && stat_addr)
1461                 retval = put_user((exit_code << 8) | 0x7f, stat_addr);
1462         if (!retval && infop)
1463                 retval = put_user(SIGCHLD, &infop->si_signo);
1464         if (!retval && infop)
1465                 retval = put_user(0, &infop->si_errno);
1466         if (!retval && infop)
1467 <<<<<<< HEAD:kernel/exit.c
1468                 retval = put_user(why, &infop->si_code);
1469 =======
1470                 retval = put_user((short)why, &infop->si_code);
1471 >>>>>>> 264e3e889d86e552b4191d69bb60f4f3b383135a:kernel/exit.c
1472         if (!retval && infop)
1473                 retval = put_user(exit_code, &infop->si_status);
1474         if (!retval && infop)
1475                 retval = put_user(pid, &infop->si_pid);
1476         if (!retval && infop)
1477                 retval = put_user(uid, &infop->si_uid);
1478         if (!retval)
1479                 retval = pid;
1480         put_task_struct(p);
1481
1482         BUG_ON(!retval);
1483         return retval;
1484 }
1485
1486 /*
1487  * Handle do_wait work for one task in a live, non-stopped state.
1488  * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
1489  * the lock and this task is uninteresting.  If we return nonzero, we have
1490  * released the lock and the system call should return.
1491  */
1492 static int wait_task_continued(struct task_struct *p, int noreap,
1493                                struct siginfo __user *infop,
1494                                int __user *stat_addr, struct rusage __user *ru)
1495 {
1496         int retval;
1497         pid_t pid;
1498         uid_t uid;
1499
1500         if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
1501                 return 0;
1502
1503         spin_lock_irq(&p->sighand->siglock);
1504         /* Re-check with the lock held.  */
1505         if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
1506                 spin_unlock_irq(&p->sighand->siglock);
1507                 return 0;
1508         }
1509         if (!noreap)
1510                 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1511         spin_unlock_irq(&p->sighand->siglock);
1512
1513         pid = task_pid_vnr(p);
1514         uid = p->uid;
1515         get_task_struct(p);
1516         read_unlock(&tasklist_lock);
1517
1518         if (!infop) {
1519                 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
1520                 put_task_struct(p);
1521                 if (!retval && stat_addr)
1522                         retval = put_user(0xffff, stat_addr);
1523                 if (!retval)
1524                         retval = pid;
1525         } else {
1526                 retval = wait_noreap_copyout(p, pid, uid,
1527                                              CLD_CONTINUED, SIGCONT,
1528                                              infop, ru);
1529                 BUG_ON(retval == 0);
1530         }
1531
1532         return retval;
1533 }
1534
/*
 * Common implementation behind the wait family of system calls.
 *
 * @type:      which kind of pid @pid is matched against; a value of
 *             PIDTYPE_MAX or above disables pid filtering.
 * @pid:       the struct pid to match when @type < PIDTYPE_MAX.
 * @options:   W* option bits (WNOHANG, WNOWAIT, WEXITED, WUNTRACED,
 *             WCONTINUED, __WNOTHREAD, ...).
 * @infop:     user siginfo to fill in, or NULL.
 * @stat_addr: user int receiving the wait status, or NULL.
 * @ru:        user rusage to fill in, or NULL.
 *
 * Scans the children (and, if nothing matched there, the ptrace_children)
 * of every thread in the caller's group, or only of the caller when
 * __WNOTHREAD is set.  Sleeps on signal->wait_chldexit and rescans when
 * matching children exist but none is reportable yet, unless WNOHANG.
 *
 * Returns the value produced by the wait_task_*() helper that reported a
 * child, 0 for WNOHANG with nothing to report, -ECHILD when no child can
 * match, -ERESTARTSYS when a signal is pending, or an error from
 * eligible_child().  When @infop is non-NULL a positive result is
 * converted to 0.
 */
static long do_wait(enum pid_type type, struct pid *pid, int options,
                    struct siginfo __user *infop, int __user *stat_addr,
                    struct rusage __user *ru)
{
        DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;
        int flag, retval;

        add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
        /* If there is nothing that can match our criteria just get out */
        retval = -ECHILD;
        if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
                goto end;

        /*
         * We will set this flag if we see any child that might later
         * match our criteria, even if we are not able to reap it yet.
         */
        flag = retval = 0;
        /* Set the sleep state before scanning, ahead of the schedule() below. */
        current->state = TASK_INTERRUPTIBLE;
        read_lock(&tasklist_lock);
        tsk = current;
        do {
                struct task_struct *p;

                list_for_each_entry(p, &tsk->children, sibling) {
                        int ret = eligible_child(type, pid, options, p);
                        if (!ret)
                                continue;

                        if (unlikely(ret < 0)) {
                                /* eligible_child() already dropped the lock. */
                                retval = ret;
                        } else if (task_is_stopped_or_traced(p)) {
                                /*
                                 * It's stopped now, so it might later
                                 * continue, exit, or stop again.
                                 */
                                flag = 1;
                                if (!(p->ptrace & PT_PTRACED) &&
                                    !(options & WUNTRACED))
                                        continue;

                                retval = wait_task_stopped(p,
                                                (options & WNOWAIT), infop,
                                                stat_addr, ru);
                        } else if (p->exit_state == EXIT_ZOMBIE &&
                                        !delay_group_leader(p)) {
                                /*
                                 * We don't reap group leaders with subthreads.
                                 */
                                if (!likely(options & WEXITED))
                                        continue;
                                retval = wait_task_zombie(p,
                                                (options & WNOWAIT), infop,
                                                stat_addr, ru);
                        } else if (p->exit_state != EXIT_DEAD) {
                                /*
                                 * It's running now, so it might later
                                 * exit, stop, or stop and then continue.
                                 */
                                flag = 1;
                                if (!unlikely(options & WCONTINUED))
                                        continue;
                                retval = wait_task_continued(p,
                                                (options & WNOWAIT), infop,
                                                stat_addr, ru);
                        }
                        if (retval != 0) /* tasklist_lock released */
                                goto end;
                }
                /*
                 * No candidate among the children: see whether any task on
                 * ptrace_children is eligible, so that we wait rather than
                 * fail with -ECHILD.
                 */
                if (!flag) {
                        list_for_each_entry(p, &tsk->ptrace_children,
                                                                ptrace_list) {
                                flag = eligible_child(type, pid, options, p);
                                if (!flag)
                                        continue;
                                if (likely(flag > 0))
                                        break;
                                /* Negative: error, lock already released. */
                                retval = flag;
                                goto end;
                        }
                }
                if (options & __WNOTHREAD)
                        break;
                tsk = next_thread(tsk);
                BUG_ON(tsk->signal != current->signal);
        } while (tsk != current);
        read_unlock(&tasklist_lock);

        if (flag) {
                /* Candidates exist but none is reportable right now. */
                if (options & WNOHANG)
                        goto end;
                retval = -ERESTARTSYS;
                if (signal_pending(current))
                        goto end;
                schedule();
                goto repeat;
        }
        retval = -ECHILD;
end:
        current->state = TASK_RUNNING;
        remove_wait_queue(&current->signal->wait_chldexit,&wait);
        if (infop) {
                /* With a siginfo, success is reported as 0, not the pid. */
                if (retval > 0)
                        retval = 0;
                else {
                        /*
                         * For a WNOHANG return, clear out all the fields
                         * we would set so the user can easily tell the
                         * difference.
                         */
                        if (!retval)
                                retval = put_user(0, &infop->si_signo);
                        if (!retval)
                                retval = put_user(0, &infop->si_errno);
                        if (!retval)
                                retval = put_user(0, &infop->si_code);
                        if (!retval)
                                retval = put_user(0, &infop->si_pid);
                        if (!retval)
                                retval = put_user(0, &infop->si_uid);
                        if (!retval)
                                retval = put_user(0, &infop->si_status);
                }
        }
        return retval;
}
1663
1664 asmlinkage long sys_waitid(int which, pid_t upid,
1665                            struct siginfo __user *infop, int options,
1666                            struct rusage __user *ru)
1667 {
1668         struct pid *pid = NULL;
1669         enum pid_type type;
1670         long ret;
1671
1672         if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
1673                 return -EINVAL;
1674         if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
1675                 return -EINVAL;
1676
1677         switch (which) {
1678         case P_ALL:
1679                 type = PIDTYPE_MAX;
1680                 break;
1681         case P_PID:
1682                 type = PIDTYPE_PID;
1683                 if (upid <= 0)
1684                         return -EINVAL;
1685                 break;
1686         case P_PGID:
1687                 type = PIDTYPE_PGID;
1688                 if (upid <= 0)
1689                         return -EINVAL;
1690                 break;
1691         default:
1692                 return -EINVAL;
1693         }
1694
1695         if (type < PIDTYPE_MAX)
1696                 pid = find_get_pid(upid);
1697         ret = do_wait(type, pid, options, infop, NULL, ru);
1698         put_pid(pid);
1699
1700         /* avoid REGPARM breakage on x86: */
1701         prevent_tail_call(ret);
1702         return ret;
1703 }
1704
1705 asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
1706                           int options, struct rusage __user *ru)
1707 {
1708         struct pid *pid = NULL;
1709         enum pid_type type;
1710         long ret;
1711
1712         if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
1713                         __WNOTHREAD|__WCLONE|__WALL))
1714                 return -EINVAL;
1715
1716         if (upid == -1)
1717                 type = PIDTYPE_MAX;
1718         else if (upid < 0) {
1719                 type = PIDTYPE_PGID;
1720                 pid = find_get_pid(-upid);
1721         } else if (upid == 0) {
1722                 type = PIDTYPE_PGID;
1723                 pid = get_pid(task_pgrp(current));
1724         } else /* upid > 0 */ {
1725                 type = PIDTYPE_PID;
1726                 pid = find_get_pid(upid);
1727         }
1728
1729         ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru);
1730         put_pid(pid);
1731
1732         /* avoid REGPARM breakage on x86: */
1733         prevent_tail_call(ret);
1734         return ret;
1735 }
1736
1737 #ifdef __ARCH_WANT_SYS_WAITPID
1738
1739 /*
1740  * sys_waitpid() remains for compatibility. waitpid() should be
1741  * implemented by calling sys_wait4() from libc.a.
1742  */
1743 asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
1744 {
1745         return sys_wait4(pid, stat_addr, options, NULL);
1746 }
1747
1748 #endif