/*
 * utrace infrastructure interface for debugging user processes
 *
 * Copyright (C) 2006-2009 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 *
 * Red Hat Author: Roland McGrath.
 */
#include <linux/utrace.h>
#include <linux/tracehook.h>
#include <linux/regset.h>
#include <asm/syscall.h>
#include <linux/ptrace.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/freezer.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
/*
 * Rules for 'struct utrace', defined in <linux/utrace_struct.h>
 * but used entirely privately in this file.
 *
 * The common event reporting loops are done by the task making the
 * report without ever taking any locks.  To facilitate this, the two
 * lists @attached and @attaching work together for smooth asynchronous
 * attaching with low overhead.  Modifying either list requires @lock.
 * The @attaching list can be modified any time while holding @lock.
 * New engines being attached always go on this list.
 *
 * The @attached list is what the task itself uses for its reporting
 * loops.  When the task itself is not quiescent, it can use the
 * @attached list without taking any lock.  Nobody may modify the list
 * when the task is not quiescent.  When it is quiescent, that means
 * that it won't run again without taking @lock itself before using
 * the list.
 *
 * At each place where we know the task is quiescent (or it's current),
 * while holding @lock, we call splice_attaching(), below.  This moves
 * the @attaching list members on to the end of the @attached list.
 * Since this happens at the start of any reporting pass, any new
 * engines attached asynchronously go on the stable @attached list
 * in time to have their callbacks seen.
 */
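
/*
 * For orientation only: a paraphrase of the 'struct utrace' fields this
 * file relies on, inferred from how they are used below.  The
 * authoritative definition lives in <linux/utrace_struct.h>; this sketch
 * is illustrative, not the actual layout.
 *
 *	struct utrace {
 *		spinlock_t lock;
 *		struct list_head attached, attaching;
 *		struct task_struct *cloning;
 *		struct utrace_engine *reporting;
 *		unsigned int stopped:1, report:1, interrupt:1, death:1,
 *			     reap:1, signal_handler:1, vfork_stop:1;
 *	};
 */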
static struct kmem_cache *utrace_engine_cachep;
static const struct utrace_engine_ops utrace_detached_ops; /* forward decl */
static int __init utrace_init(void)
{
	utrace_engine_cachep = KMEM_CACHE(utrace_engine, SLAB_PANIC);
	return 0;
}
module_init(utrace_init);
/*
 * This is called with @utrace->lock held when the task is safely
 * quiescent, i.e. it won't consult utrace->attached without the lock.
 * Move any engines attached asynchronously from @utrace->attaching
 * onto the @utrace->attached list.
 */
static void splice_attaching(struct utrace *utrace)
{
	list_splice_tail_init(&utrace->attaching, &utrace->attached);
}
/*
 * This is the exported function used by the utrace_engine_put() inline.
 */
void __utrace_engine_release(struct kref *kref)
{
	struct utrace_engine *engine = container_of(kref, struct utrace_engine,
						    kref);
	BUG_ON(!list_empty(&engine->entry));
	kmem_cache_free(utrace_engine_cachep, engine);
}
EXPORT_SYMBOL_GPL(__utrace_engine_release);
static bool engine_matches(struct utrace_engine *engine, int flags,
			   const struct utrace_engine_ops *ops, void *data)
{
	if ((flags & UTRACE_ATTACH_MATCH_OPS) && engine->ops != ops)
		return false;
	if ((flags & UTRACE_ATTACH_MATCH_DATA) && engine->data != data)
		return false;
	return engine->ops && engine->ops != &utrace_detached_ops;
}
static struct utrace_engine *matching_engine(
	struct utrace *utrace, int flags,
	const struct utrace_engine_ops *ops, void *data)
{
	struct utrace_engine *engine;
	list_for_each_entry(engine, &utrace->attached, entry)
		if (engine_matches(engine, flags, ops, data))
			return engine;
	list_for_each_entry(engine, &utrace->attaching, entry)
		if (engine_matches(engine, flags, ops, data))
			return engine;
	return NULL;
}
/*
 * Called without locks, when we might be the first utrace engine to attach.
 * If this is a newborn thread and we are not the creator, we have to wait
 * for it.  The creator gets the first chance to attach.  The PF_STARTING
 * flag is cleared after its report_clone hook has had a chance to run.
 */
static inline int utrace_attach_delay(struct task_struct *target)
{
	if ((target->flags & PF_STARTING) &&
	    current->utrace.cloning != target)
		do {
			schedule_timeout_interruptible(1);
			if (signal_pending(current))
				return -ERESTARTNOINTR;
		} while (target->flags & PF_STARTING);

	return 0;
}
/*
 * Enqueue @engine, or maybe don't if UTRACE_ATTACH_EXCLUSIVE.
 */
static int utrace_add_engine(struct task_struct *target,
			     struct utrace *utrace,
			     struct utrace_engine *engine,
			     int flags,
			     const struct utrace_engine_ops *ops,
			     void *data)
{
	int ret;

	spin_lock(&utrace->lock);

	if (unlikely(utrace->reap)) {
		/*
		 * Already entered utrace_release_task(), cannot attach now.
		 */
		ret = -ESRCH;
	} else if ((flags & UTRACE_ATTACH_EXCLUSIVE) &&
		   unlikely(matching_engine(utrace, flags, ops, data))) {
		ret = -EEXIST;
	} else {
		/*
		 * Put the new engine on the pending ->attaching list.
		 * Make sure it gets onto the ->attached list by the next
		 * time it's examined.
		 *
		 * When target == current, it would be safe just to call
		 * splice_attaching() right here.  But if we're inside a
		 * callback, that would mean the new engine also gets
		 * notified about the event that precipitated its own
		 * creation.  This is not what the user wants.
		 *
		 * Setting ->report ensures that start_report() takes the
		 * lock and does it next time.  Whenever setting ->report,
		 * we must maintain the invariant that TIF_NOTIFY_RESUME is
		 * also set.  Otherwise utrace_control() or utrace_do_stop()
		 * might skip setting TIF_NOTIFY_RESUME upon seeing ->report
		 * already set, and we'd miss a necessary callback.
		 *
		 * In case we had no engines before, make sure that
		 * utrace_flags is not zero when tracehook_notify_resume()
		 * checks.  That would bypass utrace reporting clearing
		 * TIF_NOTIFY_RESUME, and thus violate the same invariant.
		 */
		target->utrace_flags |= UTRACE_EVENT(REAP);
		list_add_tail(&engine->entry, &utrace->attaching);
		utrace->report = 1;
		set_notify_resume(target);
		ret = 0;
	}

	spin_unlock(&utrace->lock);

	return ret;
}
/**
 * utrace_attach_task - attach new engine, or look up an attached engine
 * @target:	thread to attach to
 * @flags:	flag bits combined with OR, see below
 * @ops:	callback table for new engine
 * @data:	engine private data pointer
 *
 * The caller must ensure that the @target thread does not get freed,
 * i.e. hold a ref or be its parent.  It is always safe to call this
 * on @current, or on the @child pointer in a @report_clone callback.
 * For most other cases, it's easier to use utrace_attach_pid() instead.
 *
 * UTRACE_ATTACH_CREATE:
 * Create a new engine.  If %UTRACE_ATTACH_CREATE is not specified, you
 * only look up an existing engine already attached to the thread.
 *
 * UTRACE_ATTACH_EXCLUSIVE:
 * Attempting to attach a second (matching) engine fails with -%EEXIST.
 *
 * UTRACE_ATTACH_MATCH_OPS: Only consider engines matching @ops.
 * UTRACE_ATTACH_MATCH_DATA: Only consider engines matching @data.
 */
struct utrace_engine *utrace_attach_task(
	struct task_struct *target, int flags,
	const struct utrace_engine_ops *ops, void *data)
{
	struct utrace *utrace;
	struct utrace_engine *engine;
	int ret;

	utrace = &target->utrace;

	if (unlikely(target->exit_state == EXIT_DEAD)) {
		/*
		 * The target has already been reaped.
		 * Check this early, though it's not synchronized.
		 * utrace_add_engine() will do the final check.
		 */
		if (!(flags & UTRACE_ATTACH_CREATE))
			return ERR_PTR(-ENOENT);
		return ERR_PTR(-ESRCH);
	}

	if (!(flags & UTRACE_ATTACH_CREATE)) {
		spin_lock(&utrace->lock);
		engine = matching_engine(utrace, flags, ops, data);
		if (engine)
			utrace_engine_get(engine);
		spin_unlock(&utrace->lock);
		return engine ?: ERR_PTR(-ENOENT);
	}

	if (unlikely(!ops) || unlikely(ops == &utrace_detached_ops))
		return ERR_PTR(-EINVAL);

	if (unlikely(target->flags & PF_KTHREAD))
		/*
		 * Silly kernel, utrace is for users!
		 */
		return ERR_PTR(-EPERM);

	engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL);
	if (unlikely(!engine))
		return ERR_PTR(-ENOMEM);

	/*
	 * Initialize the new engine structure.  It starts out with two
	 * refs: one ref to return, and one ref for being attached.
	 */
	kref_set(&engine->kref, 2);
	engine->flags = 0;
	engine->ops = ops;
	engine->data = data;

	ret = utrace_attach_delay(target);
	if (likely(!ret))
		ret = utrace_add_engine(target, utrace, engine,
					flags, ops, data);

	if (unlikely(ret)) {
		kmem_cache_free(utrace_engine_cachep, engine);
		engine = ERR_PTR(ret);
	}

	return engine;
}
EXPORT_SYMBOL_GPL(utrace_attach_task);
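
/*
 * A minimal usage sketch (illustrative only, not part of this file):
 * a tracing engine attaches exclusively to a task it already holds a
 * reference on.  The callback and all example_* names are hypothetical;
 * any engine supplies its own ops vector.
 */
#if 0
static u32 example_report_quiesce(enum utrace_resume_action action,
				  struct utrace_engine *engine,
				  struct task_struct *task,
				  unsigned long event)
{
	return UTRACE_RESUME;	/* Let the task keep running. */
}

static const struct utrace_engine_ops example_ops = {
	.report_quiesce = example_report_quiesce,
};

static int example_attach(struct task_struct *target)
{
	struct utrace_engine *engine;

	engine = utrace_attach_task(target, UTRACE_ATTACH_CREATE |
				    UTRACE_ATTACH_EXCLUSIVE |
				    UTRACE_ATTACH_MATCH_OPS,
				    &example_ops, NULL);
	if (IS_ERR(engine))
		return PTR_ERR(engine);
	/* ... use utrace_set_events() to ask for callbacks ... */
	utrace_engine_put(engine);	/* Drop the ref this call returned. */
	return 0;
}
#endif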
/**
 * utrace_attach_pid - attach new engine, or look up an attached engine
 * @pid:	&struct pid pointer representing thread to attach to
 * @flags:	flag bits combined with OR, see utrace_attach_task()
 * @ops:	callback table for new engine
 * @data:	engine private data pointer
 *
 * This is the same as utrace_attach_task(), but takes a &struct pid
 * pointer rather than a &struct task_struct pointer.  The caller must
 * hold a ref on @pid, but does not need to worry about the task
 * staying valid.  If it's been reaped so that @pid points nowhere,
 * then this call returns -%ESRCH.
 */
struct utrace_engine *utrace_attach_pid(
	struct pid *pid, int flags,
	const struct utrace_engine_ops *ops, void *data)
{
	struct utrace_engine *engine = ERR_PTR(-ESRCH);
	struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
	if (task) {
		engine = utrace_attach_task(task, flags, ops, data);
		put_task_struct(task);
	}
	return engine;
}
EXPORT_SYMBOL_GPL(utrace_attach_pid);
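
/*
 * Illustrative only: looking up a thread by pid number and attaching.
 * find_get_pid() takes a ref on the struct pid, which we must drop;
 * example_ops is the hypothetical ops vector from the sketch above.
 */
#if 0
static struct utrace_engine *example_attach_by_nr(pid_t nr)
{
	struct pid *pid = find_get_pid(nr);
	struct utrace_engine *engine = ERR_PTR(-ESRCH);

	if (pid) {
		engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE,
					   &example_ops, NULL);
		put_pid(pid);
	}
	return engine;
}
#endif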
/*
 * When an engine is detached, the target thread may still see it and
 * make callbacks until it quiesces.  We install a special ops vector
 * with these two callbacks.  When the target thread quiesces, it can
 * safely free the engine itself.  For any event we will always get
 * the report_quiesce() callback first, so we only need this one
 * pointer to be set.  The only exception is report_reap(), so we
 * supply that callback too.
 */
static u32 utrace_detached_quiesce(enum utrace_resume_action action,
				   struct utrace_engine *engine,
				   struct task_struct *task,
				   unsigned long event)
{
	return UTRACE_DETACH;
}

static void utrace_detached_reap(struct utrace_engine *engine,
				 struct task_struct *task)
{
}

static const struct utrace_engine_ops utrace_detached_ops = {
	.report_quiesce = &utrace_detached_quiesce,
	.report_reap = &utrace_detached_reap
};
/*
 * After waking up from TASK_TRACED, clear bookkeeping in @utrace.
 * Returns true if we were woken up prematurely by SIGKILL.
 */
static inline bool finish_utrace_stop(struct task_struct *task,
				      struct utrace *utrace)
{
	bool killed = false;

	/*
	 * utrace_wakeup() clears @utrace->stopped before waking us up.
	 * We're officially awake if it's clear.
	 */
	spin_lock(&utrace->lock);
	if (unlikely(utrace->stopped)) {
		/*
		 * If we're here with it still set, it must have been
		 * signal_wake_up() instead, waking us up for a SIGKILL.
		 */
		spin_lock_irq(&task->sighand->siglock);
		WARN_ON(!sigismember(&task->pending.signal, SIGKILL));
		spin_unlock_irq(&task->sighand->siglock);
		utrace->stopped = 0;
		killed = true;
	}
	spin_unlock(&utrace->lock);

	return killed;
}
/*
 * Perform %UTRACE_STOP, i.e. block in TASK_TRACED until woken up.
 * @task == current, @utrace == current->utrace, which is not locked.
 * Return true if we were woken up by SIGKILL even though some utrace
 * engine may still want us to stay stopped.
 */
static bool utrace_stop(struct task_struct *task, struct utrace *utrace,
			bool report)
{
	bool killed;

	/*
	 * @utrace->stopped is the flag that says we are safely
	 * inside this function.  It should never be set on entry.
	 */
	BUG_ON(utrace->stopped);

	/*
	 * The siglock protects us against signals.  As well as SIGKILL
	 * waking us up, we must synchronize with the signal bookkeeping
	 * for stop signals and SIGCONT.
	 */
	spin_lock(&utrace->lock);
	spin_lock_irq(&task->sighand->siglock);

	if (unlikely(sigismember(&task->pending.signal, SIGKILL))) {
		spin_unlock_irq(&task->sighand->siglock);
		spin_unlock(&utrace->lock);
		return true;
	}

	if (report) {
		/*
		 * Ensure a reporting pass when we're resumed.
		 */
		utrace->report = 1;
		set_thread_flag(TIF_NOTIFY_RESUME);
	}

	utrace->stopped = 1;
	__set_current_state(TASK_TRACED);

	/*
	 * If there is a group stop in progress,
	 * we must participate in the bookkeeping.
	 */
	if (task->signal->group_stop_count > 0)
		--task->signal->group_stop_count;

	spin_unlock_irq(&task->sighand->siglock);
	spin_unlock(&utrace->lock);

	schedule();

	/*
	 * While in TASK_TRACED, we were considered "frozen enough".
	 * Now that we woke up, it's crucial if we're supposed to be
	 * frozen that we freeze now before running anything substantial.
	 */
	try_to_freeze();

	killed = finish_utrace_stop(task, utrace);

	/*
	 * While we were in TASK_TRACED, complete_signal() considered
	 * us "uninterested" in signal wakeups.  Now make sure our
	 * TIF_SIGPENDING state is correct for normal running.
	 */
	spin_lock_irq(&task->sighand->siglock);
	recalc_sigpending();
	spin_unlock_irq(&task->sighand->siglock);

	return killed;
}
/*
 * The caller has to hold a ref on the engine.  If the attached flag is
 * true (all but utrace_barrier() calls), the engine is supposed to be
 * attached.  If the attached flag is false (utrace_barrier() only),
 * then return -ERESTARTSYS for an engine marked for detach but not yet
 * fully detached.  The task pointer can be invalid if the engine is
 * detached.
 *
 * Get the utrace lock for the target task.
 * Returns the struct if locked, or ERR_PTR(-errno).
 *
 * This has to be robust against races with:
 *	utrace_control(target, UTRACE_DETACH) calls
 *	UTRACE_DETACH after reports
 *	utrace_report_death
 *	utrace_release_task
 */
static struct utrace *get_utrace_lock(struct task_struct *target,
				      struct utrace_engine *engine,
				      bool attached)
	__acquires(utrace->lock)
{
	struct utrace *utrace;

	rcu_read_lock();

	/*
	 * If this engine was already detached, bail out before we look at
	 * the task_struct pointer at all.  If it's detached after this
	 * check, then RCU is still keeping this task_struct pointer valid.
	 *
	 * The ops pointer is NULL when the engine is fully detached.
	 * It's &utrace_detached_ops when it's marked detached but still
	 * on the list.  In the latter case, utrace_barrier() still works,
	 * since the target might be in the middle of an old callback.
	 */
	if (unlikely(!engine->ops)) {
		rcu_read_unlock();
		return ERR_PTR(-ESRCH);
	}

	if (unlikely(engine->ops == &utrace_detached_ops)) {
		rcu_read_unlock();
		return attached ? ERR_PTR(-ESRCH) : ERR_PTR(-ERESTARTSYS);
	}

	utrace = &target->utrace;
	if (unlikely(target->exit_state == EXIT_DEAD)) {
		/*
		 * If all engines detached already, utrace is clear.
		 * Otherwise, we're called after utrace_release_task might
		 * have started.  A call to this engine's report_reap
		 * callback might already be in progress.
		 */
		utrace = ERR_PTR(-ESRCH);
	} else {
		spin_lock(&utrace->lock);
		if (unlikely(!engine->ops) ||
		    unlikely(engine->ops == &utrace_detached_ops)) {
			/*
			 * By the time we got the utrace lock,
			 * it had been reaped or detached already.
			 */
			spin_unlock(&utrace->lock);
			utrace = ERR_PTR(-ESRCH);
			if (!attached && engine->ops == &utrace_detached_ops)
				utrace = ERR_PTR(-ERESTARTSYS);
		}
	}
	rcu_read_unlock();

	return utrace;
}
/*
 * Now that we don't hold any locks, run through any
 * detached engines and free their references.  Each
 * engine had one implicit ref while it was attached.
 */
static void put_detached_list(struct list_head *list)
{
	struct utrace_engine *engine, *next;
	list_for_each_entry_safe(engine, next, list, entry) {
		list_del_init(&engine->entry);
		utrace_engine_put(engine);
	}
}
/*
 * Called with utrace->lock held.
 * Notify and clean up all engines, then free utrace.
 */
static void utrace_reap(struct task_struct *target, struct utrace *utrace)
	__releases(utrace->lock)
{
	struct utrace_engine *engine, *next;
	const struct utrace_engine_ops *ops;
	LIST_HEAD(detached);

restart:
	splice_attaching(utrace);
	list_for_each_entry_safe(engine, next, &utrace->attached, entry) {
		ops = engine->ops;
		engine->ops = NULL;
		list_move(&engine->entry, &detached);

		/*
		 * If it didn't need a callback, we don't need to drop
		 * the lock.  Now nothing else refers to this engine.
		 */
		if (!(engine->flags & UTRACE_EVENT(REAP)))
			continue;

		/*
		 * This synchronizes with utrace_barrier().  Since we
		 * need the utrace->lock here anyway (unlike the other
		 * reporting loops), we don't need any memory barrier
		 * as utrace_barrier() holds the lock.
		 */
		utrace->reporting = engine;
		spin_unlock(&utrace->lock);

		(*ops->report_reap)(engine, target);

		utrace->reporting = NULL;

		put_detached_list(&detached);

		spin_lock(&utrace->lock);
		goto restart;
	}

	spin_unlock(&utrace->lock);

	put_detached_list(&detached);
}
/*
 * Called by release_task.  After this, target->utrace must be cleared.
 */
void utrace_release_task(struct task_struct *target)
{
	struct utrace *utrace;

	utrace = &target->utrace;

	spin_lock(&utrace->lock);

	utrace->reap = 1;

	if (!(target->utrace_flags & _UTRACE_DEATH_EVENTS)) {
		utrace_reap(target, utrace);	/* Unlocks and frees. */
		return;
	}

	/*
	 * The target will do some final callbacks but hasn't
	 * finished them yet.  We know because it clears these
	 * event bits after it's done.  Instead of cleaning up here
	 * and requiring utrace_report_death to cope with it, we
	 * delay the REAP report and the teardown until after the
	 * target finishes its death reports.
	 */
	spin_unlock(&utrace->lock);
}
/*
 * We use an extra bit in utrace_engine.flags past the event bits,
 * to record whether the engine is keeping the target thread stopped.
 */
#define ENGINE_STOP		(1UL << _UTRACE_NEVENTS)

static void mark_engine_wants_stop(struct utrace_engine *engine)
{
	engine->flags |= ENGINE_STOP;
}

static void clear_engine_wants_stop(struct utrace_engine *engine)
{
	engine->flags &= ~ENGINE_STOP;
}

static bool engine_wants_stop(struct utrace_engine *engine)
{
	return (engine->flags & ENGINE_STOP) != 0;
}
/**
 * utrace_set_events - choose which event reports a tracing engine gets
 * @target:	thread to affect
 * @engine:	attached engine to affect
 * @events:	new event mask
 *
 * This changes the set of events for which @engine wants callbacks made.
 *
 * This fails with -%EALREADY and does nothing if you try to clear
 * %UTRACE_EVENT(%DEATH) when the @report_death callback may already have
 * begun, if you try to clear %UTRACE_EVENT(%REAP) when the @report_reap
 * callback may already have begun, or if you try to newly set
 * %UTRACE_EVENT(%DEATH) or %UTRACE_EVENT(%QUIESCE) when @target is
 * already dead or dying.
 *
 * This can fail with -%ESRCH when @target has already been detached,
 * including forcible detach on reaping.
 *
 * If @target was stopped before the call, then after a successful call,
 * no event callbacks not requested in @events will be made; if
 * %UTRACE_EVENT(%QUIESCE) is included in @events, then a @report_quiesce
 * callback will be made when @target resumes.  If @target was not stopped,
 * and was about to make a callback to @engine, this returns -%EINPROGRESS.
 * In this case, the callback in progress might be one excluded from the
 * new @events setting.  When this returns zero, you can be sure that no
 * event callbacks you've disabled in @events can be made.
 *
 * To synchronize after an -%EINPROGRESS return, see utrace_barrier().
 *
 * When @target is @current, -%EINPROGRESS is not returned.  But
 * note that a newly-created engine will not receive any callbacks
 * related to an event notification already in progress.  This call
 * enables @events callbacks to be made as soon as @engine becomes
 * eligible for any callbacks, see utrace_attach_task().
 *
 * These rules provide for coherent synchronization based on %UTRACE_STOP,
 * even when %SIGKILL is breaking its normal simple rules.
 */
int utrace_set_events(struct task_struct *target,
		      struct utrace_engine *engine,
		      unsigned long events)
{
	struct utrace *utrace;
	unsigned long old_flags, old_utrace_flags, set_utrace_flags;
	int ret;

	utrace = get_utrace_lock(target, engine, true);
	if (unlikely(IS_ERR(utrace)))
		return PTR_ERR(utrace);

	old_utrace_flags = target->utrace_flags;
	set_utrace_flags = events;
	old_flags = engine->flags;

	if (target->exit_state &&
	    (((events & ~old_flags) & _UTRACE_DEATH_EVENTS) ||
	     (utrace->death &&
	      ((old_flags & ~events) & _UTRACE_DEATH_EVENTS)) ||
	     (utrace->reap && ((old_flags & ~events) & UTRACE_EVENT(REAP))))) {
		spin_unlock(&utrace->lock);
		return -EALREADY;
	}

	/*
	 * When setting these flags, it's essential that we really
	 * synchronize with exit_notify().  They cannot be set after
	 * exit_notify() takes the tasklist_lock.  By holding the read
	 * lock here while setting the flags, we ensure that the calls
	 * to tracehook_notify_death() and tracehook_report_death() will
	 * see the new flags.  This ensures that utrace_release_task()
	 * knows positively that utrace_report_death() will be called or
	 * has already been.
	 */
	if ((set_utrace_flags & ~old_utrace_flags) & _UTRACE_DEATH_EVENTS) {
		read_lock(&tasklist_lock);
		if (unlikely(target->exit_state)) {
			read_unlock(&tasklist_lock);
			spin_unlock(&utrace->lock);
			return -EALREADY;
		}
		target->utrace_flags |= set_utrace_flags;
		read_unlock(&tasklist_lock);
	}

	ret = 0;
	engine->flags = events | (engine->flags & ENGINE_STOP);
	target->utrace_flags |= set_utrace_flags;

	if ((set_utrace_flags & UTRACE_EVENT_SYSCALL) &&
	    !(old_utrace_flags & UTRACE_EVENT_SYSCALL))
		set_tsk_thread_flag(target, TIF_SYSCALL_TRACE);

	if (!utrace->stopped && target != current) {
		/*
		 * This barrier ensures that our engine->flags changes
		 * have hit before we examine utrace->reporting,
		 * pairing with the barrier in start_callback().  If
		 * @target has not yet hit finish_callback() to clear
		 * utrace->reporting, we might be in the middle of a
		 * callback to @engine.
		 */
		smp_mb();
		if (utrace->reporting == engine)
			ret = -EINPROGRESS;
	}

	spin_unlock(&utrace->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(utrace_set_events);
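
/*
 * Illustrative only: enabling events on a freshly attached engine and
 * synchronizing when the target was not stopped.  A -EINPROGRESS return
 * means a callback just disabled might still be running, and
 * utrace_barrier() waits it out.  The example_* name is hypothetical.
 */
#if 0
static int example_enable_syscall_entry(struct task_struct *target,
					struct utrace_engine *engine)
{
	int ret = utrace_set_events(target, engine,
				    UTRACE_EVENT(SYSCALL_ENTRY) |
				    UTRACE_EVENT(QUIESCE));
	if (ret == -EINPROGRESS)
		ret = utrace_barrier(target, engine);
	return ret;
}
#endif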
/*
 * Asynchronously mark an engine as being detached.
 *
 * This must work while the target thread races with us doing
 * start_callback(), defined below.  It uses smp_rmb() between checking
 * @engine->flags and using @engine->ops.  Here we change @engine->ops
 * first, then use smp_wmb() before changing @engine->flags.  This ensures
 * it can check the old flags before using the old ops, or check the old
 * flags before using the new ops, or check the new flags before using the
 * new ops, but can never check the new flags before using the old ops.
 * Hence, utrace_detached_ops might be used with any old flags in place.
 * It has report_quiesce() and report_reap() callbacks to handle all cases.
 */
static void mark_engine_detached(struct utrace_engine *engine)
{
	engine->ops = &utrace_detached_ops;
	smp_wmb();
	engine->flags = UTRACE_EVENT(QUIESCE);
}
/*
 * Get @target to stop and return true if it is already stopped now.
 * If we return false, it will make some event callback soonish.
 * Called with @utrace locked.
 */
static bool utrace_do_stop(struct task_struct *target, struct utrace *utrace)
{
	bool stopped = false;

	spin_lock_irq(&target->sighand->siglock);
	if (unlikely(target->exit_state)) {
		/*
		 * On the exit path, it's only truly quiescent
		 * if it has already been through
		 * utrace_report_death(), or never will.
		 */
		if (!(target->utrace_flags & _UTRACE_DEATH_EVENTS))
			utrace->stopped = stopped = true;
	} else if (task_is_stopped(target)) {
		/*
		 * Stopped is considered quiescent; when it wakes up, it will
		 * go through utrace_get_signal() before doing anything else.
		 */
		utrace->stopped = stopped = true;
	} else if (!utrace->report && !utrace->interrupt) {
		utrace->report = 1;
		set_notify_resume(target);
	}
	spin_unlock_irq(&target->sighand->siglock);

	return stopped;
}
/*
 * If the target is not dead it should not be in tracing
 * stop any more.  Wake it unless it's in job control stop.
 *
 * Called with @utrace->lock held and @utrace->stopped set.
 */
static void utrace_wakeup(struct task_struct *target, struct utrace *utrace)
{
	struct sighand_struct *sighand;
	unsigned long irqflags;

	utrace->stopped = 0;

	sighand = lock_task_sighand(target, &irqflags);
	if (unlikely(!sighand))
		return;

	if (likely(task_is_stopped_or_traced(target))) {
		if (target->signal->flags & SIGNAL_STOP_STOPPED)
			target->state = TASK_STOPPED;
		else
			wake_up_state(target, __TASK_STOPPED | __TASK_TRACED);
	}

	unlock_task_sighand(target, &irqflags);
}
/*
 * This is called when there might be some detached engines on the list or
 * some stale bits in @task->utrace_flags.  Clean them up and recompute the
 * flags.
 *
 * @action is NULL when @task is stopped and @utrace->stopped is set; wake
 * it up if it should not be.  @action is set when @task is current; if
 * we're fully detached, reset *@action to UTRACE_RESUME.
 *
 * Called with @utrace->lock held, returns with it released.
 * After this returns, @utrace might be freed if everything detached.
 */
static void utrace_reset(struct task_struct *task, struct utrace *utrace,
			 enum utrace_resume_action *action)
	__releases(utrace->lock)
{
	struct utrace_engine *engine, *next;
	unsigned long flags = 0;
	LIST_HEAD(detached);
	bool wake = !action;

	BUG_ON(wake != (task != current));

	splice_attaching(utrace);

	/*
	 * Update the set of events of interest from the union
	 * of the interests of the remaining tracing engines.
	 * For any engine marked detached, remove it from the list.
	 * We'll collect them on the detached list.
	 */
	list_for_each_entry_safe(engine, next, &utrace->attached, entry) {
		if (engine->ops == &utrace_detached_ops) {
			engine->ops = NULL;
			list_move(&engine->entry, &detached);
		} else {
			flags |= engine->flags | UTRACE_EVENT(REAP);
			wake = wake && !engine_wants_stop(engine);
		}
	}

	if (task->exit_state) {
		/*
		 * Once it's already dead, we never install any flags
		 * except REAP.  When ->exit_state is set and events
		 * like DEATH are not set, then they never can be set.
		 * This ensures that utrace_release_task() knows
		 * positively that utrace_report_death() can never run.
		 */
		BUG_ON(utrace->death);
		flags &= UTRACE_EVENT(REAP);
		wake = false;
	} else if (!(flags & UTRACE_EVENT_SYSCALL) &&
		   test_tsk_thread_flag(task, TIF_SYSCALL_TRACE)) {
		clear_tsk_thread_flag(task, TIF_SYSCALL_TRACE);
	}

	task->utrace_flags = flags;

	if (wake)
		utrace_wakeup(task, utrace);

	/*
	 * If any engines are left, we're done.
	 */
	spin_unlock(&utrace->lock);

	/*
	 * No more engines, cleared out the utrace.
	 */
	if (!flags && action)
		*action = UTRACE_RESUME;

	put_detached_list(&detached);
}
/*
 * You can't do anything to a dead task but detach it.
 * If release_task() has been called, you can't do that.
 *
 * On the exit path, DEATH and QUIESCE event bits are set only
 * before utrace_report_death() has taken the lock.  At that point,
 * the death report will come soon, so disallow detach until it's
 * done.  This prevents us from racing with it detaching itself.
 *
 * Called with utrace->lock held, when @target->exit_state is nonzero.
 */
static inline int utrace_control_dead(struct task_struct *target,
				      struct utrace *utrace,
				      enum utrace_resume_action action)
{
	if (action != UTRACE_DETACH || unlikely(utrace->reap))
		return -ESRCH;

	if (unlikely(utrace->death))
		/*
		 * We have already started the death report.  We can't
		 * prevent the report_death and report_reap callbacks,
		 * so tell the caller they will happen.
		 */
		return -EALREADY;

	return 0;
}
/**
 * utrace_control - control a thread being traced by a tracing engine
 * @target:	thread to affect
 * @engine:	attached engine to affect
 * @action:	&enum utrace_resume_action for thread to do
 *
 * This is how a tracing engine asks a traced thread to do something.
 * This call is controlled by the @action argument, which has the
 * same meaning as the &enum utrace_resume_action value returned by
 * event reporting callbacks.
 *
 * If @target is already dead (@target->exit_state nonzero),
 * all actions except %UTRACE_DETACH fail with -%ESRCH.
 *
 * The following sections describe each option for the @action argument.
 *
 * UTRACE_DETACH:
 *
 * After this, the @engine data structure is no longer accessible,
 * and the thread might be reaped.  The thread will start running
 * again if it was stopped and no longer has any attached engines
 * that want it stopped.
 *
 * If the @report_reap callback may already have begun, this fails
 * with -%ESRCH.  If the @report_death callback may already have
 * begun, this fails with -%EALREADY.
 *
 * If @target is not already stopped, then a callback to this engine
 * might be in progress or about to start on another CPU.  If so,
 * then this returns -%EINPROGRESS; the detach happens as soon as
 * the pending callback is finished.  To synchronize after an
 * -%EINPROGRESS return, see utrace_barrier().
 *
 * If @target is properly stopped before utrace_control() is called,
 * then after successful return it's guaranteed that no more callbacks
 * to the @engine->ops vector will be made.
 *
 * The only exception is %SIGKILL (and exec or group-exit by another
 * thread in the group), which can cause asynchronous @report_death
 * and/or @report_reap callbacks even when %UTRACE_STOP was used.
 * (In that event, this fails with -%ESRCH or -%EALREADY, see above.)
 *
 * UTRACE_STOP:
 *
 * This asks that @target stop running.  This returns 0 only if
 * @target is already stopped, either for tracing or for job
 * control.  Then @target will remain stopped until another
 * utrace_control() call is made on @engine; @target can be woken
 * only by %SIGKILL (or equivalent, such as exec or termination by
 * another thread in the same thread group).
 *
 * This returns -%EINPROGRESS if @target is not already stopped.
 * Then the effect is like %UTRACE_REPORT.  A @report_quiesce or
 * @report_signal callback will be made soon.  Your callback can
 * then return %UTRACE_STOP to keep @target stopped.
 *
 * This does not interrupt system calls in progress, including ones
 * that sleep for a long time.  For that, use %UTRACE_INTERRUPT.
 * To interrupt system calls and then keep @target stopped, your
 * @report_signal callback can return %UTRACE_STOP.
 *
 * UTRACE_RESUME:
 *
 * Just let @target continue running normally, reversing the effect
 * of a previous %UTRACE_STOP.  If another engine is keeping @target
 * stopped, then it remains stopped until all engines let it resume.
 * If @target was not stopped, this has no effect.
 *
 * UTRACE_REPORT:
 *
 * This is like %UTRACE_RESUME, but also ensures that there will be
 * a @report_quiesce or @report_signal callback made soon.  If
 * @target had been stopped, then there will be a callback before it
 * resumes running normally.  If another engine is keeping @target
 * stopped, then there might be no callbacks until all engines let
 * it resume.
 *
 * UTRACE_INTERRUPT:
 *
 * This is like %UTRACE_REPORT, but ensures that @target will make a
 * @report_signal callback before it resumes or delivers signals.
 * If @target was in a system call or about to enter one, work in
 * progress will be interrupted as if by %SIGSTOP.  If another
 * engine is keeping @target stopped, then there might be no
 * callbacks until all engines let it resume.
 *
 * This gives @engine an opportunity to introduce a forced signal
 * disposition via its @report_signal callback.
 *
 * UTRACE_SINGLESTEP:
 *
 * It's invalid to use this unless arch_has_single_step() returned true.
 * This is like %UTRACE_RESUME, but resumes for one user instruction
 * only.  It's invalid to use this in utrace_control() unless @target
 * had been stopped by @engine previously.
 *
 * Note that passing %UTRACE_SINGLESTEP or %UTRACE_BLOCKSTEP to
 * utrace_control() or returning it from an event callback alone does
 * not necessarily ensure that stepping will be enabled.  If there are
 * more callbacks made to any engine before returning to user mode,
 * then the resume action is chosen only by the last set of callbacks.
 * To be sure, enable %UTRACE_EVENT(%QUIESCE) and look for the
 * @report_quiesce callback with a zero event mask, or the
 * @report_signal callback with %UTRACE_SIGNAL_REPORT.
 *
 * UTRACE_BLOCKSTEP:
 *
 * It's invalid to use this unless arch_has_block_step() returned true.
 * This is like %UTRACE_SINGLESTEP, but resumes for one whole basic
 * block of user instructions.
 *
 * %UTRACE_BLOCKSTEP devolves to %UTRACE_SINGLESTEP when another
 * tracing engine is using %UTRACE_SINGLESTEP at the same time.
 */
int utrace_control(struct task_struct *target,
		   struct utrace_engine *engine,
		   enum utrace_resume_action action)
{
	struct utrace *utrace;
	bool resume;
	int ret;

	if (unlikely(action > UTRACE_DETACH))
		return -EINVAL;

	utrace = get_utrace_lock(target, engine, true);
	if (unlikely(IS_ERR(utrace)))
		return PTR_ERR(utrace);

	if (target->exit_state) {
		ret = utrace_control_dead(target, utrace, action);
		if (ret) {
			spin_unlock(&utrace->lock);
			return ret;
		}
	}

	resume = utrace->stopped;
	ret = 0;

	clear_engine_wants_stop(engine);
	switch (action) {
	case UTRACE_STOP:
		mark_engine_wants_stop(engine);
		if (!resume && !utrace_do_stop(target, utrace))
			ret = -EINPROGRESS;
		resume = false;
		break;

	case UTRACE_DETACH:
		mark_engine_detached(engine);
		resume = resume || utrace_do_stop(target, utrace);
		if (!resume) {
			/*
			 * As in utrace_set_events(), this barrier ensures
			 * that our engine->flags changes have hit before we
			 * examine utrace->reporting, pairing with the barrier
			 * in start_callback().  If @target has not yet hit
			 * finish_callback() to clear utrace->reporting, we
			 * might be in the middle of a callback to @engine.
			 */
			smp_mb();
			if (utrace->reporting == engine)
				ret = -EINPROGRESS;
			break;
		}
		/* Fall through. */

	case UTRACE_RESUME:
		/*
		 * This and all other cases imply resuming if stopped.
		 * There might not be another report before it just
		 * resumes, so make sure single-step is not left set.
		 */
		if (likely(resume))
			user_disable_single_step(target);
		break;

	case UTRACE_REPORT:
		/*
		 * Make the thread call tracehook_notify_resume() soon.
		 * But don't bother if it's already been interrupted.
		 * In that case, utrace_get_signal() will be reporting soon.
		 */
		if (!utrace->report && !utrace->interrupt) {
			utrace->report = 1;
			set_notify_resume(target);
		}
		break;

	case UTRACE_INTERRUPT:
		/*
		 * Make the thread call tracehook_get_signal() soon.
		 */
		if (utrace->interrupt)
			break;
		utrace->interrupt = 1;

		/*
		 * If it's not already stopped, interrupt it now.
		 * We need the siglock here in case it calls
		 * recalc_sigpending() and clears its own
		 * TIF_SIGPENDING.  By taking the lock, we've
		 * serialized any later recalc_sigpending() after
		 * our setting of utrace->interrupt to force it on.
		 */
		if (resume) {
			/*
			 * This is really just to keep the invariant
			 * that TIF_SIGPENDING is set with utrace->interrupt.
			 * When it's stopped, we know it's always going
			 * through utrace_get_signal and will recalculate.
			 */
			set_tsk_thread_flag(target, TIF_SIGPENDING);
		} else {
			struct sighand_struct *sighand;
			unsigned long irqflags;
			sighand = lock_task_sighand(target, &irqflags);
			if (likely(sighand)) {
				signal_wake_up(target, 0);
				unlock_task_sighand(target, &irqflags);
			}
		}
		break;

	case UTRACE_BLOCKSTEP:
		/*
		 * Resume from stopped, step one block.
		 */
		if (unlikely(!arch_has_block_step())) {
			WARN_ON(1);
			/* Fall through to treat it as SINGLESTEP. */
		} else if (likely(resume)) {
			user_enable_block_step(target);
			break;
		}

	case UTRACE_SINGLESTEP:
		/*
		 * Resume from stopped, step one instruction.
		 */
		if (unlikely(!arch_has_single_step())) {
			WARN_ON(1);
			ret = -EOPNOTSUPP;
			break;
		}

		if (likely(resume))
			user_enable_single_step(target);
		else
			/*
			 * You were supposed to stop it before asking
			 * for it to step.
			 */
			ret = -EAGAIN;
		break;
	}

	/*
	 * Let the thread resume running.  If it's not stopped now,
	 * there is nothing more we need to do.
	 */
	if (resume)
		utrace_reset(target, utrace, NULL);
	else
		spin_unlock(&utrace->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(utrace_control);
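
/*
 * Illustrative only: asking a traced thread to stop and later resuming
 * it.  A zero return from UTRACE_STOP means it was already stopped;
 * -EINPROGRESS means a report is coming, and that callback can return
 * UTRACE_STOP to keep the thread stopped.  The example_* name is
 * hypothetical.
 */
#if 0
static int example_stop_then_resume(struct task_struct *target,
				    struct utrace_engine *engine)
{
	int ret = utrace_control(target, engine, UTRACE_STOP);
	if (ret && ret != -EINPROGRESS)
		return ret;
	/* ... examine the stopped thread here when ret == 0 ... */
	return utrace_control(target, engine, UTRACE_RESUME);
}
#endif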
/**
 * utrace_barrier - synchronize with simultaneous tracing callbacks
 * @target:	thread to affect
 * @engine:	engine to affect (can be detached)
 *
 * This blocks while @target might be in the midst of making a callback to
 * @engine.  It can be interrupted by signals and will return -%ERESTARTSYS.
 * A return value of zero means no callback from @target to @engine was
 * in progress.  Any effect of its return value (such as %UTRACE_STOP) has
 * already been applied to @engine.
 *
 * It's not necessary to keep the @target pointer alive for this call.
 * It's only necessary to hold a ref on @engine.  This will return
 * safely even if @target has been reaped and has no task refs.
 *
 * A successful return from utrace_barrier() guarantees its ordering
 * with respect to utrace_set_events() and utrace_control() calls.  If
 * @target was not properly stopped, event callbacks just disabled might
 * still be in progress; utrace_barrier() waits until there is no chance
 * an unwanted callback can be in progress.
 */
int utrace_barrier(struct task_struct *target, struct utrace_engine *engine)
{
	struct utrace *utrace;
	int ret = -ERESTARTSYS;

	if (unlikely(target == current))
		return 0;

	do {
		utrace = get_utrace_lock(target, engine, false);
		if (unlikely(IS_ERR(utrace))) {
			ret = PTR_ERR(utrace);
			if (ret != -ERESTARTSYS)
				break;
		} else {
			/*
			 * All engine state changes are done while
			 * holding the lock, i.e. before we get here.
			 * Since we have the lock, we only need to
			 * worry about @target making a callback.
			 * When it has entered start_callback() but
			 * not yet gotten to finish_callback(), we
			 * will see utrace->reporting == @engine.
			 * When @target doesn't take the lock, it uses
			 * barriers to order setting utrace->reporting
			 * before it examines the engine state.
			 */
			if (utrace->reporting != engine)
				ret = 0;
			spin_unlock(&utrace->lock);
			if (!ret)
				break;
		}
		schedule_timeout_interruptible(1);
	} while (!signal_pending(current));

	return ret;
}
EXPORT_SYMBOL_GPL(utrace_barrier);
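
/*
 * Illustrative only: a fully synchronized detach.  When UTRACE_DETACH
 * returns -EINPROGRESS, utrace_barrier() waits out any in-flight
 * callback; -ESRCH and -EALREADY mean the death/reap path is completing
 * the detach instead.  The example_* name is hypothetical.
 */
#if 0
static void example_detach_sync(struct task_struct *target,
				struct utrace_engine *engine)
{
	int ret = utrace_control(target, engine, UTRACE_DETACH);
	if (ret == -EINPROGRESS)
		utrace_barrier(target, engine);	/* Wait out the callback. */
	utrace_engine_put(engine);		/* Drop our remaining ref. */
}
#endif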
/*
 * This is local state used for reporting loops, perhaps optimized away.
 */
struct utrace_report {
	enum utrace_resume_action action;
	u32 result;
	bool detaches;
	bool reports;
	bool takers;
	bool killed;
};

#define INIT_REPORT(var) \
	struct utrace_report var = { UTRACE_RESUME, 0, \
				     false, false, false, false }
/*
 * We are now making the report, so clear the flag saying we need one.
 */
static void start_report(struct utrace *utrace)
{
	BUG_ON(utrace->stopped);
	if (utrace->report) {
		spin_lock(&utrace->lock);
		utrace->report = 0;
		splice_attaching(utrace);
		spin_unlock(&utrace->lock);
	}
}
/*
 * Complete a normal reporting pass, pairing with a start_report() call.
 * This handles any UTRACE_DETACH or UTRACE_REPORT or UTRACE_INTERRUPT
 * returns from engine callbacks.  If any engine's last callback used
 * UTRACE_STOP, we do UTRACE_REPORT here to ensure we stop before user
 * mode.  If there were no callbacks made, it will recompute
 * @task->utrace_flags to avoid another false-positive.
 */
static void finish_report(struct utrace_report *report,
			  struct task_struct *task, struct utrace *utrace)
{
	bool clean = (report->takers && !report->detaches);

	if (report->action <= UTRACE_REPORT && !utrace->report) {
		spin_lock(&utrace->lock);
		utrace->report = 1;
		set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
	} else if (report->action == UTRACE_INTERRUPT && !utrace->interrupt) {
		spin_lock(&utrace->lock);
		utrace->interrupt = 1;
		set_tsk_thread_flag(task, TIF_SIGPENDING);
	} else if (clean) {
		return;
	} else {
		spin_lock(&utrace->lock);
	}

	if (clean)
		spin_unlock(&utrace->lock);
	else
		utrace_reset(task, utrace, &report->action);
}
/*
 * Apply the return value of one engine callback to @report.
 * Returns true if @engine detached and should not get any more callbacks.
 */
static bool finish_callback(struct utrace *utrace,
			    struct utrace_report *report,
			    struct utrace_engine *engine,
			    u32 ret)
{
	enum utrace_resume_action action = utrace_resume_action(ret);

	report->result = ret & ~UTRACE_RESUME_MASK;

	/*
	 * If utrace_control() was used, treat that like UTRACE_DETACH here.
	 */
	if (action == UTRACE_DETACH || engine->ops == &utrace_detached_ops) {
		engine->ops = &utrace_detached_ops;
		report->detaches = true;
	} else {
		if (action < report->action)
			report->action = action;

		if (action == UTRACE_STOP) {
			if (!engine_wants_stop(engine)) {
				spin_lock(&utrace->lock);
				mark_engine_wants_stop(engine);
				spin_unlock(&utrace->lock);
			}
		} else {
			if (action == UTRACE_REPORT)
				report->reports = true;

			if (engine_wants_stop(engine)) {
				spin_lock(&utrace->lock);
				clear_engine_wants_stop(engine);
				spin_unlock(&utrace->lock);
			}
		}
	}

	/*
	 * Now that we have applied the effect of the return value,
	 * clear this so that utrace_barrier() can stop waiting.
	 * A subsequent utrace_control() can stop or resume @engine
	 * and know this was ordered after its callback's action.
	 *
	 * We don't need any barriers here because utrace_barrier()
	 * takes utrace->lock.  If we touched engine->flags above,
	 * the lock guaranteed this change was before utrace_barrier()
	 * examined utrace->reporting.
	 */
	utrace->reporting = NULL;

	/*
	 * This is a good place to make sure tracing engines don't
	 * introduce too much latency under voluntary preemption.
	 */
	might_sleep();

	return engine->ops == &utrace_detached_ops;
}
/*
 * Start the callbacks for @engine to consider @event (a bit mask).
 * This makes the report_quiesce() callback first.  If @engine wants
 * a specific callback for @event, we return the ops vector to use.
 * If not, we return NULL.  The return value from the ops->callback
 * function called should be passed to finish_callback().
 */
static const struct utrace_engine_ops *start_callback(
	struct utrace *utrace, struct utrace_report *report,
	struct utrace_engine *engine, struct task_struct *task,
	unsigned long event)
{
	const struct utrace_engine_ops *ops;
	unsigned long want;

	/*
	 * This barrier ensures that we've set utrace->reporting before
	 * we examine engine->flags or engine->ops.  utrace_barrier()
	 * relies on this ordering to indicate that the effect of any
	 * utrace_control() and utrace_set_events() calls is in place
	 * by the time utrace->reporting can be seen to be NULL.
	 */
	utrace->reporting = engine;
	smp_mb();

	/*
	 * This pairs with the barrier in mark_engine_detached().
	 * It makes sure that we never see the old ops vector with
	 * the new flags, in case the original vector had no report_quiesce.
	 */
	want = engine->flags;
	smp_rmb();
	ops = engine->ops;

	if (want & UTRACE_EVENT(QUIESCE)) {
		if (finish_callback(utrace, report, engine,
				    (*ops->report_quiesce)(report->action,
							   engine, task,
							   event)))
			return NULL;

		/*
		 * finish_callback() reset utrace->reporting after the
		 * quiesce callback.  Now we set it again (as above)
		 * before re-examining engine->flags, which could have
		 * been changed synchronously by ->report_quiesce or
		 * asynchronously by utrace_control() or utrace_set_events().
		 */
		utrace->reporting = engine;
		smp_mb();
		want = engine->flags;
	}

	if (want & ENGINE_STOP)
		report->action = UTRACE_STOP;

	if (want & event) {
		report->takers = true;
		return ops;
	}

	utrace->reporting = NULL;
	return NULL;
}
/*
 * Do a normal reporting pass for engines interested in @event.
 * @callback is the name of the member in the ops vector, and remaining
 * args are the extras it takes after the standard three args.
 */
#define REPORT(task, utrace, report, event, callback, ...)		      \
	do {								      \
		start_report(utrace);					      \
		REPORT_CALLBACKS(task, utrace, report, event, callback,	      \
				 (report)->action, engine, current,	      \
				 __VA_ARGS__);				      \
		finish_report(report, task, utrace);			      \
	} while (0)
#define REPORT_CALLBACKS(task, utrace, report, event, callback, ...)	      \
	do {								      \
		struct utrace_engine *engine;				      \
		const struct utrace_engine_ops *ops;			      \
		list_for_each_entry(engine, &utrace->attached, entry) {      \
			ops = start_callback(utrace, report, engine, task,    \
					     event);			      \
			if (!ops)					      \
				continue;				      \
			finish_callback(utrace, report, engine,		      \
					(*ops->callback)(__VA_ARGS__));	      \
		}							      \
	} while (0)
/*
 * Called iff UTRACE_EVENT(EXEC) flag is set.
 */
void utrace_report_exec(struct linux_binfmt *fmt, struct linux_binprm *bprm,
			struct pt_regs *regs)
{
	struct task_struct *task = current;
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);

	REPORT(task, utrace, &report, UTRACE_EVENT(EXEC),
	       report_exec, fmt, bprm, regs);
}
/*
 * Called iff UTRACE_EVENT(SYSCALL_ENTRY) flag is set.
 * Return true to prevent the system call.
 */
bool utrace_report_syscall_entry(struct pt_regs *regs)
{
	struct task_struct *task = current;
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);

	start_report(utrace);
	REPORT_CALLBACKS(task, utrace, &report, UTRACE_EVENT(SYSCALL_ENTRY),
			 report_syscall_entry, report.result | report.action,
			 engine, current, regs);
	finish_report(&report, task, utrace);

	if (report.action == UTRACE_STOP &&
	    unlikely(utrace_stop(task, utrace, false)))
		/*
		 * We are continuing despite UTRACE_STOP because of a
		 * SIGKILL.  Don't let the system call actually proceed.
		 */
		return true;

	if (unlikely(report.result == UTRACE_SYSCALL_ABORT))
		return true;

	if (signal_pending(task)) {
		/*
		 * Clear TIF_SIGPENDING if it no longer needs to be set.
		 * It may have been set as part of quiescence, and won't
		 * ever have been cleared by another thread.  For other
		 * reports, we can just leave it set and will go through
		 * utrace_get_signal() to reset things.  But here we are
		 * about to enter a syscall, which might bail out with an
		 * -ERESTART* error if it's set now.
		 */
		spin_lock_irq(&task->sighand->siglock);
		recalc_sigpending();
		spin_unlock_irq(&task->sighand->siglock);
	}

	return false;
}
/*
 * Called iff UTRACE_EVENT(SYSCALL_EXIT) flag is set.
 */
void utrace_report_syscall_exit(struct pt_regs *regs)
{
	struct task_struct *task = current;
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);

	REPORT(task, utrace, &report, UTRACE_EVENT(SYSCALL_EXIT),
	       report_syscall_exit, regs);
}
/*
 * Called iff UTRACE_EVENT(CLONE) flag is set.
 * This notification call blocks the wake_up_new_task call on the child.
 * So we must not quiesce here.  tracehook_report_clone_complete will do
 * a quiescence check momentarily.
 */
void utrace_report_clone(unsigned long clone_flags, struct task_struct *child)
{
	struct task_struct *task = current;
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);

	/*
	 * We don't use the REPORT() macro here, because we need
	 * to clear utrace->cloning before finish_report().
	 * After finish_report(), utrace can be a stale pointer
	 * in cases when report.action is still UTRACE_RESUME.
	 */
	start_report(utrace);
	utrace->cloning = child;

	REPORT_CALLBACKS(task, utrace, &report,
			 UTRACE_EVENT(CLONE), report_clone,
			 report.action, engine, task, clone_flags, child);

	utrace->cloning = NULL;
	finish_report(&report, task, utrace);

	/*
	 * For a vfork, we will go into an uninterruptible block waiting
	 * for the child.  We need UTRACE_STOP to happen before this, not
	 * after.  For CLONE_VFORK, utrace_finish_vfork() will be called.
	 */
	if (report.action == UTRACE_STOP && (clone_flags & CLONE_VFORK)) {
		spin_lock(&utrace->lock);
		utrace->vfork_stop = 1;
		spin_unlock(&utrace->lock);
	}
}
/*
 * We're called after utrace_report_clone() for a CLONE_VFORK.
 * If UTRACE_STOP was left from the clone report, we stop here.
 * After this, we'll enter the uninterruptible wait_for_completion()
 * waiting for the child.
 */
void utrace_finish_vfork(struct task_struct *task)
{
	struct utrace *utrace = task_utrace_struct(task);

	spin_lock(&utrace->lock);
	if (!utrace->vfork_stop)
		spin_unlock(&utrace->lock);
	else {
		utrace->vfork_stop = 0;
		spin_unlock(&utrace->lock);
		utrace_stop(task, utrace, false);
	}
}
/*
 * Called iff UTRACE_EVENT(JCTL) flag is set.
 *
 * Called with siglock held.
 */
void utrace_report_jctl(int notify, int what)
{
	struct task_struct *task = current;
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);
	bool stop = task_is_stopped(task);

	/*
	 * We have to come out of TASK_STOPPED in case the event report
	 * hooks might block.  Since we held the siglock throughout, it's
	 * as if we were never in TASK_STOPPED yet at all.
	 */
	if (stop) {
		__set_current_state(TASK_RUNNING);
		task->signal->flags &= ~SIGNAL_STOP_STOPPED;
		++task->signal->group_stop_count;
	}
	spin_unlock_irq(&task->sighand->siglock);

	/*
	 * We get here with CLD_STOPPED when we've just entered
	 * TASK_STOPPED, or with CLD_CONTINUED when we've just come
	 * out but not yet been through utrace_get_signal() again.
	 *
	 * While in TASK_STOPPED, we can be considered safely
	 * stopped by utrace_do_stop() and detached asynchronously.
	 * If we woke up and checked task->utrace_flags before that
	 * was finished, we might be here with utrace already
	 * removed or in the middle of being removed.
	 *
	 * If we are indeed attached, then make sure we are no
	 * longer considered stopped while we run callbacks.
	 */
	spin_lock(&utrace->lock);
	utrace->stopped = 0;
	/*
	 * Do start_report()'s work too since we already have the lock anyway.
	 */
	utrace->report = 0;
	splice_attaching(utrace);
	spin_unlock(&utrace->lock);

	REPORT(task, utrace, &report, UTRACE_EVENT(JCTL),
	       report_jctl, what, notify);

	/*
	 * Retake the lock, and go back into TASK_STOPPED
	 * unless the stop was just cleared.
	 */
	spin_lock_irq(&task->sighand->siglock);
	if (stop && task->signal->group_stop_count > 0) {
		__set_current_state(TASK_STOPPED);
		if (--task->signal->group_stop_count == 0)
			task->signal->flags |= SIGNAL_STOP_STOPPED;
	}
}
/*
 * Called iff UTRACE_EVENT(EXIT) flag is set.
 */
void utrace_report_exit(long *exit_code)
{
	struct task_struct *task = current;
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);
	long orig_code = *exit_code;

	REPORT(task, utrace, &report, UTRACE_EVENT(EXIT),
	       report_exit, orig_code, exit_code);

	if (report.action == UTRACE_STOP)
		utrace_stop(task, utrace, false);
}
/*
 * Called iff UTRACE_EVENT(DEATH) or UTRACE_EVENT(QUIESCE) flag is set.
 *
 * It is always possible that we are racing with utrace_release_task here.
 * For this reason, utrace_release_task checks for the event bits that get
 * us here, and delays its cleanup for us to do.
 */
void utrace_report_death(struct task_struct *task, struct utrace *utrace,
			 bool group_dead, int signal)
{
	INIT_REPORT(report);

	BUG_ON(!task->exit_state);

	/*
	 * We are presently considered "quiescent"--which is accurate
	 * inasmuch as we won't run any more user instructions ever again.
	 * But for utrace_control and utrace_set_events to be robust, they
	 * must be sure whether or not we will run any more callbacks.  If
	 * a call comes in before we do, taking the lock here synchronizes
	 * us so we don't run any callbacks just disabled.  Calls that come
	 * in while we're running the callbacks will see the exit.death
	 * flag and know that we are not yet fully quiescent for purposes
	 * of detach bookkeeping.
	 */
	spin_lock(&utrace->lock);
	BUG_ON(utrace->death);
	utrace->death = 1;
	utrace->report = 0;
	utrace->interrupt = 0;
	spin_unlock(&utrace->lock);

	REPORT_CALLBACKS(task, utrace, &report, UTRACE_EVENT(DEATH),
			 report_death, engine, task, group_dead, signal);

	spin_lock(&utrace->lock);

	/*
	 * After we unlock (possibly inside utrace_reap for callbacks) with
	 * this flag clear, competing utrace_control/utrace_set_events calls
	 * know that we've finished our callbacks and any detach bookkeeping.
	 */
	utrace->death = 0;

	if (utrace->reap)
		/*
		 * utrace_release_task() was already called in parallel.
		 * We must complete its work now.
		 */
		utrace_reap(task, utrace);
	else
		utrace_reset(task, utrace, &report.action);
}
/*
 * Finish the last reporting pass before returning to user mode.
 */
static void finish_resume_report(struct utrace_report *report,
				 struct task_struct *task,
				 struct utrace *utrace)
{
	if (report->detaches || !report->takers) {
		spin_lock(&utrace->lock);
		utrace_reset(task, utrace, &report->action);
	}

	switch (report->action) {
	case UTRACE_STOP:
		report->killed = utrace_stop(task, utrace, report->reports);
		break;

	case UTRACE_INTERRUPT:
		if (!signal_pending(task))
			set_tsk_thread_flag(task, TIF_SIGPENDING);
		break;

	case UTRACE_SINGLESTEP:
		user_enable_single_step(task);
		break;

	case UTRACE_BLOCKSTEP:
		user_enable_block_step(task);
		break;

	case UTRACE_REPORT:
	case UTRACE_RESUME:
	default:
		user_disable_single_step(task);
		break;
	}
}
/*
 * This is called when TIF_NOTIFY_RESUME had been set (and is now clear).
 * We are close to user mode, and this is the place to report or stop.
 * When we return, we're going to user mode or into the signals code.
 */
void utrace_resume(struct task_struct *task, struct pt_regs *regs)
{
	struct utrace *utrace = task_utrace_struct(task);
	INIT_REPORT(report);
	struct utrace_engine *engine;

	/*
	 * Some machines get here with interrupts disabled.  The same arch
	 * code path leads to calling into get_signal_to_deliver(), which
	 * implicitly reenables them by virtue of spin_unlock_irq.
	 */
	local_irq_enable();

	/*
	 * If this flag is still set it's because there was a signal
	 * handler setup done but no report_signal following it.  Clear
	 * the flag before we get to user so it doesn't confuse us later.
	 */
	if (unlikely(utrace->signal_handler)) {
		int skip;
		spin_lock(&utrace->lock);
		utrace->signal_handler = 0;
		skip = !utrace->report;
		spin_unlock(&utrace->lock);
		if (skip)
			return;
	}

	/*
	 * If UTRACE_INTERRUPT was just used, we don't bother with a
	 * report here.  We will report and stop in utrace_get_signal().
	 */
	if (unlikely(utrace->interrupt))
		return;

	/*
	 * Do a simple reporting pass, with no callback after report_quiesce.
	 */
	start_report(utrace);

	list_for_each_entry(engine, &utrace->attached, entry)
		start_callback(utrace, &report, engine, task, 0);

	/*
	 * Finish the report and either stop or get ready to resume.
	 */
	finish_resume_report(&report, task, utrace);
}
/*
 * Return true if current has forced signal_pending().
 *
 * This is called only when current->utrace_flags is nonzero, so we know
 * that current->utrace must be set.  It's not inlined in tracehook.h
 * just so that struct utrace can stay opaque outside this file.
 */
bool utrace_interrupt_pending(void)
{
	return task_utrace_struct(current)->interrupt;
}
/*
 * Take the siglock and push @info back on our queue.
 * Returns with @task->sighand->siglock held.
 */
static void push_back_signal(struct task_struct *task, siginfo_t *info)
	__acquires(task->sighand->siglock)
{
	struct sigqueue *q;

	if (unlikely(!info->si_signo)) { /* Oh, a wise guy! */
		spin_lock_irq(&task->sighand->siglock);
		return;
	}

	q = sigqueue_alloc();
	if (likely(q)) {
		q->flags = 0;
		copy_siginfo(&q->info, info);
	}

	spin_lock_irq(&task->sighand->siglock);

	sigaddset(&task->pending.signal, info->si_signo);
	if (likely(q))
		list_add(&q->list, &task->pending.list);

	set_tsk_thread_flag(task, TIF_SIGPENDING);
}
1870 * This is the hook from the signals code, called with the siglock held.
1871 * Here is the ideal place to stop. We also dequeue and intercept signals.
1873 int utrace_get_signal(struct task_struct
*task
, struct pt_regs
*regs
,
1874 siginfo_t
*info
, struct k_sigaction
*return_ka
)
1875 __releases(task
->sighand
->siglock
)
1876 __acquires(task
->sighand
->siglock
)
1878 struct utrace
*utrace
;
1879 struct k_sigaction
*ka
;
1880 INIT_REPORT(report
);
1881 struct utrace_engine
*engine
;
1882 const struct utrace_engine_ops
*ops
;
1883 unsigned long event
, want
;
1887 utrace
= &task
->utrace
;
	if (utrace->interrupt || utrace->report || utrace->signal_handler) {
		/*
		 * We've been asked for an explicit report before we
		 * even check for pending signals.
		 */

		spin_unlock_irq(&task->sighand->siglock);

		spin_lock(&utrace->lock);

		splice_attaching(utrace);

		if (unlikely(!utrace->interrupt) && unlikely(!utrace->report))
			report.result = UTRACE_SIGNAL_IGN;
		else if (utrace->signal_handler)
			report.result = UTRACE_SIGNAL_HANDLER;
		else
			report.result = UTRACE_SIGNAL_REPORT;

		/*
		 * We are now making the report and it's on the
		 * interrupt path, so clear the flags asking for those.
		 */
		utrace->interrupt = utrace->report = utrace->signal_handler = 0;
		utrace->stopped = 0;

		/*
		 * Make sure signal_pending() only returns true
		 * if there are real signals pending.
		 */
		if (signal_pending(task)) {
			spin_lock_irq(&task->sighand->siglock);
			recalc_sigpending();
			spin_unlock_irq(&task->sighand->siglock);
		}

		spin_unlock(&utrace->lock);

		if (unlikely(report.result == UTRACE_SIGNAL_IGN))
			/*
			 * We only got here to clear utrace->signal_handler.
			 */
			return -1;

		/*
		 * Do a reporting pass for no signal, just for EVENT(QUIESCE).
		 * The engine callbacks can fill in *info and *return_ka.
		 * We'll pass NULL for the @orig_ka argument to indicate
		 * that there was no original signal.
		 */
		event = 0;
		ka = NULL;
		memset(return_ka, 0, sizeof *return_ka);
	} else if ((task->utrace_flags & UTRACE_EVENT_SIGNAL_ALL) == 0 &&
		   !utrace->stopped) {
		/*
		 * If no engine is interested in intercepting signals,
		 * let the caller just dequeue them normally.
		 */
		return 0;
	} else {
		if (unlikely(utrace->stopped)) {
			spin_unlock_irq(&task->sighand->siglock);
			spin_lock(&utrace->lock);
			utrace->stopped = 0;
			spin_unlock(&utrace->lock);
			spin_lock_irq(&task->sighand->siglock);
		}

		/*
		 * Steal the next signal so we can let tracing engines
		 * examine it.  From the signal number and sigaction,
		 * determine what normal delivery would do.  If no
		 * engine perturbs it, we'll do that by returning the
		 * signal number after setting *return_ka.
		 */
		signr = dequeue_signal(task, &task->blocked, info);
		if (signr == 0)
			return signr;
		BUG_ON(signr != info->si_signo);

		ka = &task->sighand->action[signr - 1];
		*return_ka = *ka;

		/*
		 * We are never allowed to interfere with SIGKILL.
		 * Just punt after filling in *return_ka for our caller.
		 */
		if (signr == SIGKILL)
			return signr;

		if (ka->sa.sa_handler == SIG_IGN) {
			event = UTRACE_EVENT(SIGNAL_IGN);
			report.result = UTRACE_SIGNAL_IGN;
		} else if (ka->sa.sa_handler != SIG_DFL) {
			event = UTRACE_EVENT(SIGNAL);
			report.result = UTRACE_SIGNAL_DELIVER;
		} else if (sig_kernel_coredump(signr)) {
			event = UTRACE_EVENT(SIGNAL_CORE);
			report.result = UTRACE_SIGNAL_CORE;
		} else if (sig_kernel_ignore(signr)) {
			event = UTRACE_EVENT(SIGNAL_IGN);
			report.result = UTRACE_SIGNAL_IGN;
		} else if (signr == SIGSTOP) {
			event = UTRACE_EVENT(SIGNAL_STOP);
			report.result = UTRACE_SIGNAL_STOP;
		} else if (sig_kernel_stop(signr)) {
			event = UTRACE_EVENT(SIGNAL_STOP);
			report.result = UTRACE_SIGNAL_TSTP;
		} else {
			event = UTRACE_EVENT(SIGNAL_TERM);
			report.result = UTRACE_SIGNAL_TERM;
		}

		/*
		 * Now that we know what event type this signal is, we
		 * can short-circuit if no engines care about those.
		 */
		if ((task->utrace_flags & (event | UTRACE_EVENT(QUIESCE))) == 0)
			return signr;

		/*
		 * We have some interested engines, so tell them about
		 * the signal and let them change its disposition.
		 */
		spin_unlock_irq(&task->sighand->siglock);
	}

	/*
	 * This reporting pass chooses what signal disposition we'll act on.
	 */
	list_for_each_entry(engine, &utrace->attached, entry) {
		/*
		 * See start_callback() comment about this barrier.
		 */
		utrace->reporting = engine;
		smp_mb();

		/*
		 * This pairs with the barrier in mark_engine_detached(),
		 * see start_callback() comments.
		 */
		want = engine->flags;
		smp_rmb();
		ops = engine->ops;

		if ((want & (event | UTRACE_EVENT(QUIESCE))) == 0) {
			utrace->reporting = NULL;
			continue;
		}

		if (ops->report_signal)
			ret = (*ops->report_signal)(
				report.result | report.action, engine, task,
				regs, info, ka, return_ka);
		else
			ret = (report.result | (*ops->report_quiesce)(
				       report.action, engine, task, event));

		/*
		 * Avoid a tight loop reporting again and again if some
		 * engine is too stupid.
		 */
		switch (utrace_resume_action(ret)) {
		default:
			break;
		case UTRACE_INTERRUPT:
		case UTRACE_REPORT:
			ret = (ret & ~UTRACE_RESUME_MASK) | UTRACE_RESUME;
			break;
		}

		finish_callback(utrace, &report, engine, ret);
	}

	/*
	 * We express the chosen action to the signals code in terms
	 * of a representative signal whose default action does it.
	 * Our caller uses our return value (signr) to decide what to
	 * do, but uses info->si_signo as the signal number to report.
	 */
	switch (utrace_signal_action(report.result)) {
	case UTRACE_SIGNAL_TERM:
		signr = SIGTERM;
		break;

	case UTRACE_SIGNAL_CORE:
		signr = SIGQUIT;
		break;

	case UTRACE_SIGNAL_STOP:
		signr = SIGSTOP;
		break;

	case UTRACE_SIGNAL_TSTP:
		signr = SIGTSTP;
		break;

	case UTRACE_SIGNAL_DELIVER:
		signr = info->si_signo;

		if (return_ka->sa.sa_handler == SIG_DFL) {
			/*
			 * We'll do signr's normal default action.
			 * For ignore, we'll fall through below.
			 * For stop/death, break out here; the code
			 * below relocks and returns signr.
			 */
			if (likely(signr) && !sig_kernel_ignore(signr))
				break;
		} else if (return_ka->sa.sa_handler != SIG_IGN &&
			   likely(signr)) {
			/*
			 * Complete the bookkeeping after the report.
			 * The handler will run.  If an engine wanted to
			 * stop or step, then make sure we do another
			 * report after signal handler setup.
			 */
			if (report.action != UTRACE_RESUME)
				report.action = UTRACE_INTERRUPT;
			finish_report(&report, task, utrace);

			if (unlikely(report.result & UTRACE_SIGNAL_HOLD))
				push_back_signal(task, info);
			else
				spin_lock_irq(&task->sighand->siglock);

			/*
			 * We do the SA_ONESHOT work here since the
			 * normal path will only touch *return_ka now.
			 */
			if (unlikely(return_ka->sa.sa_flags & SA_ONESHOT)) {
				return_ka->sa.sa_flags &= ~SA_ONESHOT;
				if (likely(valid_signal(signr))) {
					ka = &task->sighand->action[signr - 1];
					ka->sa.sa_handler = SIG_DFL;
				}
			}

			return signr;
		}

		/* Fall through for an ignored signal.  */

	case UTRACE_SIGNAL_IGN:
	case UTRACE_SIGNAL_REPORT:
	default:
		/*
		 * If the signal is being ignored, then we are on the way
		 * directly back to user mode.  We can stop here, or step,
		 * as in utrace_resume(), above.  After we've dealt with that,
		 * our caller will relock and come back through here.
		 */
		finish_resume_report(&report, task, utrace);

		if (unlikely(report.killed)) {
			/*
			 * The only reason we woke up now was because of a
			 * SIGKILL.  Don't do normal dequeuing in case it
			 * might get a signal other than SIGKILL.  That would
			 * perturb the death state so it might differ from
			 * what the debugger would have allowed to happen.
			 * Instead, pluck out just the SIGKILL to be sure
			 * we'll die immediately with nothing else different
			 * from the quiescent state the debugger wanted us in.
			 */
			sigset_t sigkill_only;
			siginitsetinv(&sigkill_only, sigmask(SIGKILL));
			spin_lock_irq(&task->sighand->siglock);
			signr = dequeue_signal(task, &sigkill_only, info);
			BUG_ON(signr != SIGKILL);
			*return_ka = task->sighand->action[SIGKILL - 1];
			return signr;
		}

		if (unlikely(report.result & UTRACE_SIGNAL_HOLD)) {
			push_back_signal(task, info);
			spin_unlock_irq(&task->sighand->siglock);
		}

		return -1;
	}

	/*
	 * Complete the bookkeeping after the report.
	 * This sets utrace->report if UTRACE_STOP was used.
	 */
	finish_report(&report, task, utrace);

	return_ka->sa.sa_handler = SIG_DFL;

	if (unlikely(report.result & UTRACE_SIGNAL_HOLD))
		push_back_signal(task, info);
	else
		spin_lock_irq(&task->sighand->siglock);

	if (sig_kernel_stop(signr))
		task->signal->flags |= SIGNAL_STOP_DEQUEUED;

	return signr;
}

/*
 * This gets called after a signal handler has been set up.
 * We set a flag so the next report knows it happened.
 * If we're already stepping, make sure we do a report_signal.
 * If not, make sure we get into utrace_resume() where we can
 * clear the signal_handler flag before resuming.
 */
void utrace_signal_handler(struct task_struct *task, int stepping)
{
	struct utrace *utrace = task_utrace_struct(task);

	spin_lock(&utrace->lock);

	utrace->signal_handler = 1;
	if (stepping) {
		utrace->interrupt = 1;
		set_tsk_thread_flag(task, TIF_SIGPENDING);
	} else {
		set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
	}

	spin_unlock(&utrace->lock);
}
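
/*
 * Illustrative sketch (not built): how an engine's report_signal callback
 * might recognize the %UTRACE_SIGNAL_HANDLER report that the flag set
 * above eventually produces in utrace_get_signal().  The callback name is
 * made up; its shape follows the report_signal call made in the reporting
 * pass above.  Echoing utrace_signal_action(action) in the return value
 * is assumed here to leave the signal's disposition unchanged.
 */
#if 0
static u32 example_report_signal(u32 action, struct utrace_engine *engine,
				 struct task_struct *task,
				 struct pt_regs *regs, siginfo_t *info,
				 const struct k_sigaction *orig_ka,
				 struct k_sigaction *return_ka)
{
	if (utrace_signal_action(action) == UTRACE_SIGNAL_HANDLER) {
		/*
		 * A handler frame was just set up; a stepping debugger
		 * would stop here to show the first handler instruction.
		 */
		return UTRACE_STOP | utrace_signal_action(action);
	}

	/* Leave any real signal's disposition unchanged. */
	return UTRACE_RESUME | utrace_signal_action(action);
}
#endif
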
/**
 * utrace_prepare_examine - prepare to examine thread state
 * @target:	thread of interest, a &struct task_struct pointer
 * @engine:	engine pointer returned by utrace_attach_task()
 * @exam:	temporary state, a &struct utrace_examiner pointer
 *
 * This call prepares to safely examine the thread @target using
 * &struct user_regset calls, or direct access to thread-synchronous fields.
 *
 * When @target is current, this call is superfluous.  When @target is
 * another thread, it must be held stopped via %UTRACE_STOP by @engine.
 *
 * This call may block the caller until @target stays stopped, so it must
 * be called only after the caller is sure @target is about to unschedule.
 * This means a zero return from a utrace_control() call on @engine giving
 * %UTRACE_STOP, or a report_quiesce() or report_signal() callback to
 * @engine that used %UTRACE_STOP in its return value.
 *
 * Returns -%ESRCH if @target is dead or -%EINVAL if %UTRACE_STOP was
 * not used.  If @target has started running again despite %UTRACE_STOP
 * (for %SIGKILL or a spurious wakeup), this call returns -%EAGAIN.
 *
 * When this call returns zero, it's safe to use &struct user_regset
 * calls and task_user_regset_view() on @target and to examine some of
 * its fields directly.  When the examination is complete, a
 * utrace_finish_examine() call must follow to check whether it was
 * completed safely.
 */
int utrace_prepare_examine(struct task_struct *target,
			   struct utrace_engine *engine,
			   struct utrace_examiner *exam)
{
	int ret = 0;

	if (unlikely(target == current))
		return 0;

	rcu_read_lock();
	if (unlikely(!engine_wants_stop(engine)))
		ret = -EINVAL;
	else if (unlikely(target->exit_state))
		ret = -ESRCH;
	else {
		exam->state = target->state;
		if (unlikely(exam->state == TASK_RUNNING))
			ret = -EAGAIN;
		else
			get_task_struct(target);
	}
	rcu_read_unlock();

	if (likely(!ret)) {
		exam->ncsw = wait_task_inactive(target, exam->state);
		put_task_struct(target);
		if (unlikely(!exam->ncsw))
			ret = -EAGAIN;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(utrace_prepare_examine);

/**
 * utrace_finish_examine - complete an examination of thread state
 * @target:	thread of interest, a &struct task_struct pointer
 * @engine:	engine pointer returned by utrace_attach_task()
 * @exam:	pointer passed to utrace_prepare_examine() call
 *
 * This call completes an examination on the thread @target begun by a
 * paired utrace_prepare_examine() call with the same arguments that
 * returned success (zero).
 *
 * When @target is current, this call is superfluous.  When @target is
 * another thread, this returns zero if @target has remained unscheduled
 * since the paired utrace_prepare_examine() call returned zero.
 *
 * When this returns an error, any examination done since the paired
 * utrace_prepare_examine() call is unreliable and the data extracted
 * should be discarded.  The error is -%EINVAL if @engine is not
 * keeping @target stopped, or -%EAGAIN if @target woke up unexpectedly.
 */
int utrace_finish_examine(struct task_struct *target,
			  struct utrace_engine *engine,
			  struct utrace_examiner *exam)
{
	int ret = 0;

	if (unlikely(target == current))
		return 0;

	rcu_read_lock();
	if (unlikely(!engine_wants_stop(engine)))
		ret = -EINVAL;
	else if (unlikely(target->state != exam->state))
		ret = -EAGAIN;
	else
		get_task_struct(target);
	rcu_read_unlock();

	if (likely(!ret)) {
		unsigned long ncsw = wait_task_inactive(target, exam->state);
		if (unlikely(ncsw != exam->ncsw))
			ret = -EAGAIN;
		put_task_struct(target);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(utrace_finish_examine);
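
/*
 * Illustrative sketch (not built): the intended calling sequence for the
 * two functions above.  An engine that has put @target into %UTRACE_STOP
 * brackets its register reads with prepare/finish and retries on -EAGAIN.
 * The regset index 0 and the caller-supplied buffer are assumptions here;
 * real callers size the buffer from the regset's @n and @size fields.
 */
#if 0
static int example_read_regs(struct task_struct *target,
			     struct utrace_engine *engine,
			     void *buf, unsigned int size)
{
	const struct user_regset_view *view = task_user_regset_view(target);
	const struct user_regset *regset = &view->regsets[0];
	struct utrace_examiner exam;
	int ret;

	ret = utrace_prepare_examine(target, engine, &exam);
	if (ret)
		return ret;	/* -ESRCH, -EINVAL, or -EAGAIN */

	ret = regset->get(target, regset, 0, size, buf, NULL);

	/* If the target ran meanwhile, what we read is garbage. */
	if (utrace_finish_examine(target, engine, &exam))
		return -EAGAIN;

	return ret;
}
#endif
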
/*
 * This is declared in linux/regset.h and defined in machine-dependent
 * code.  We put the export here to ensure no machine forgets it.
 */
EXPORT_SYMBOL_GPL(task_user_regset_view);

/*
 * Called with rcu_read_lock() held.
 */
void task_utrace_proc_status(struct seq_file *m, struct task_struct *p)
{
	struct utrace *utrace = &p->utrace;
	seq_printf(m, "Utrace: %lx%s%s%s\n",
		   p->utrace_flags,
		   utrace->stopped ? " (stopped)" : "",
		   utrace->report ? " (report)" : "",
		   utrace->interrupt ? " (interrupt)" : "");
}