1 diff -urN linux-2.6.39.1/Documentation/DocBook/Makefile linux-2.6.39.1b/Documentation/DocBook/Makefile
2 --- linux-2.6.39.1/Documentation/DocBook/Makefile 2011-06-02 17:34:20.000000000 -0700
3 +++ linux-2.6.39.1b/Documentation/DocBook/Makefile 2011-06-30 10:42:11.416429346 -0700
4 @@ -14,7 +14,7 @@
5 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
6 80211.xml debugobjects.xml sh.xml regulator.xml \
7 alsa-driver-api.xml writing-an-alsa-driver.xml \
8 - tracepoint.xml media.xml drm.xml
9 + tracepoint.xml utrace.xml media.xml drm.xml
11 ###
12 # The build process is as follows (targets):
13 diff -urN linux-2.6.39.1/Documentation/DocBook/utrace.tmpl linux-2.6.39.1b/Documentation/DocBook/utrace.tmpl
14 --- linux-2.6.39.1/Documentation/DocBook/utrace.tmpl 1969-12-31 17:00:00.000000000 -0700
15 +++ linux-2.6.39.1b/Documentation/DocBook/utrace.tmpl 2011-06-30 10:42:11.416429346 -0700
16 @@ -0,0 +1,589 @@
17 +<?xml version="1.0" encoding="UTF-8"?>
18 +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
19 +"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
21 +<book id="utrace">
22 + <bookinfo>
23 + <title>The utrace User Debugging Infrastructure</title>
24 + </bookinfo>
26 + <toc></toc>
28 + <chapter id="concepts"><title>utrace concepts</title>
30 + <sect1 id="intro"><title>Introduction</title>
32 + <para>
33 + <application>utrace</application> is infrastructure code for tracing
34 + and controlling user threads. This is the foundation for writing
35 + tracing engines, which can be loadable kernel modules.
36 + </para>
38 + <para>
39 + The basic actors in <application>utrace</application> are the thread
40 + and the tracing engine. A tracing engine is some body of code that
41 + calls into the <filename>&lt;linux/utrace.h&gt;</filename>
42 + interfaces, represented by a <structname>struct
43 + utrace_engine_ops</structname>. (Usually it's a kernel module,
44 + though the legacy <function>ptrace</function> support is a tracing
45 + engine that is not in a kernel module.) The interface operates on
46 + individual threads (<structname>struct task_struct</structname>).
47 + If an engine wants to treat several threads as a group, that is up
48 + to its higher-level code.
49 + </para>
51 + <para>
52 + Tracing begins by attaching an engine to a thread, using
53 + <function>utrace_attach_task</function> or
54 + <function>utrace_attach_pid</function>. If successful, it returns a
55 + pointer that is the handle used in all other calls.
56 + </para>
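+ <para>
+ As a hedged illustration (the exact <function>utrace_attach_task</function>
+ signature is assumed from this patch's
+ <filename>&lt;linux/utrace.h&gt;</filename>; <varname>my_ops</varname> is a
+ hypothetical ops vector defined by the engine):
+ </para>
+ <programlisting>
+/* Sketch: attach a new engine to a task we already have a pointer to. */
+static const struct utrace_engine_ops my_ops; /* callbacks, defined later */
+
+static struct utrace_engine *my_attach(struct task_struct *task)
+{
+ struct utrace_engine *engine;
+
+ engine = utrace_attach_task(task, UTRACE_ATTACH_CREATE,
+    &amp;my_ops, NULL);
+ if (IS_ERR(engine)) /* assuming ERR_PTR-style failure */
+  return NULL; /* e.g. the task is already dying */
+ /* The returned pointer is the handle passed to all other calls. */
+ return engine;
+}
+ </programlisting>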
58 + </sect1>
60 + <sect1 id="callbacks"><title>Events and Callbacks</title>
62 + <para>
63 + An attached engine does nothing by default. An engine makes something
64 + happen by requesting callbacks via <function>utrace_set_events</function>
65 + and poking the thread with <function>utrace_control</function>.
66 + The synchronization issues related to these two calls
67 + are discussed further below in <xref linkend="teardown"/>.
68 + </para>
70 + <para>
71 + Events are specified using the macro
72 + <constant>UTRACE_EVENT(<replaceable>type</replaceable>)</constant>.
73 + Each event type is associated with a callback in <structname>struct
74 + utrace_engine_ops</structname>. A tracing engine can leave unused
75 + callbacks <constant>NULL</constant>. The only callbacks required
76 + are those used by the event flags it sets.
77 + </para>
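+ <para>
+ A minimal sketch (callback signatures assumed to match the
+ <structname>struct utrace_engine_ops</structname> declared in this
+ patch; only the quiesce callback is provided, so only
+ <constant>UTRACE_EVENT(QUIESCE)</constant> may be requested):
+ </para>
+ <programlisting>
+/* Called at safe points once the QUIESCE event bit is set. */
+static u32 my_report_quiesce(u32 action, struct utrace_engine *engine,
+        unsigned long event)
+{
+ return UTRACE_RESUME; /* let the thread keep running */
+}
+
+static const struct utrace_engine_ops my_ops = {
+ .report_quiesce = my_report_quiesce,
+};
+
+/* After attaching: request callbacks for the QUIESCE event. */
+/* err = utrace_set_events(task, engine, UTRACE_EVENT(QUIESCE)); */
+ </programlisting>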
79 + <para>
80 + Many engines can be attached to each thread. When a thread has an
81 + event, each engine gets a callback if it has set the event flag for
82 + that event type. For most events, engines are called in the order they
83 + attached. Engines that attach after the event has occurred do not get
84 + callbacks for that event. This includes any new engines just attached
85 + by an existing engine's callback function. Once the sequence of
86 + callbacks for that one event has completed, such new engines are then
87 + eligible in the next sequence that starts when there is another event.
88 + </para>
90 + <para>
91 + Event reporting callbacks have details particular to the event type,
92 + but are all called in similar environments and have the same
93 + constraints. Callbacks are made from safe points, where no locks
94 + are held, no special resources are pinned (usually), and the
95 + user-mode state of the thread is accessible. So, callback code has
96 + a pretty free hand. But to be a good citizen, callback code should
97 + never block for long periods. It is fine to block in
98 + <function>kmalloc</function> and the like, but never wait for i/o or
99 + for user mode to do something. If you need the thread to wait, use
100 + <constant>UTRACE_STOP</constant> and return from the callback
101 + quickly. When your i/o finishes or whatever, you can use
102 + <function>utrace_control</function> to resume the thread.
103 + </para>
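+ <para>
+ For example, a sketch of the recommended pattern
+ (<function>my_io_pending</function> is a hypothetical helper):
+ </para>
+ <programlisting>
+static u32 my_report_quiesce(u32 action, struct utrace_engine *engine,
+        unsigned long event)
+{
+ if (my_io_pending()) /* hypothetical: i/o not finished yet */
+  return UTRACE_STOP; /* park the thread, return quickly */
+ return UTRACE_RESUME;
+}
+
+/* Later, from the i/o completion path: */
+/* utrace_control(task, engine, UTRACE_RESUME); */
+ </programlisting>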
105 + <para>
106 + The <constant>UTRACE_EVENT(SYSCALL_ENTRY)</constant> event is a special
107 + case. While other events happen in the kernel when it will return to
108 + user mode soon, this event happens when entering the kernel before it
109 + will proceed with the work requested from user mode. Because of this
110 + difference, the <function>report_syscall_entry</function> callback is
111 + special in two ways. For this event, engines are called in reverse of
112 + the normal order (this includes the <function>report_quiesce</function>
113 + call that precedes a <function>report_syscall_entry</function> call).
114 + This preserves the semantics that the last engine to attach is called
115 + "closest to user mode"--the engine that is first to see a thread's user
116 + state when it enters the kernel is also the last to see that state when
117 + the thread returns to user mode. For the same reason, if these
118 + callbacks use <constant>UTRACE_STOP</constant> (see the next section),
119 + the thread stops immediately after callbacks rather than only when it's
120 + ready to return to user mode; when allowed to resume, it will actually
121 + attempt the system call indicated by the register values at that time.
122 + </para>
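+ <para>
+ A sketch of such a callback (signature assumed from this patch's
+ <structname>struct utrace_engine_ops</structname>):
+ </para>
+ <programlisting>
+static u32 my_syscall_entry(u32 action, struct utrace_engine *engine,
+       struct pt_regs *regs)
+{
+ /*
+  * Stop right here, before the system call runs. While stopped,
+  * another context may edit the registers; the call attempted on
+  * resumption uses the register values in place at that time.
+  */
+ return UTRACE_STOP | UTRACE_SYSCALL_RUN;
+}
+ </programlisting>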
124 + </sect1>
126 + <sect1 id="safely"><title>Stopping Safely</title>
128 + <sect2 id="well-behaved"><title>Writing well-behaved callbacks</title>
130 + <para>
131 + Well-behaved callbacks are important to maintain two essential
132 + properties of the interface. The first of these is that unrelated
133 + tracing engines should not interfere with each other. If your engine's
134 + event callback does not return quickly, then another engine won't get
135 + the event notification in a timely manner. The second important
136 + property is that tracing should be as noninvasive as possible to the
137 + normal operation of the system overall and of the traced thread in
138 + particular. That is, attached tracing engines should not perturb a
139 + thread's behavior, except to the extent that changing its user-visible
140 + state is explicitly what you want to do. (Obviously some perturbation
141 + is unavoidable, primarily timing changes, ranging from small delays due
142 + to the overhead of tracing, to arbitrary pauses in user code execution
143 + when a user stops a thread with a debugger for examination.) Even when
144 + you explicitly want the perturbation of making the traced thread block,
145 + just blocking directly in your callback has more unwanted effects. For
146 + example, the <constant>CLONE</constant> event callbacks are called when
147 + the new child thread has been created but not yet started running; the
148 + child can never be scheduled until the <constant>CLONE</constant>
149 + tracing callbacks return. (This allows engines tracing the parent to
150 + attach to the child.) If a <constant>CLONE</constant> event callback
151 + blocks the parent thread, it also prevents the child thread from
152 + running (even to process a <constant>SIGKILL</constant>). If what you
153 + want is to make both the parent and child block, then use
154 + <function>utrace_attach_task</function> on the child and then use
155 + <constant>UTRACE_STOP</constant> on both threads. A more crucial
156 + problem with blocking in callbacks is that it can prevent
157 + <constant>SIGKILL</constant> from working. A thread that is blocking
158 + due to <constant>UTRACE_STOP</constant> will still wake up and die
159 + immediately when sent a <constant>SIGKILL</constant>, as all threads
160 + should. Relying on the <application>utrace</application>
161 + infrastructure rather than on private synchronization calls in event
162 + callbacks is an important way to help keep tracing robustly
163 + noninvasive.
164 + </para>
166 + </sect2>
168 + <sect2 id="UTRACE_STOP"><title>Using <constant>UTRACE_STOP</constant></title>
170 + <para>
171 + To control another thread and access its state, it must be stopped
172 + with <constant>UTRACE_STOP</constant>. This means that it is
173 + stopped and won't start running again while we access it. When a
174 + thread is not already stopped, <function>utrace_control</function>
175 + returns <constant>-EINPROGRESS</constant> and an engine must wait
176 + for an event callback when the thread is ready to stop. The thread
177 + may be running on another CPU or may be blocked. When it is ready
178 + to be examined, it will make callbacks to engines that set the
179 + <constant>UTRACE_EVENT(QUIESCE)</constant> event bit. To wake up an
180 + interruptible wait, use <constant>UTRACE_INTERRUPT</constant>.
181 + </para>
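+ <para>
+ A sketch of requesting a stop, assuming the
+ <function>utrace_control</function> signature declared in this patch:
+ </para>
+ <programlisting>
+static int my_request_stop(struct task_struct *task,
+      struct utrace_engine *engine)
+{
+ int err = utrace_control(task, engine, UTRACE_STOP);
+
+ if (err == -EINPROGRESS) {
+  /*
+   * Not stopped yet: with UTRACE_EVENT(QUIESCE) set, a
+   * report_quiesce callback will arrive at the next safe
+   * point; note the stop from there instead.
+   */
+  return 0;
+ }
+ return err; /* zero means already stopped and examinable */
+}
+ </programlisting>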
183 + <para>
184 + As long as some engine has used <constant>UTRACE_STOP</constant> and
185 + not called <function>utrace_control</function> to resume the thread,
186 + then the thread will remain stopped. <constant>SIGKILL</constant>
187 + will wake it up, but it will not run user code. When the stop is
188 + cleared with <function>utrace_control</function> or a callback
189 + return value, the thread starts running again.
190 + (See also <xref linkend="teardown"/>.)
191 + </para>
193 + </sect2>
195 + </sect1>
197 + <sect1 id="teardown"><title>Tear-down Races</title>
199 + <sect2 id="SIGKILL"><title>Primacy of <constant>SIGKILL</constant></title>
200 + <para>
201 + Ordinarily synchronization issues for tracing engines are kept fairly
202 + straightforward by using <constant>UTRACE_STOP</constant>. You ask a
203 + thread to stop, and then once it makes the
204 + <function>report_quiesce</function> callback it cannot do anything else
205 + that would result in another callback, until you let it continue with a
206 + <function>utrace_control</function> call. This simple arrangement
207 + avoids complex and error-prone code in each one of a tracing engine's
208 + event callbacks to keep them serialized with the engine's other
209 + operations done on that thread from another thread of control.
210 + However, giving tracing engines complete power to keep a traced thread
211 + stuck in place runs afoul of a more important kind of simplicity that
212 + the kernel overall guarantees: nothing can prevent or delay
213 + <constant>SIGKILL</constant> from making a thread die and release its
214 + resources. To preserve this important property of
215 + <constant>SIGKILL</constant>, it can, as a special case, break
216 + <constant>UTRACE_STOP</constant> like nothing else normally can. This
217 + includes both explicit <constant>SIGKILL</constant> signals and the
218 + implicit <constant>SIGKILL</constant> sent to each other thread in the
219 + same thread group by a thread doing an exec, or processing a fatal
220 + signal, or making an <function>exit_group</function> system call. A
221 + tracing engine can prevent a thread from beginning the exit or exec or
222 + dying by signal (other than <constant>SIGKILL</constant>) if it is
223 + attached to that thread, but once the operation begins, no tracing
224 + engine can prevent or delay all other threads in the same thread group
225 + dying.
226 + </para>
227 + </sect2>
229 + <sect2 id="reap"><title>Final callbacks</title>
230 + <para>
231 + The <function>report_reap</function> callback is always the final event
232 + in the life cycle of a traced thread. Tracing engines can use this as
233 + the trigger to clean up their own data structures. The
234 + <function>report_death</function> callback is always the penultimate
235 + event a tracing engine might see; it's seen unless the thread was
236 + already in the midst of dying when the engine attached. Many tracing
237 + engines will have no interest in when a parent reaps a dead process,
238 + and nothing they want to do with a zombie thread once it dies; for
239 + them, the <function>report_death</function> callback is the natural
240 + place to clean up data structures and detach. To facilitate writing
241 + such engines robustly, given the asynchrony of
242 + <constant>SIGKILL</constant>, and without error-prone manual
243 + implementation of synchronization schemes, the
244 + <application>utrace</application> infrastructure provides some special
245 + guarantees about the <function>report_death</function> and
246 + <function>report_reap</function> callbacks. It still takes some care
247 + to be sure your tracing engine is robust to tear-down races, but these
248 + rules make it reasonably straightforward and concise to handle a lot of
249 + corner cases correctly.
250 + </para>
251 + </sect2>
253 + <sect2 id="refcount"><title>Engine and task pointers</title>
254 + <para>
255 + The first sort of guarantee concerns the core data structures
256 + themselves. <structname>struct utrace_engine</structname> is
257 + a reference-counted data structure. While you hold a reference, an
258 + engine pointer will always stay valid so that you can safely pass it to
259 + any <application>utrace</application> call. Each call to
260 + <function>utrace_attach_task</function> or
261 + <function>utrace_attach_pid</function> returns an engine pointer with a
262 + reference belonging to the caller. You own that reference until you
263 + drop it using <function>utrace_engine_put</function>. There is an
264 + implicit reference on the engine while it is attached. So if you drop
265 + your only reference, and then use
266 + <function>utrace_attach_task</function> without
267 + <constant>UTRACE_ATTACH_CREATE</constant> to look up that same engine,
268 + you will get the same pointer with a new reference to replace the one
269 + you dropped, just like calling <function>utrace_engine_get</function>.
270 + When an engine has been detached, either explicitly with
271 + <constant>UTRACE_DETACH</constant> or implicitly after
272 + <function>report_reap</function>, then any references you hold are all
273 + that keep the old engine pointer alive.
274 + </para>
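+ <para>
+ For instance (a sketch; <varname>my_ops</varname> is the hypothetical
+ ops vector used at attach time, and ERR_PTR-style failure is assumed):
+ </para>
+ <programlisting>
+static void poke_existing_engine(struct task_struct *task)
+{
+ struct utrace_engine *engine;
+
+ /*
+  * Look up the engine previously attached with my_ops. On success
+  * this returns the same pointer with a new reference, just like
+  * utrace_engine_get().
+  */
+ engine = utrace_attach_task(task, UTRACE_ATTACH_MATCH_OPS,
+    &amp;my_ops, NULL);
+ if (IS_ERR(engine))
+  return;
+ /* ... use engine ... */
+ utrace_engine_put(engine); /* drop the reference we were given */
+}
+ </programlisting>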
276 + <para>
277 + There is nothing a kernel module can do to keep a <structname>struct
278 + task_struct</structname> alive outside of
279 + <function>rcu_read_lock</function>. When the task dies and is reaped
280 + by its parent (or itself), that structure can be freed so that any
281 + dangling pointers you have stored become invalid.
282 + <application>utrace</application> will not prevent this, but it can
283 + help you detect it safely. By definition, a task that has been reaped
284 + has had all its engines detached. All
285 + <application>utrace</application> calls can be safely called on a
286 + detached engine if the caller holds a reference on that engine pointer,
287 + even if the task pointer passed in the call is invalid. All calls
288 + return <constant>-ESRCH</constant> for a detached engine, which tells
289 + you that the task pointer you passed could be invalid now. Since
290 + <function>utrace_control</function> and
291 + <function>utrace_set_events</function> do not block, you can call those
292 + inside a <function>rcu_read_lock</function> section and, when they
293 + don't return <constant>-ESRCH</constant>, be sure that the task pointer is
294 + still valid until <function>rcu_read_unlock</function>. The
295 + infrastructure never holds task references of its own. Though neither
296 + <function>rcu_read_lock</function> nor any other lock is held while
297 + making a callback, it's always guaranteed that the <structname>struct
298 + task_struct</structname> and the <structname>struct
299 + utrace_engine</structname> passed as arguments remain valid
300 + until the callback function returns.
301 + </para>
303 + <para>
304 + The common means for safely holding task pointers that is available to
305 + kernel modules is to use <structname>struct pid</structname>, which
306 + permits <function>put_pid</function> from kernel modules. When using
307 + that, the calls <function>utrace_attach_pid</function>,
308 + <function>utrace_control_pid</function>,
309 + <function>utrace_set_events_pid</function>, and
310 + <function>utrace_barrier_pid</function> are available.
311 + </para>
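+ <para>
+ A sketch of the <structname>struct pid</structname> route
+ (<function>utrace_attach_pid</function> arguments assumed to parallel
+ <function>utrace_attach_task</function>; <varname>my_ops</varname> is
+ hypothetical):
+ </para>
+ <programlisting>
+static struct utrace_engine *attach_by_pid_nr(pid_t nr)
+{
+ struct pid *pid = find_get_pid(nr); /* takes a pid reference */
+ struct utrace_engine *engine;
+
+ if (!pid)
+  return NULL;
+ engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE,
+    &amp;my_ops, NULL);
+ put_pid(pid); /* the engine does not need our pid reference */
+ return IS_ERR(engine) ? NULL : engine;
+}
+ </programlisting>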
312 + </sect2>
314 + <sect2 id="reap-after-death">
315 + <title>
316 + Serialization of <constant>DEATH</constant> and <constant>REAP</constant>
317 + </title>
318 + <para>
319 + The second guarantee is the serialization of
320 + <constant>DEATH</constant> and <constant>REAP</constant> event
321 + callbacks for a given thread. The actual reaping by the parent
322 + (<function>release_task</function> call) can occur simultaneously
323 + while the thread is still doing the final steps of dying, including
324 + the <function>report_death</function> callback. If a tracing engine
325 + has requested both <constant>DEATH</constant> and
326 + <constant>REAP</constant> event reports, it's guaranteed that the
327 + <function>report_reap</function> callback will not be made until
328 + after the <function>report_death</function> callback has returned.
329 + If the <function>report_death</function> callback itself detaches
330 + from the thread, then the <function>report_reap</function> callback
331 + will never be made. Thus it is safe for a
332 + <function>report_death</function> callback to clean up data
333 + structures and detach.
334 + </para>
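+ <para>
+ A sketch of an engine that does all its cleanup at death
+ (<function>report_death</function> signature assumed from this patch's
+ <structname>struct utrace_engine_ops</structname>):
+ </para>
+ <programlisting>
+static u32 my_report_death(struct utrace_engine *engine,
+      bool group_dead, int signal)
+{
+ kfree(engine->data); /* hypothetical per-task bookkeeping */
+ /* Detaching here guarantees no report_reap callback follows. */
+ return UTRACE_DETACH;
+}
+ </programlisting>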
335 + </sect2>
337 + <sect2 id="interlock"><title>Interlock with final callbacks</title>
338 + <para>
339 + The final sort of guarantee is that a tracing engine will know for sure
340 + whether or not the <function>report_death</function> and/or
341 + <function>report_reap</function> callbacks will be made for a certain
342 + thread. These tear-down races are disambiguated by the error return
343 + values of <function>utrace_set_events</function> and
344 + <function>utrace_control</function>. Normally
345 + <function>utrace_control</function> called with
346 + <constant>UTRACE_DETACH</constant> returns zero, and this means that no
347 + more callbacks will be made. If the thread is in the midst of dying,
348 + it returns <constant>-EALREADY</constant> to indicate that the
349 + <function>report_death</function> callback may already be in progress;
350 + when you get this error, you know that any cleanup your
351 + <function>report_death</function> callback does is about to happen or
352 + has just happened--note that if the <function>report_death</function>
353 + callback does not detach, the engine remains attached until the thread
354 + gets reaped. If the thread is in the midst of being reaped,
355 + <function>utrace_control</function> returns <constant>-ESRCH</constant>
356 + to indicate that the <function>report_reap</function> callback may
357 + already be in progress; this means the engine is implicitly detached
358 + when the callback completes. This makes it possible for a tracing
359 + engine that has decided asynchronously to detach from a thread to
360 + safely clean up its data structures, knowing that no
361 + <function>report_death</function> or <function>report_reap</function>
362 + callback will try to do the same. <function>utrace_control</function>
363 + with <constant>UTRACE_DETACH</constant> returns <constant>-ESRCH</constant> when the <structname>struct
364 + utrace_engine</structname> has already been detached, but is
365 + still a valid pointer because of its reference count. A tracing engine
366 + can use this to safely synchronize its own independent multiple threads
367 + of control with each other and with its event callbacks that detach.
368 + </para>
370 + <para>
371 + In the same vein, <function>utrace_set_events</function> normally
372 + returns zero; if the target thread was stopped before the call, then
373 + after a successful call, no event callbacks not requested in the new
374 + flags will be made. It fails with <constant>-EALREADY</constant> if
375 + you try to clear <constant>UTRACE_EVENT(DEATH)</constant> when the
376 + <function>report_death</function> callback may already have begun, or if
377 + you try to newly set <constant>UTRACE_EVENT(DEATH)</constant> or
378 + <constant>UTRACE_EVENT(QUIESCE)</constant> when the target is already
379 + dead or dying. Like <function>utrace_control</function>, it returns
380 + <constant>-ESRCH</constant> when the <function>report_reap</function>
381 + callback may already have begun, or the thread has already been detached
382 + (including forcible detach on reaping). This lets the tracing engine
383 + know for sure which event callbacks it will or won't see after
384 + <function>utrace_set_events</function> has returned. By checking for
385 + errors, it can know whether to clean up its data structures immediately
386 + or to let its callbacks do the work.
387 + </para>
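+ <para>
+ A sketch of an asynchronous detach using these error returns
+ (<function>my_cleanup</function> is a hypothetical helper, and the
+ engine's callbacks are assumed to clean up when they detach):
+ </para>
+ <programlisting>
+static void my_async_detach(struct task_struct *task,
+       struct utrace_engine *engine)
+{
+ switch (utrace_control(task, engine, UTRACE_DETACH)) {
+ case 0:   /* detached: no more callbacks */
+  my_cleanup(engine);
+  break;
+ case -EALREADY:  /* report_death is or will be running */
+ case -ESRCH:  /* report_reap is or will be running */
+  break;  /* let that callback do the cleanup */
+ }
+ utrace_engine_put(engine); /* drop our own reference */
+}
+ </programlisting>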
388 + </sect2>
390 + <sect2 id="barrier"><title>Using <function>utrace_barrier</function></title>
391 + <para>
392 + When a thread is safely stopped, calling
393 + <function>utrace_control</function> with <constant>UTRACE_DETACH</constant>
394 + or calling <function>utrace_set_events</function> to disable some events
395 + ensures synchronously that your engine won't get any more of the callbacks
396 + that have been disabled (none at all when detaching). But these can also
397 + be used while the thread is not stopped, when it might be simultaneously
398 + making a callback to your engine. For this situation, these calls return
399 + <constant>-EINPROGRESS</constant> when it's possible a callback is in
400 + progress. If you are not prepared to have your old callbacks still run,
401 + then you can synchronize to be sure all the old callbacks are finished,
402 + using <function>utrace_barrier</function>. This is necessary if the
403 + kernel module containing your callback code is going to be unloaded.
404 + </para>
405 + <para>
406 + After using <constant>UTRACE_DETACH</constant> once, further calls to
407 + <function>utrace_control</function> with the same engine pointer will
408 + return <constant>-ESRCH</constant>. In contrast, after getting
409 + <constant>-EINPROGRESS</constant> from
410 + <function>utrace_set_events</function>, you can call
411 + <function>utrace_set_events</function> again later and if it returns zero
412 + then know the old callbacks have finished.
413 + </para>
414 + <para>
415 + Unlike all other calls, <function>utrace_barrier</function> (and
416 + <function>utrace_barrier_pid</function>) will accept any engine pointer you
417 + hold a reference on, even if <constant>UTRACE_DETACH</constant> has already
418 + been used. After any <function>utrace_control</function> or
419 + <function>utrace_set_events</function> call (these do not block), you can
420 + call <function>utrace_barrier</function> to block until callbacks have
421 + finished. This returns <constant>-ESRCH</constant> only if the engine is
422 + completely detached (finished all callbacks). Otherwise it waits
423 + until the thread is definitely not in the midst of a callback to this
424 + engine and then returns zero, but can return
425 + <constant>-ERESTARTSYS</constant> if its wait is interrupted.
426 + </para>
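+ <para>
+ A sketch of synchronizing before module unload (assuming the
+ <function>utrace_barrier</function> signature declared in this patch):
+ </para>
+ <programlisting>
+static void my_detach_and_sync(struct task_struct *task,
+          struct utrace_engine *engine)
+{
+ if (utrace_control(task, engine, UTRACE_DETACH) == -EINPROGRESS) {
+  /* A callback into our code may still be running. */
+  while (utrace_barrier(task, engine) == -ERESTARTSYS)
+   ; /* interrupted; wait again */
+ }
+ /* Now no callback of ours is running or will run. */
+ utrace_engine_put(engine);
+}
+ </programlisting>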
427 + </sect2>
429 +</sect1>
431 +</chapter>
433 +<chapter id="core"><title>utrace core API</title>
435 +<para>
436 + The utrace API is declared in <filename>&lt;linux/utrace.h&gt;</filename>.
437 +</para>
439 +!Iinclude/linux/utrace.h
440 +!Ekernel/utrace.c
442 +</chapter>
444 +<chapter id="machine"><title>Machine State</title>
446 +<para>
447 + The <function>task_current_syscall</function> function can be used on any
448 + valid <structname>struct task_struct</structname> at any time, and does
449 + not even require that <function>utrace_attach_task</function> was used at all.
450 +</para>
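+<para>
+ A sketch, with the six-argument signature assumed from this kernel's
+ <filename>lib/syscall.c</filename>:
+</para>
+<programlisting>
+static void show_current_syscall(struct task_struct *task)
+{
+ long callno;
+ unsigned long args[6], sp, pc;
+
+ /* Returns 0 on success; callno is -1 if not in a system call. */
+ if (task_current_syscall(task, &amp;callno, args, 6, &amp;sp, &amp;pc) == 0)
+  printk(KERN_DEBUG "pid %d in syscall %ld\n", task->pid, callno);
+}
+</programlisting>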
452 +<para>
453 + The other ways to access the registers and other machine-dependent state of
454 + a task can only be used on a task that is at a known safe point. The safe
455 + points are all the places where <function>utrace_set_events</function> can
456 + request callbacks (except for the <constant>DEATH</constant> and
457 + <constant>REAP</constant> events). So at any event callback, it is safe to
458 + examine <varname>current</varname>.
459 +</para>
461 +<para>
462 + One task can examine another only after a callback in the target task that
463 + returns <constant>UTRACE_STOP</constant> so that task will not return to user
464 + mode after the safe point. This guarantees that the task will not resume
465 + until the same engine uses <function>utrace_control</function>, unless the
466 + task dies suddenly. To examine safely, one must use a pair of calls to
467 + <function>utrace_prepare_examine</function> and
468 + <function>utrace_finish_examine</function> surrounding the calls to
469 + <structname>struct user_regset</structname> functions or direct examination
470 + of task data structures. <function>utrace_prepare_examine</function> returns
471 + an error if the task is not properly stopped, or is dead. After a
472 + successful examination, the paired <function>utrace_finish_examine</function>
473 + call returns an error if the task ever woke up during the examination. If
474 + so, any data gathered may be scrambled and should be discarded. This means
475 + there was a spurious wake-up (which should not happen), or a sudden death.
476 +</para>
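+<para>
+ A sketch of the examination pattern (the <structname>struct
+ utrace_examiner</structname> cookie and both call signatures are
+ assumptions about this patch's API):
+</para>
+<programlisting>
+static int examine_task(struct task_struct *task,
+   struct utrace_engine *engine)
+{
+ struct utrace_examiner exam; /* pairs the two calls */
+ int err = utrace_prepare_examine(task, engine, &amp;exam);
+
+ if (err)
+  return err; /* not stopped for us, or dead */
+ /* ... struct user_regset accesses or direct reads of task ... */
+ err = utrace_finish_examine(task, engine, &amp;exam);
+ if (err)
+  return err; /* task woke up: discard what was read */
+ return 0;
+}
+</programlisting>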
478 +<sect1 id="regset"><title><structname>struct user_regset</structname></title>
480 +<para>
481 + The <structname>struct user_regset</structname> API
482 + is declared in <filename>&lt;linux/regset.h&gt;</filename>.
483 +</para>
485 +!Finclude/linux/regset.h
487 +</sect1>
489 +<sect1 id="task_current_syscall">
490 + <title>System Call Information</title>
492 +<para>
493 + This function is declared in <filename>&lt;linux/ptrace.h&gt;</filename>.
494 +</para>
496 +!Elib/syscall.c
498 +</sect1>
500 +<sect1 id="syscall"><title><filename>System Call Tracing</filename></title>
502 +<para>
503 + The arch API for system call information is declared in
504 + <filename>&lt;asm/syscall.h&gt;</filename>.
505 + Each of these calls can be used only during system call entry tracing,
506 + or only at system call exit and the subsequent safe points
507 + before returning to user mode.
508 + "During system call entry tracing" means either during a
509 + <structfield>report_syscall_entry</structfield> callback,
510 + or any time after that callback has returned <constant>UTRACE_STOP</constant>.
511 +</para>
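+<para>
+ A sketch of using these calls from a syscall-entry callback (the
+ 2.6.39-era <function>syscall_get_arguments</function> signature takes a
+ starting argument index and a count):
+</para>
+<programlisting>
+static u32 my_syscall_entry(u32 action, struct utrace_engine *engine,
+       struct pt_regs *regs)
+{
+ unsigned long args[6];
+ long nr = syscall_get_nr(current, regs);
+
+ syscall_get_arguments(current, regs, 0, 6, args);
+ pr_debug("syscall %ld, first arg %lx\n", nr, args[0]);
+ return UTRACE_RESUME;
+}
+</programlisting>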
513 +!Finclude/asm-generic/syscall.h
515 +</sect1>
517 +</chapter>
519 +<chapter id="internals"><title>Kernel Internals</title>
521 +<para>
522 + This chapter covers the interface to the tracing infrastructure
523 + from the core of the kernel and the architecture-specific code.
524 + This is for maintainers of the kernel and arch code, and not relevant
525 + to using the tracing facilities described in preceding chapters.
526 +</para>
528 +<sect1 id="tracehook"><title>Core Calls In</title>
530 +<para>
531 + These calls are declared in <filename>&lt;linux/tracehook.h&gt;</filename>.
532 + The core kernel calls these functions at various important places.
533 +</para>
535 +!Finclude/linux/tracehook.h
537 +</sect1>
539 +<sect1 id="arch"><title>Architecture Calls Out</title>
541 +<para>
542 + An arch that has done all these things sets
543 + <constant>CONFIG_HAVE_ARCH_TRACEHOOK</constant>.
544 + This is required to enable the <application>utrace</application> code.
545 +</para>
547 +<sect2 id="arch-ptrace"><title><filename>&lt;asm/ptrace.h&gt;</filename></title>
549 +<para>
550 + An arch defines these in <filename>&lt;asm/ptrace.h&gt;</filename>
551 + if it supports hardware single-step or block-step features.
552 +</para>
554 +!Finclude/linux/ptrace.h arch_has_single_step arch_has_block_step
555 +!Finclude/linux/ptrace.h user_enable_single_step user_enable_block_step
556 +!Finclude/linux/ptrace.h user_disable_single_step
558 +</sect2>
560 +<sect2 id="arch-syscall">
561 + <title><filename>&lt;asm/syscall.h&gt;</filename></title>
563 + <para>
564 + An arch provides <filename>&lt;asm/syscall.h&gt;</filename> that
565 + defines these as inlines, or declares them as exported functions.
566 + These interfaces are described in <xref linkend="syscall"/>.
567 + </para>
569 +</sect2>
571 +<sect2 id="arch-tracehook">
572 + <title><filename>&lt;linux/tracehook.h&gt;</filename></title>
574 + <para>
575 + An arch must define <constant>TIF_NOTIFY_RESUME</constant>
576 + and <constant>TIF_SYSCALL_TRACE</constant>
577 + in its <filename>&lt;asm/thread_info.h&gt;</filename>.
578 + The arch code must call the following functions, all declared
579 + in <filename>&lt;linux/tracehook.h&gt;</filename> and
580 + described in <xref linkend="tracehook"/>:
582 + <itemizedlist>
583 + <listitem>
584 + <para><function>tracehook_notify_resume</function></para>
585 + </listitem>
586 + <listitem>
587 + <para><function>tracehook_report_syscall_entry</function></para>
588 + </listitem>
589 + <listitem>
590 + <para><function>tracehook_report_syscall_exit</function></para>
591 + </listitem>
592 + <listitem>
593 + <para><function>tracehook_signal_handler</function></para>
594 + </listitem>
595 + </itemizedlist>
597 + </para>
599 +</sect2>
601 +</sect1>
603 +</chapter>
605 +</book>
606 diff -urN linux-2.6.39.1/fs/proc/array.c linux-2.6.39.1b/fs/proc/array.c
607 --- linux-2.6.39.1/fs/proc/array.c 2011-06-02 17:34:20.000000000 -0700
608 +++ linux-2.6.39.1b/fs/proc/array.c 2011-06-30 10:42:11.416429346 -0700
609 @@ -81,6 +81,7 @@
610 #include <linux/pid_namespace.h>
611 #include <linux/ptrace.h>
612 #include <linux/tracehook.h>
613 +#include <linux/utrace.h>
615 #include <asm/pgtable.h>
616 #include <asm/processor.h>
617 @@ -192,6 +193,8 @@
618 cred->uid, cred->euid, cred->suid, cred->fsuid,
619 cred->gid, cred->egid, cred->sgid, cred->fsgid);
621 + task_utrace_proc_status(m, p);
623 task_lock(p);
624 if (p->files)
625 fdt = files_fdtable(p->files);
626 diff -urN linux-2.6.39.1/include/linux/ptrace.h linux-2.6.39.1b/include/linux/ptrace.h
627 --- linux-2.6.39.1/include/linux/ptrace.h 2011-06-02 17:34:20.000000000 -0700
628 +++ linux-2.6.39.1b/include/linux/ptrace.h 2011-06-30 17:31:06.755187126 -0700
629 @@ -99,12 +99,15 @@
630 #include <linux/compiler.h> /* For unlikely. */
631 #include <linux/sched.h> /* For struct task_struct. */
634 +extern void ptrace_notify_stop(struct task_struct *tracee);
635 extern long arch_ptrace(struct task_struct *child, long request,
636 unsigned long addr, unsigned long data);
637 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
638 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
639 extern void ptrace_disable(struct task_struct *);
640 +extern int ptrace_attach(struct task_struct *tsk);
641 +extern bool __ptrace_detach(struct task_struct *tracer, struct task_struct *tracee);
642 +extern int ptrace_detach(struct task_struct *, unsigned int);
643 extern int ptrace_check_attach(struct task_struct *task, int kill);
644 extern int ptrace_request(struct task_struct *child, long request,
645 unsigned long addr, unsigned long data);
646 diff -urN linux-2.6.39.1/include/linux/sched.h linux-2.6.39.1b/include/linux/sched.h
647 --- linux-2.6.39.1/include/linux/sched.h 2011-06-02 17:34:20.000000000 -0700
648 +++ linux-2.6.39.1b/include/linux/sched.h 2011-06-30 10:42:11.423096012 -0700
649 @@ -1376,6 +1376,11 @@
650 #endif
651 seccomp_t seccomp;
653 +#ifdef CONFIG_UTRACE
654 + struct utrace *utrace;
655 + unsigned long utrace_flags;
656 +#endif
658 /* Thread group tracking */
659 u32 parent_exec_id;
660 u32 self_exec_id;
661 @@ -2106,6 +2111,7 @@
662 extern int kill_pid(struct pid *pid, int sig, int priv);
663 extern int kill_proc_info(int, struct siginfo *, pid_t);
664 extern int do_notify_parent(struct task_struct *, int);
665 +extern void do_notify_parent_cldstop(struct task_struct *, int);
666 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
667 extern void force_sig(int, struct task_struct *);
668 extern int send_sig(int, struct task_struct *, int);
669 diff -urN linux-2.6.39.1/include/linux/tracehook.h linux-2.6.39.1b/include/linux/tracehook.h
670 --- linux-2.6.39.1/include/linux/tracehook.h 2011-06-02 17:34:20.000000000 -0700
671 +++ linux-2.6.39.1b/include/linux/tracehook.h 2011-06-30 13:52:12.195852127 -0700
672 @@ -49,6 +49,29 @@
673 #include <linux/sched.h>
674 #include <linux/ptrace.h>
675 #include <linux/security.h>
676 +#include <linux/utrace.h>
678 +/*
679 + * Hooks in <linux/tracehook.h> call these entry points to the utrace dispatch.
680 + */
681 +void utrace_free_task(struct task_struct *);
682 +bool utrace_interrupt_pending(void);
683 +void utrace_resume(struct task_struct *, struct pt_regs *);
684 +void utrace_finish_stop(void);
685 +void utrace_maybe_reap(struct task_struct *, struct utrace *, bool);
686 +int utrace_get_signal(struct task_struct *, struct pt_regs *,
687 + siginfo_t *, struct k_sigaction *);
688 +void utrace_report_clone(unsigned long, struct task_struct *);
689 +void utrace_finish_vfork(struct task_struct *);
690 +void utrace_report_exit(long *exit_code);
691 +void utrace_report_death(struct task_struct *, struct utrace *, bool, int);
692 +void utrace_report_jctl(int notify, int type);
693 +void utrace_report_exec(struct linux_binfmt *, struct linux_binprm *,
694 + struct pt_regs *regs);
695 +bool utrace_report_syscall_entry(struct pt_regs *);
696 +void utrace_report_syscall_exit(struct pt_regs *);
697 +void utrace_signal_handler(struct task_struct *, int);
699 struct linux_binprm;
702 @@ -63,6 +86,8 @@
704 static inline int tracehook_expect_breakpoints(struct task_struct *task)
705 {
706 + if (unlikely(task_utrace_flags(task) & UTRACE_EVENT(SIGNAL_CORE)))
707 + return 1;
708 return (task_ptrace(task) & PT_PTRACED) != 0;
709 }
711 @@ -111,6 +136,9 @@
712 static inline __must_check int tracehook_report_syscall_entry(
713 struct pt_regs *regs)
714 {
715 + if ((task_utrace_flags(current) & UTRACE_EVENT(SYSCALL_ENTRY)) &&
716 + utrace_report_syscall_entry(regs))
717 + return 1;
718 ptrace_report_syscall(regs);
719 return 0;
720 }
721 @@ -134,7 +162,10 @@
723 static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
724 {
725 - if (step) {
726 + if (task_utrace_flags(current) & UTRACE_EVENT(SYSCALL_EXIT))
727 + utrace_report_syscall_exit(regs);
729 + if (step && (task_ptrace(current) & PT_PTRACED)) {
730 siginfo_t info;
731 user_single_step_siginfo(current, regs, &info);
732 force_sig_info(SIGTRAP, &info, current);
733 @@ -156,7 +187,7 @@
735 int unsafe = 0;
736 int ptrace = task_ptrace(task);
737 - if (ptrace & PT_PTRACED) {
738 + if (ptrace) {
739 if (ptrace & PT_PTRACE_CAP)
740 unsafe |= LSM_UNSAFE_PTRACE_CAP;
741 else
742 @@ -178,7 +209,7 @@
744 static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk)
745 {
746 - if (task_ptrace(tsk) & PT_PTRACED)
747 + if (task_ptrace(tsk))
748 return rcu_dereference(tsk->parent);
749 return NULL;
750 }
751 @@ -201,6 +232,8 @@
752 struct linux_binprm *bprm,
753 struct pt_regs *regs)
754 {
755 + if (unlikely(task_utrace_flags(current) & UTRACE_EVENT(EXEC)))
756 + utrace_report_exec(fmt, bprm, regs);
757 if (!ptrace_event(PT_TRACE_EXEC, PTRACE_EVENT_EXEC, 0) &&
758 unlikely(task_ptrace(current) & PT_PTRACED))
759 send_sig(SIGTRAP, current, 0);
760 @@ -218,10 +251,37 @@
762 static inline void tracehook_report_exit(long *exit_code)
764 + if (unlikely(task_utrace_flags(current) & UTRACE_EVENT(EXIT)))
765 + utrace_report_exit(exit_code);
766 + ptrace_event(PT_TRACE_EXIT, PTRACE_EVENT_EXIT, *exit_code);
767 }
769 +/**
770 + * tracehook_init_task - task_struct has just been copied
771 + * @task: new &struct task_struct just copied from parent
773 + * Called from do_fork() when @task has just been duplicated.
774 + * After this, @task will be passed to tracehook_free_task()
775 + * even if the rest of its setup fails before it is fully created.
776 + */
777 +static inline void tracehook_init_task(struct task_struct *task)
779 + utrace_init_task(task);
780 +}
782 +/**
783 + * tracehook_free_task - task_struct is being freed
784 + * @task: dead &struct task_struct being freed
786 + * Called from free_task() when @task is no longer in use.
787 + */
788 +static inline void tracehook_free_task(struct task_struct *task)
789 +{
790 + if (task_utrace_struct(task))
791 + utrace_free_task(task);
792 +}
794 +/**
795 * tracehook_prepare_clone - prepare for new child to be cloned
796 * @clone_flags: %CLONE_* flags from clone/fork/vfork system call
798 @@ -285,6 +345,8 @@
799 unsigned long clone_flags,
800 pid_t pid, struct task_struct *child)
801 {
802 + if (unlikely(task_utrace_flags(current) & UTRACE_EVENT(CLONE)))
803 + utrace_report_clone(clone_flags, child);
804 if (unlikely(task_ptrace(child))) {
806 * It doesn't matter who attached/attaching to this
807 @@ -317,6 +379,9 @@
808 pid_t pid,
809 struct task_struct *child)
810 {
811 + if (unlikely(task_utrace_flags(current) & UTRACE_EVENT(CLONE)) &&
812 + (clone_flags & CLONE_VFORK))
813 + utrace_finish_vfork(current);
814 if (unlikely(trace))
815 ptrace_event(0, trace, pid);
816 }
817 @@ -351,6 +416,10 @@
819 static inline void tracehook_prepare_release_task(struct task_struct *task)
820 {
821 + /* see utrace_add_engine() about this barrier */
822 + smp_mb();
823 + if (task_utrace_flags(task))
824 + utrace_maybe_reap(task, task_utrace_struct(task), true);
825 }
828 @@ -365,6 +434,7 @@
829 static inline void tracehook_finish_release_task(struct task_struct *task)
830 {
831 ptrace_release_task(task);
832 + BUG_ON(task->exit_state != EXIT_DEAD);
833 }
836 @@ -386,7 +456,9 @@
837 const struct k_sigaction *ka,
838 struct pt_regs *regs, int stepping)
839 {
840 - if (stepping)
841 + if (task_utrace_flags(current))
842 + utrace_signal_handler(current, stepping);
843 + if (stepping && (task_ptrace(current) & PT_PTRACED))
844 ptrace_notify(SIGTRAP);
845 }
847 @@ -403,6 +475,8 @@
848 static inline int tracehook_consider_ignored_signal(struct task_struct *task,
849 int sig)
850 {
851 + if (unlikely(task_utrace_flags(task) & UTRACE_EVENT(SIGNAL_IGN)))
852 + return 1;
853 return (task_ptrace(task) & PT_PTRACED) != 0;
854 }
856 @@ -422,6 +496,9 @@
857 static inline int tracehook_consider_fatal_signal(struct task_struct *task,
858 int sig)
859 {
860 + if (unlikely(task_utrace_flags(task) & (UTRACE_EVENT(SIGNAL_TERM) |
861 + UTRACE_EVENT(SIGNAL_CORE))))
862 + return 1;
863 return (task_ptrace(task) & PT_PTRACED) != 0;
864 }
866 @@ -436,6 +513,8 @@
868 static inline int tracehook_force_sigpending(void)
869 {
870 + if (unlikely(task_utrace_flags(current)))
871 + return utrace_interrupt_pending();
872 return 0;
873 }
875 @@ -465,6 +544,8 @@
876 siginfo_t *info,
877 struct k_sigaction *return_ka)
878 {
879 + if (unlikely(task_utrace_flags(task)))
880 + return utrace_get_signal(task, regs, info, return_ka);
881 return 0;
882 }
884 @@ -492,7 +573,9 @@
886 static inline int tracehook_notify_jctl(int notify, int why)
887 {
888 - return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
889 + if (task_utrace_flags(current) & UTRACE_EVENT(JCTL))
890 + utrace_report_jctl(notify, why);
891 + return notify ?: task_ptrace(current) ? why : 0;
892 }
895 @@ -502,6 +585,8 @@
897 static inline void tracehook_finish_jctl(void)
898 {
899 + if (task_utrace_flags(current))
900 + utrace_finish_stop();
901 }
903 #define DEATH_REAP -1
904 @@ -524,6 +609,8 @@
905 static inline int tracehook_notify_death(struct task_struct *task,
906 void **death_cookie, int group_dead)
907 {
908 + *death_cookie = task_utrace_struct(task);
910 if (task_detached(task))
911 return task->ptrace ? SIGCHLD : DEATH_REAP;
913 @@ -560,6 +647,15 @@
914 int signal, void *death_cookie,
915 int group_dead)
916 {
917 + /*
918 + * If utrace_set_events() was just called to enable
919 + * UTRACE_EVENT(DEATH), then we are obliged to call
920 + * utrace_report_death() and not miss it. utrace_set_events()
921 + * checks @task->exit_state under tasklist_lock to synchronize
922 + * with exit_notify(), the caller.
923 + */
924 + if (task_utrace_flags(task) & _UTRACE_DEATH_EVENTS)
925 + utrace_report_death(task, death_cookie, group_dead, signal);
926 }
928 #ifdef TIF_NOTIFY_RESUME
929 @@ -589,10 +685,21 @@
930 * asynchronously, this will be called again before we return to
931 * user mode.
933 - * Called without locks.
934 + * Called without locks. However, on some machines this may be
935 + * called with interrupts disabled.
937 static inline void tracehook_notify_resume(struct pt_regs *regs)
938 {
939 + struct task_struct *task = current;
940 + /*
941 + * Prevent the following store/load from getting ahead of the
942 + * caller which clears TIF_NOTIFY_RESUME. This pairs with the
943 + * implicit mb() before setting TIF_NOTIFY_RESUME in
944 + * set_notify_resume().
945 + */
946 + smp_mb();
947 + if (task_utrace_flags(task))
948 + utrace_resume(task, regs);
949 }
950 #endif /* TIF_NOTIFY_RESUME */
952 diff -urN linux-2.6.39.1/include/linux/utrace.h linux-2.6.39.1b/include/linux/utrace.h
953 --- linux-2.6.39.1/include/linux/utrace.h 1969-12-31 17:00:00.000000000 -0700
954 +++ linux-2.6.39.1b/include/linux/utrace.h 2011-06-30 13:51:51.229186522 -0700
955 @@ -0,0 +1,692 @@
956 +/*
957 + * utrace infrastructure interface for debugging user processes
959 + * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved.
961 + * This copyrighted material is made available to anyone wishing to use,
962 + * modify, copy, or redistribute it subject to the terms and conditions
963 + * of the GNU General Public License v.2.
965 + * Red Hat Author: Roland McGrath.
967 + * This interface allows for notification of interesting events in a
968 + * thread. It also mediates access to thread state such as registers.
969 + * Multiple unrelated users can be associated with a single thread.
970 + * We call each of these a tracing engine.
972 + * A tracing engine starts by calling utrace_attach_task() or
973 + * utrace_attach_pid() on the chosen thread, passing in a set of hooks
974 + * (&struct utrace_engine_ops), and some associated data. This produces a
975 + * &struct utrace_engine, which is the handle used for all other
976 + * operations. An attached engine has its ops vector, its data, and an
977 + * event mask controlled by utrace_set_events().
979 + * For each event bit that is set, that engine will get the
980 + * appropriate ops->report_*() callback when the event occurs. The
981 + * &struct utrace_engine_ops need not provide callbacks for an event
982 + * unless the engine sets one of the associated event bits.
983 + */
985 +#ifndef _LINUX_UTRACE_H
986 +#define _LINUX_UTRACE_H 1
988 +#include <linux/list.h>
989 +#include <linux/kref.h>
990 +#include <linux/signal.h>
991 +#include <linux/sched.h>
993 +struct linux_binprm;
994 +struct pt_regs;
995 +struct utrace;
996 +struct user_regset;
997 +struct user_regset_view;
999 +/*
1000 + * Event bits passed to utrace_set_events().
1001 + * These appear in &struct task_struct.@utrace_flags
1002 + * and &struct utrace_engine.@flags.
1003 + */
1004 +enum utrace_events {
1005 + _UTRACE_EVENT_QUIESCE, /* Thread is available for examination. */
1006 + _UTRACE_EVENT_REAP, /* Zombie reaped, no more tracing possible. */
1007 + _UTRACE_EVENT_CLONE, /* Successful clone/fork/vfork just done. */
1008 + _UTRACE_EVENT_EXEC, /* Successful execve just completed. */
1009 + _UTRACE_EVENT_EXIT, /* Thread exit in progress. */
1010 + _UTRACE_EVENT_DEATH, /* Thread has died. */
1011 + _UTRACE_EVENT_SYSCALL_ENTRY, /* User entered kernel for system call. */
1012 + _UTRACE_EVENT_SYSCALL_EXIT, /* Returning to user after system call. */
1013 + _UTRACE_EVENT_SIGNAL, /* Signal delivery will run a user handler. */
1014 + _UTRACE_EVENT_SIGNAL_IGN, /* No-op signal to be delivered. */
1015 + _UTRACE_EVENT_SIGNAL_STOP, /* Signal delivery will suspend. */
1016 + _UTRACE_EVENT_SIGNAL_TERM, /* Signal delivery will terminate. */
1017 + _UTRACE_EVENT_SIGNAL_CORE, /* Signal delivery will dump core. */
1018 + _UTRACE_EVENT_JCTL, /* Job control stop or continue completed. */
1019 + _UTRACE_NEVENTS
1020 +};
1021 +#define UTRACE_EVENT(type) (1UL << _UTRACE_EVENT_##type)
1023 +/*
1024 + * All the kinds of signal events.
1025 + * These all use the @report_signal() callback.
1026 + */
1027 +#define UTRACE_EVENT_SIGNAL_ALL (UTRACE_EVENT(SIGNAL) \
1028 + | UTRACE_EVENT(SIGNAL_IGN) \
1029 + | UTRACE_EVENT(SIGNAL_STOP) \
1030 + | UTRACE_EVENT(SIGNAL_TERM) \
1031 + | UTRACE_EVENT(SIGNAL_CORE))
1032 +/*
1033 + * Both kinds of syscall events; these call the @report_syscall_entry()
1034 + * and @report_syscall_exit() callbacks, respectively.
1035 + */
1036 +#define UTRACE_EVENT_SYSCALL \
1037 + (UTRACE_EVENT(SYSCALL_ENTRY) | UTRACE_EVENT(SYSCALL_EXIT))
1039 +/*
1040 + * The event reports triggered synchronously by task death.
1041 + */
1042 +#define _UTRACE_DEATH_EVENTS (UTRACE_EVENT(DEATH) | UTRACE_EVENT(QUIESCE))
1044 +/*
1045 + * Hooks in <linux/tracehook.h> call these entry points to the utrace dispatch.
1046 + */
1047 +void utrace_free_task(struct task_struct *);
1048 +bool utrace_interrupt_pending(void);
1049 +void utrace_resume(struct task_struct *, struct pt_regs *);
1050 +void utrace_finish_stop(void);
1051 +void utrace_maybe_reap(struct task_struct *, struct utrace *, bool);
1052 +int utrace_get_signal(struct task_struct *, struct pt_regs *,
1053 + siginfo_t *, struct k_sigaction *);
1054 +void utrace_report_clone(unsigned long, struct task_struct *);
1055 +void utrace_finish_vfork(struct task_struct *);
1056 +void utrace_report_exit(long *exit_code);
1057 +void utrace_report_death(struct task_struct *, struct utrace *, bool, int);
1058 +void utrace_report_jctl(int notify, int type);
1059 +void utrace_report_exec(struct linux_binfmt *, struct linux_binprm *,
1060 + struct pt_regs *regs);
1061 +bool utrace_report_syscall_entry(struct pt_regs *);
1062 +void utrace_report_syscall_exit(struct pt_regs *);
1063 +void utrace_signal_handler(struct task_struct *, int);
1065 +#ifndef CONFIG_UTRACE
1067 +/*
1068 + * <linux/tracehook.h> uses these accessors to avoid #ifdef CONFIG_UTRACE.
1069 + */
1070 +static inline unsigned long task_utrace_flags(struct task_struct *task)
1071 +{
1072 + return 0;
1073 +}
1074 +static inline struct utrace *task_utrace_struct(struct task_struct *task)
1075 +{
1076 + return NULL;
1077 +}
1078 +static inline void utrace_init_task(struct task_struct *child)
1079 +{
1080 +}
1082 +static inline void task_utrace_proc_status(struct seq_file *m,
1083 + struct task_struct *p)
1084 +{
1085 +}
1087 +#else /* CONFIG_UTRACE */
1089 +static inline unsigned long task_utrace_flags(struct task_struct *task)
1090 +{
1091 + return task->utrace_flags;
1092 +}
1094 +static inline struct utrace *task_utrace_struct(struct task_struct *task)
1095 +{
1096 + struct utrace *utrace;
1098 + /*
1099 + * This barrier ensures that any prior load of task->utrace_flags
1100 + * is ordered before this load of task->utrace. We use those
1101 + * utrace_flags checks in the hot path to decide to call into
1102 + * the utrace code. The first attach installs task->utrace before
1103 + * setting task->utrace_flags nonzero with implicit barrier in
1104 + * between, see utrace_add_engine().
1105 + */
1106 + smp_rmb();
1107 + utrace = task->utrace;
1109 + smp_read_barrier_depends(); /* See utrace_task_alloc(). */
1110 + return utrace;
1111 +}
1113 +static inline void utrace_init_task(struct task_struct *task)
1114 +{
1115 + task->utrace_flags = 0;
1116 + task->utrace = NULL;
1117 +}
1119 +void task_utrace_proc_status(struct seq_file *m, struct task_struct *p);
1122 +/*
1123 + * Version number of the API defined in this file. This will change
1124 + * whenever a tracing engine's code would need some updates to keep
1125 + * working. We maintain this here for the benefit of tracing engine code
1126 + * that is developed concurrently with utrace API improvements before they
1127 + * are merged into the kernel, making LINUX_VERSION_CODE checks unwieldy.
1128 + */
1129 +#define UTRACE_API_VERSION 20091216
1131 +/**
1132 + * enum utrace_resume_action - engine's choice of action for a traced task
1133 + * @UTRACE_STOP: Stay quiescent after callbacks.
1134 + * @UTRACE_INTERRUPT: Make @report_signal() callback soon.
1135 + * @UTRACE_REPORT: Make some callback soon.
1136 + * @UTRACE_SINGLESTEP: Resume in user mode for one instruction.
1137 + * @UTRACE_BLOCKSTEP: Resume in user mode until next branch.
1138 + * @UTRACE_RESUME: Resume normally in user mode.
1139 + * @UTRACE_DETACH: Detach my engine (implies %UTRACE_RESUME).
1141 + * See utrace_control() for detailed descriptions of each action. This is
1142 + * encoded in the @action argument and the return value for every callback
1143 + * with a &u32 return value.
1145 + * The order of these is important. When there is more than one engine,
1146 + * each supplies its choice and the smallest value prevails.
1147 + */
1148 +enum utrace_resume_action {
1149 + UTRACE_STOP,
1150 + UTRACE_INTERRUPT,
1151 + UTRACE_REPORT,
1152 + UTRACE_SINGLESTEP,
1153 + UTRACE_BLOCKSTEP,
1154 + UTRACE_RESUME,
1155 + UTRACE_DETACH,
1156 + UTRACE_RESUME_MAX
1157 +};
1158 +#define UTRACE_RESUME_BITS (ilog2(UTRACE_RESUME_MAX) + 1)
1159 +#define UTRACE_RESUME_MASK ((1 << UTRACE_RESUME_BITS) - 1)
1161 +/**
1162 + * utrace_resume_action - &enum utrace_resume_action from callback action
1163 + * @action: &u32 callback @action argument or return value
1165 + * This extracts the &enum utrace_resume_action from @action,
1166 + * which is the @action argument to a &struct utrace_engine_ops
1167 + * callback or the return value from one.
1168 + */
1169 +static inline enum utrace_resume_action utrace_resume_action(u32 action)
1170 +{
1171 + return action & UTRACE_RESUME_MASK;
1172 +}
1174 +/**
1175 + * enum utrace_signal_action - disposition of signal
1176 + * @UTRACE_SIGNAL_DELIVER: Deliver according to sigaction.
1177 + * @UTRACE_SIGNAL_IGN: Ignore the signal.
1178 + * @UTRACE_SIGNAL_TERM: Terminate the process.
1179 + * @UTRACE_SIGNAL_CORE: Terminate with core dump.
1180 + * @UTRACE_SIGNAL_STOP: Deliver as absolute stop.
1181 + * @UTRACE_SIGNAL_TSTP: Deliver as job control stop.
1182 + * @UTRACE_SIGNAL_REPORT: Reporting before pending signals.
1183 + * @UTRACE_SIGNAL_HANDLER: Reporting after signal handler setup.
1185 + * This is encoded in the @action argument and the return value for
1186 + * a @report_signal() callback. It says what will happen to the
1187 + * signal described by the &siginfo_t parameter to the callback.
1189 + * The %UTRACE_SIGNAL_REPORT value is used in an @action argument when
1190 + * a tracing report is being made before dequeuing any pending signal.
1191 + * If this is immediately after a signal handler has been set up, then
1192 + * %UTRACE_SIGNAL_HANDLER is used instead. A @report_signal callback
1193 + * that uses %UTRACE_SIGNAL_DELIVER|%UTRACE_SINGLESTEP will ensure
1194 + * it sees a %UTRACE_SIGNAL_HANDLER report.
1195 + */
1196 +enum utrace_signal_action {
1197 + UTRACE_SIGNAL_DELIVER = 0x00,
1198 + UTRACE_SIGNAL_IGN = 0x10,
1199 + UTRACE_SIGNAL_TERM = 0x20,
1200 + UTRACE_SIGNAL_CORE = 0x30,
1201 + UTRACE_SIGNAL_STOP = 0x40,
1202 + UTRACE_SIGNAL_TSTP = 0x50,
1203 + UTRACE_SIGNAL_REPORT = 0x60,
1204 + UTRACE_SIGNAL_HANDLER = 0x70
1205 +};
1206 +#define UTRACE_SIGNAL_MASK 0xf0
1207 +#define UTRACE_SIGNAL_HOLD 0x100 /* Flag, push signal back on queue. */
1209 +/**
1210 + * utrace_signal_action - &enum utrace_signal_action from callback action
1211 + * @action: @report_signal callback @action argument or return value
1213 + * This extracts the &enum utrace_signal_action from @action, which
1214 + * is the @action argument to a @report_signal callback or the
1215 + * return value from one.
1216 + */
1217 +static inline enum utrace_signal_action utrace_signal_action(u32 action)
1218 +{
1219 + return action & UTRACE_SIGNAL_MASK;
1220 +}
1222 +/**
1223 + * enum utrace_syscall_action - disposition of system call attempt
1224 + * @UTRACE_SYSCALL_RUN: Run the system call.
1225 + * @UTRACE_SYSCALL_ABORT: Don't run the system call.
1227 + * This is encoded in the @action argument and the return value for
1228 + * a @report_syscall_entry callback.
1229 + */
1230 +enum utrace_syscall_action {
1231 + UTRACE_SYSCALL_RUN = 0x00,
1232 + UTRACE_SYSCALL_ABORT = 0x10
1233 +};
1234 +#define UTRACE_SYSCALL_MASK 0xf0
1235 +#define UTRACE_SYSCALL_RESUMED 0x100 /* Flag, report_syscall_entry() repeats */
1237 +/**
1238 + * utrace_syscall_action - &enum utrace_syscall_action from callback action
1239 + * @action: @report_syscall_entry callback @action or return value
1241 + * This extracts the &enum utrace_syscall_action from @action, which
1242 + * is the @action argument to a @report_syscall_entry callback or the
1243 + * return value from one.
1244 + */
1245 +static inline enum utrace_syscall_action utrace_syscall_action(u32 action)
1246 +{
1247 + return action & UTRACE_SYSCALL_MASK;
1248 +}
1250 +/*
1251 + * Flags for utrace_attach_task() and utrace_attach_pid().
1252 + */
1253 +#define UTRACE_ATTACH_MATCH_OPS 0x0001 /* Match engines on ops. */
1254 +#define UTRACE_ATTACH_MATCH_DATA 0x0002 /* Match engines on data. */
1255 +#define UTRACE_ATTACH_MATCH_MASK 0x000f
1256 +#define UTRACE_ATTACH_CREATE 0x0010 /* Attach a new engine. */
1257 +#define UTRACE_ATTACH_EXCLUSIVE 0x0020 /* Refuse if existing match. */
1259 +/**
1260 + * struct utrace_engine - per-engine structure
1261 + * @ops: &struct utrace_engine_ops pointer passed to utrace_attach_task()
1262 + * @data: engine-private &void * passed to utrace_attach_task()
1263 + * @flags: event mask set by utrace_set_events() plus internal flag bits
1265 + * The task itself never has to worry about engines detaching while
1266 + * it's doing event callbacks. These structures are removed from the
1267 + * task's active list only when it's stopped, or by the task itself.
1269 + * utrace_engine_get() and utrace_engine_put() maintain a reference count.
1270 + * When it drops to zero, the structure is freed. One reference is held
1271 + * implicitly while the engine is attached to its task.
1272 + */
1273 +struct utrace_engine {
1274 +/* private: */
1275 + struct kref kref;
1276 + void (*release)(void *);
1277 + struct list_head entry;
1279 +/* public: */
1280 + const struct utrace_engine_ops *ops;
1281 + void *data;
1283 + unsigned long flags;
1284 +};
1286 +/**
1287 + * utrace_engine_get - acquire a reference on a &struct utrace_engine
1288 + * @engine: &struct utrace_engine pointer
1290 + * You must hold a reference on @engine, and you get another.
1291 + */
1292 +static inline void utrace_engine_get(struct utrace_engine *engine)
1293 +{
1294 + kref_get(&engine->kref);
1295 +}
1297 +void __utrace_engine_release(struct kref *);
1299 +/**
1300 + * utrace_engine_put - release a reference on a &struct utrace_engine
1301 + * @engine: &struct utrace_engine pointer
1303 + * You must hold a reference on @engine, and you lose that reference.
1304 + * If it was the last one, @engine becomes an invalid pointer.
1305 + */
1306 +static inline void utrace_engine_put(struct utrace_engine *engine)
1307 +{
1308 + kref_put(&engine->kref, __utrace_engine_release);
1309 +}
1311 +/**
1312 + * struct utrace_engine_ops - tracing engine callbacks
1314 + * Each @report_*() callback corresponds to an %UTRACE_EVENT(*) bit.
1315 + * utrace_set_events() calls on @engine choose which callbacks will
1316 + * be made to @engine from @task.
1318 + * Most callbacks take an @action argument, giving the resume action
1319 + * chosen by other tracing engines. All callbacks take an @engine
1320 + * argument. The @report_reap callback takes a @task argument that
1321 + * might or might not be @current. All other @report_* callbacks
1322 + * report an event in the @current task.
1324 + * For some calls, @action also includes bits specific to that event
1325 + * and utrace_resume_action() is used to extract the resume action.
1326 + * This shows what would happen if @engine wasn't there, or will if
1327 + * the callback's return value uses %UTRACE_RESUME. This always
1328 + * starts as %UTRACE_RESUME when no other tracing is being done on
1329 + * this task.
1331 + * All return values contain &enum utrace_resume_action bits. For
1332 + * some calls, other bits specific to that kind of event are added to
1333 + * the resume action bits with OR. These are the same bits used in
1334 + * the @action argument. The resume action returned by a callback
1335 + * does not override previous engines' choices, it only says what
1336 + * @engine wants done. What @current actually does is the action that's
1337 + * most constrained among the choices made by all attached engines.
1338 + * See utrace_control() for more information on the actions.
1340 + * When %UTRACE_STOP is used in @report_syscall_entry, then @current
1341 + * stops before attempting the system call. In this case, another
1342 + * @report_syscall_entry callback will follow after @current resumes if
1343 + * %UTRACE_REPORT or %UTRACE_INTERRUPT was returned by some callback
1344 + * or passed to utrace_control(). In a second or later callback,
1345 + * %UTRACE_SYSCALL_RESUMED is set in the @action argument to indicate
1346 + * a repeat callback still waiting to attempt the same system call
1347 + * invocation. This repeat callback gives each engine an opportunity
1348 + * to reexamine registers another engine might have changed while
1349 + * @current was held in %UTRACE_STOP.
1351 + * In other cases, the resume action does not take effect until @current
1352 + * is ready to check for signals and return to user mode. If there
1353 + * are more callbacks to be made, the last round of calls determines
1354 + * the final action. A @report_quiesce callback with @event zero, or
1355 + * a @report_signal callback, will always be the last one made before
1356 + * @current resumes. Only %UTRACE_STOP is "sticky"--if @engine returned
1357 + * %UTRACE_STOP then @current stays stopped unless @engine returns
1358 + * a different action from a following callback.
1360 + * The report_death() and report_reap() callbacks do not take @action
1361 + * arguments, and only %UTRACE_DETACH is meaningful in the return value
1362 + * from a report_death() callback. None of the resume actions applies
1363 + * to a dead thread.
1365 + * All @report_*() hooks are called with no locks held, in a generally
1366 + * safe environment when we will be returning to user mode soon (or have just
1367 + * entered the kernel). It is fine to block for memory allocation and
1368 + * the like, but all hooks are asynchronous and must not block on
1369 + * external events! If you want the thread to block, use %UTRACE_STOP
1370 + * in your hook's return value; then later wake it up with utrace_control().
1372 + * @report_quiesce:
1373 + * Requested by %UTRACE_EVENT(%QUIESCE).
1374 + * This does not indicate any event, but just that @current is in a
1375 + * safe place for examination. This call is made before each specific
1376 + * event callback, except for @report_reap. The @event argument gives
1377 + * the %UTRACE_EVENT(@which) value for the event occurring. This
1378 + * callback might be made for events @engine has not requested, if
1379 + * some other engine is tracing the event; a utrace_set_events()
1380 + * call here can request the immediate callback for this occurrence of
1381 + * @event. @event is zero when there is no other event, @current is
1382 + * now ready to check for signals and return to user mode, and some
1383 + * engine has used %UTRACE_REPORT or %UTRACE_INTERRUPT to request this
1384 + * callback. For this case, if @report_signal is not %NULL, the
1385 + * @report_quiesce callback may be replaced with a @report_signal
1386 + * callback passing %UTRACE_SIGNAL_REPORT in its @action argument,
1387 + * whenever @current is entering the signal-check path anyway.
1389 + * @report_signal:
1390 + * Requested by %UTRACE_EVENT(%SIGNAL_*) or %UTRACE_EVENT(%QUIESCE).
1391 + * Use utrace_signal_action() and utrace_resume_action() on @action.
1392 + * The signal action is %UTRACE_SIGNAL_REPORT when some engine has
1393 + * used %UTRACE_REPORT or %UTRACE_INTERRUPT; the callback can choose
1394 + * to stop or to deliver an artificial signal, before pending signals.
1395 + * It's %UTRACE_SIGNAL_HANDLER instead when signal handler setup just
1396 + * finished (after a previous %UTRACE_SIGNAL_DELIVER return); this
1397 + * serves in lieu of any %UTRACE_SIGNAL_REPORT callback requested by
1398 + * %UTRACE_REPORT or %UTRACE_INTERRUPT, and is also implicitly
1399 + * requested by %UTRACE_SINGLESTEP or %UTRACE_BLOCKSTEP into the
1400 + * signal delivery. The other signal actions indicate a signal about
1401 + * to be delivered; the previous engine's return value sets the signal
1402 + * action seen by the following engine's callback. The @info data
1403 + * can be changed at will, including @info->si_signo. The settings in
1404 + * @return_ka determine what %UTRACE_SIGNAL_DELIVER does. @orig_ka
1405 + * is what was in force before other tracing engines intervened, and
1406 + * it's %NULL when this report began as %UTRACE_SIGNAL_REPORT or
1407 + * %UTRACE_SIGNAL_HANDLER. For a report without a new signal, @info
1408 + * is left uninitialized and must be set completely by an engine that
1409 + * chooses to deliver a signal; if there was a previous @report_signal
1410 + * callback ending in %UTRACE_STOP and it was just resumed using
1411 + * %UTRACE_REPORT or %UTRACE_INTERRUPT, then @info is left unchanged
1412 + * from the previous callback. In this way, the original signal can
1413 + * be left in @info while returning %UTRACE_STOP|%UTRACE_SIGNAL_IGN
1414 + * and then found again when resuming with %UTRACE_INTERRUPT.
1415 + * The %UTRACE_SIGNAL_HOLD flag bit can be OR'd into the return value,
1416 + * and might be in @action if the previous engine returned it. This
1417 + * flag asks that the signal in @info be pushed back on @current's queue
1418 + * so that it will be seen again after whatever action is taken now.
1420 + * @report_clone:
1421 + * Requested by %UTRACE_EVENT(%CLONE).
1422 + * Event reported for parent, before the new task @child might run.
1423 + * @clone_flags gives the flags used in the clone system call, or
1424 + * equivalent flags for a fork() or vfork() system call. This
1425 + * function can use utrace_attach_task() on @child. Then passing
1426 + * %UTRACE_STOP to utrace_control() on @child here keeps the child
1427 + * stopped before it ever runs in user mode; %UTRACE_REPORT or
1428 + * %UTRACE_INTERRUPT ensures a callback from @child before it
1429 + * starts in user mode.
1431 + * @report_jctl:
1432 + * Requested by %UTRACE_EVENT(%JCTL).
1433 + * Job control event; @type is %CLD_STOPPED or %CLD_CONTINUED,
1434 + * indicating whether we are stopping or resuming now. If @notify
1435 + * is nonzero, @current is the last thread to stop and so will send
1436 + * %SIGCHLD to its parent after this callback; @notify reflects
1437 + * what the parent's %SIGCHLD has in @si_code, which can sometimes
1438 + * be %CLD_STOPPED even when @type is %CLD_CONTINUED.
1440 + * @report_exec:
1441 + * Requested by %UTRACE_EVENT(%EXEC).
1442 + * An execve system call has succeeded and the new program is about to
1443 + * start running. The initial user register state can be tweaked
1444 + * directly in @regs. @fmt and @bprm give the details of this exec.
1446 + * @report_syscall_entry:
1447 + * Requested by %UTRACE_EVENT(%SYSCALL_ENTRY).
1448 + * Thread has entered the kernel to request a system call.
1449 + * The user register state can be tweaked directly in @regs.
1450 + * The @action argument contains an &enum utrace_syscall_action,
1451 + * use utrace_syscall_action() to extract it. The return value
1452 + * overrides the last engine's action for the system call.
1453 + * If the final action is %UTRACE_SYSCALL_ABORT, no system call
1454 + * is made. The details of the system call being attempted can
1455 + * be fetched here with syscall_get_nr() and syscall_get_arguments().
1456 + * The parameter registers can be changed with syscall_set_arguments().
1457 + * See above about the %UTRACE_SYSCALL_RESUMED flag in @action.
1458 + * Use %UTRACE_REPORT in the return value to guarantee you get
1459 + * another callback (with %UTRACE_SYSCALL_RESUMED flag) in case
1460 + * @current stops with %UTRACE_STOP before attempting the system call.
1462 + * @report_syscall_exit:
1463 + * Requested by %UTRACE_EVENT(%SYSCALL_EXIT).
1464 + * Thread is about to leave the kernel after a system call request.
1465 + * The user register state can be tweaked directly in @regs.
1466 + * The results of the system call attempt can be examined here using
1467 + * syscall_get_error() and syscall_get_return_value(). It is safe
1468 + * here to call syscall_set_return_value() or syscall_rollback().
1470 + * @report_exit:
1471 + * Requested by %UTRACE_EVENT(%EXIT).
1472 + * Thread is exiting and cannot be prevented from doing so,
1473 + * but all its state is still live. The @code value will be
1474 + * the wait result seen by the parent, and can be changed by
1475 + * this engine or others. The @orig_code value is the real
1476 + * status, not changed by any tracing engine. Returning %UTRACE_STOP
1477 + * here keeps @current stopped before it cleans up its state and dies,
1478 + * so it can be examined by other processes. When @current is allowed
1479 + * to run, it will die and get to the @report_death callback.
1481 + * @report_death:
1482 + * Requested by %UTRACE_EVENT(%DEATH).
1483 + * Thread is really dead now. It might be reaped by its parent at
1484 + * any time, or self-reap immediately. Though the actual reaping
1485 + * may happen in parallel, a report_reap() callback will always be
1486 + * ordered after a report_death() callback.
1488 + * @report_reap:
1489 + * Requested by %UTRACE_EVENT(%REAP).
1490 + * Called when someone reaps the dead task (parent, init, or self).
1491 + * This means the parent called wait, or else this was a detached
1492 + * thread or a process whose parent ignores SIGCHLD.
1493 + * No more callbacks are made after this one.
1494 + * The engine is always detached.
1495 + * There is nothing more a tracing engine can do about this thread.
1496 + * After this callback, the @engine pointer will become invalid.
1497 + * The @task pointer may become invalid if get_task_struct() hasn't
1498 + * been used to keep it alive.
1499 + * An engine should always request this callback if it stores the
1500 + * @engine pointer or stores any pointer in @engine->data, so it
1501 + * can clean up its data structures.
1502 + * Unlike other callbacks, this can be called from the parent's context
1503 + * rather than from the traced thread itself--it must not delay the
1504 + * parent by blocking.
1506 + * @release:
1507 + * If not %NULL, this is called after the last utrace_engine_put()
1508 + * call for a &struct utrace_engine, which could be implicit after
1509 + * a %UTRACE_DETACH return from another callback. Its argument is
1510 + * the engine's @data member.
1511 + */
1512 +struct utrace_engine_ops {
1513 + u32 (*report_quiesce)(u32 action, struct utrace_engine *engine,
1514 + unsigned long event);
1515 + u32 (*report_signal)(u32 action, struct utrace_engine *engine,
1516 + struct pt_regs *regs,
1517 + siginfo_t *info,
1518 + const struct k_sigaction *orig_ka,
1519 + struct k_sigaction *return_ka);
1520 + u32 (*report_clone)(u32 action, struct utrace_engine *engine,
1521 + unsigned long clone_flags,
1522 + struct task_struct *child);
1523 + u32 (*report_jctl)(u32 action, struct utrace_engine *engine,
1524 + int type, int notify);
1525 + u32 (*report_exec)(u32 action, struct utrace_engine *engine,
1526 + const struct linux_binfmt *fmt,
1527 + const struct linux_binprm *bprm,
1528 + struct pt_regs *regs);
1529 + u32 (*report_syscall_entry)(u32 action, struct utrace_engine *engine,
1530 + struct pt_regs *regs);
1531 + u32 (*report_syscall_exit)(u32 action, struct utrace_engine *engine,
1532 + struct pt_regs *regs);
1533 + u32 (*report_exit)(u32 action, struct utrace_engine *engine,
1534 + long orig_code, long *code);
1535 + u32 (*report_death)(struct utrace_engine *engine,
1536 + bool group_dead, int signal);
1537 + void (*report_reap)(struct utrace_engine *engine,
1538 + struct task_struct *task);
1539 + void (*release)(void *data);
1540 +};
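As the comment above notes, %UTRACE_STOP is the one "sticky" return value: the thread stays parked until utrace_control() releases it. A minimal stop-and-resume pair, in sketch form (the demo_* names are invented):

static u32 demo_report_quiesce(u32 action, struct utrace_engine *engine,
			       unsigned long event)
{
	return UTRACE_STOP;		/* park the thread here */
}

/* Called later, from some other context, to release the thread. */
static int demo_resume(struct task_struct *task,
		       struct utrace_engine *engine)
{
	return utrace_control(task, engine, UTRACE_RESUME);
}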
1542 +/**
1543 + * struct utrace_examiner - private state for using utrace_prepare_examine()
1545 + * The members of &struct utrace_examiner are private to the implementation.
1546 + * This data type holds the state from a call to utrace_prepare_examine()
1547 + * to be used by a call to utrace_finish_examine().
1548 + */
1549 +struct utrace_examiner {
1550 +/* private: */
1551 + long state;
1552 + unsigned long ncsw;
1553 +};
1555 +/*
1556 + * These are the exported entry points for tracing engines to use.
1557 + * See kernel/utrace.c for their kerneldoc comments with interface details.
1558 + */
1559 +struct utrace_engine *utrace_attach_task(struct task_struct *, int,
1560 + const struct utrace_engine_ops *,
1561 + void *);
1562 +struct utrace_engine *utrace_attach_pid(struct pid *, int,
1563 + const struct utrace_engine_ops *,
1564 + void *);
1565 +int __must_check utrace_control(struct task_struct *,
1566 + struct utrace_engine *,
1567 + enum utrace_resume_action);
1568 +int __must_check utrace_set_events(struct task_struct *,
1569 + struct utrace_engine *,
1570 + unsigned long eventmask);
1571 +int __must_check utrace_barrier(struct task_struct *,
1572 + struct utrace_engine *);
1573 +int __must_check utrace_prepare_examine(struct task_struct *,
1574 + struct utrace_engine *,
1575 + struct utrace_examiner *);
1576 +int __must_check utrace_finish_examine(struct task_struct *,
1577 + struct utrace_engine *,
1578 + struct utrace_examiner *);
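Taken together, these entry points are enough to sketch a whole engine. The following is illustrative only (the demo_* names are invented and error handling is minimal): it attaches a new engine to a task and logs every successful exec.

#include <linux/utrace.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/kernel.h>

static u32 demo_report_exec(u32 action, struct utrace_engine *engine,
			    const struct linux_binfmt *fmt,
			    const struct linux_binprm *bprm,
			    struct pt_regs *regs)
{
	printk(KERN_INFO "demo: pid %d exec'd\n", task_pid_nr(current));
	return UTRACE_RESUME;		/* let the task continue */
}

static const struct utrace_engine_ops demo_ops = {
	.report_exec	= demo_report_exec,
};

/* The caller must ensure @task cannot be freed, per utrace_attach_task(). */
static int demo_attach(struct task_struct *task)
{
	struct utrace_engine *engine;
	int ret;

	engine = utrace_attach_task(task, UTRACE_ATTACH_CREATE,
				    &demo_ops, NULL);
	if (IS_ERR(engine))
		return PTR_ERR(engine);

	ret = utrace_set_events(task, engine, UTRACE_EVENT(EXEC));
	utrace_engine_put(engine);	/* drop the reference from attach */
	return ret;
}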
1580 +/**
1581 + * utrace_control_pid - control a thread being traced by a tracing engine
1582 + * @pid: thread to affect
1583 + * @engine: attached engine to affect
1584 + * @action: &enum utrace_resume_action for thread to do
1586 + * This is the same as utrace_control(), but takes a &struct pid
1587 + * pointer rather than a &struct task_struct pointer. The caller must
1588 + * hold a ref on @pid, but does not need to worry about the task
1589 + * staying valid. If it's been reaped so that @pid points nowhere,
1590 + * then this call returns -%ESRCH.
1591 + */
1592 +static inline __must_check int utrace_control_pid(
1593 + struct pid *pid, struct utrace_engine *engine,
1594 + enum utrace_resume_action action)
1595 +{
1596 + /*
1597 + * We don't bother with rcu_read_lock() here to protect the
1598 + * task_struct pointer, because utrace_control will return
1599 + * -ESRCH without looking at that pointer if the engine is
1600 + * already detached. A task_struct pointer can't die before
1601 + * all the engines are detached in release_task() first.
1602 + */
1603 + struct task_struct *task = pid_task(pid, PIDTYPE_PID);
1604 + return unlikely(!task) ? -ESRCH : utrace_control(task, engine, action);
1605 +}
1607 +/**
1608 + * utrace_set_events_pid - choose which event reports a tracing engine gets
1609 + * @pid: thread to affect
1610 + * @engine: attached engine to affect
1611 + * @eventmask: new event mask
1613 + * This is the same as utrace_set_events(), but takes a &struct pid
1614 + * pointer rather than a &struct task_struct pointer. The caller must
1615 + * hold a ref on @pid, but does not need to worry about the task
1616 + * staying valid. If it's been reaped so that @pid points nowhere,
1617 + * then this call returns -%ESRCH.
1618 + */
1619 +static inline __must_check int utrace_set_events_pid(
1620 + struct pid *pid, struct utrace_engine *engine, unsigned long eventmask)
1621 +{
1622 + struct task_struct *task = pid_task(pid, PIDTYPE_PID);
1623 + return unlikely(!task) ? -ESRCH :
1624 + utrace_set_events(task, engine, eventmask);
1625 +}
1627 +/**
1628 + * utrace_barrier_pid - synchronize with simultaneous tracing callbacks
1629 + * @pid: thread to affect
1630 + * @engine: engine to affect (can be detached)
1632 + * This is the same as utrace_barrier(), but takes a &struct pid
1633 + * pointer rather than a &struct task_struct pointer. The caller must
1634 + * hold a ref on @pid, but does not need to worry about the task
1635 + * staying valid. If it's been reaped so that @pid points nowhere,
1636 + * then this call returns -%ESRCH.
1637 + */
1638 +static inline __must_check int utrace_barrier_pid(struct pid *pid,
1639 + struct utrace_engine *engine)
1640 +{
1641 + struct task_struct *task = pid_task(pid, PIDTYPE_PID);
1642 + return unlikely(!task) ? -ESRCH : utrace_barrier(task, engine);
1643 +}
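These pid-based wrappers combine naturally with utrace_barrier_pid(). In sketch form (the demo_* name is invented; the -EINPROGRESS handling mirrors what ptrace_reuse_engine() does after utrace_set_events() later in this patch):

static int demo_set_mask_pid(struct pid *pid, struct utrace_engine *engine,
			     unsigned long events)
{
	int ret = utrace_set_events_pid(pid, engine, events);

	if (ret == -EINPROGRESS)	/* a callback was still in progress */
		ret = utrace_barrier_pid(pid, engine);
	return ret;
}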
1645 +#endif /* CONFIG_UTRACE */
1647 +#endif /* linux/utrace.h */
1648 diff -urN linux-2.6.39.1/init/Kconfig linux-2.6.39.1b/init/Kconfig
1649 --- linux-2.6.39.1/init/Kconfig 2011-06-02 17:34:20.000000000 -0700
1650 +++ linux-2.6.39.1b/init/Kconfig 2011-06-30 10:42:11.423096013 -0700
1651 @@ -364,6 +364,15 @@
1652 depends on AUDITSYSCALL
1653 select FSNOTIFY
1655 +config UTRACE
1656 + bool "Infrastructure for tracing and debugging user processes"
1657 + depends on EXPERIMENTAL
1658 + depends on HAVE_ARCH_TRACEHOOK
1659 + help
1660 + Enable the utrace process tracing interface. This is an internal
1661 + kernel interface exported to kernel modules, to track events in
1662 + user threads, and to extract and change user thread state.
1664 source "kernel/irq/Kconfig"
1666 menu "RCU Subsystem"
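For reference, a build that wants this interface carries a config fragment along these lines (assuming the target architecture already provides HAVE_ARCH_TRACEHOOK):

CONFIG_EXPERIMENTAL=y
CONFIG_UTRACE=y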
1667 diff -urN linux-2.6.39.1/kernel/fork.c linux-2.6.39.1b/kernel/fork.c
1668 --- linux-2.6.39.1/kernel/fork.c 2011-06-02 17:34:20.000000000 -0700
1669 +++ linux-2.6.39.1b/kernel/fork.c 2011-06-30 10:42:11.423096013 -0700
1670 @@ -169,6 +169,7 @@
1671 free_thread_info(tsk->stack);
1672 rt_mutex_debug_task_free(tsk);
1673 ftrace_graph_exit_task(tsk);
1674 + tracehook_free_task(tsk);
1675 free_task_struct(tsk);
1677 EXPORT_SYMBOL(free_task);
1678 @@ -1040,6 +1041,8 @@
1679 if (!p)
1680 goto fork_out;
1682 + tracehook_init_task(p);
1684 ftrace_graph_init_task(p);
1686 rt_mutex_init_task(p);
1687 diff -urN linux-2.6.39.1/kernel/Makefile linux-2.6.39.1b/kernel/Makefile
1688 --- linux-2.6.39.1/kernel/Makefile 2011-06-02 17:34:20.000000000 -0700
1689 +++ linux-2.6.39.1b/kernel/Makefile 2011-06-30 13:51:31.965854164 -0700
1690 @@ -70,6 +70,7 @@
1691 obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
1692 obj-$(CONFIG_SMP) += stop_machine.o
1693 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
1694 +obj-$(CONFIG_UTRACE) += utrace.o
1695 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
1696 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
1697 obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o
1698 diff -urN linux-2.6.39.1/kernel/ptrace.c linux-2.6.39.1b/kernel/ptrace.c
1699 --- linux-2.6.39.1/kernel/ptrace.c 2011-06-02 17:34:20.000000000 -0700
1700 +++ linux-2.6.39.1b/kernel/ptrace.c 2011-06-30 13:50:53.915856090 -0700
1701 @@ -23,7 +23,30 @@
1702 #include <linux/uaccess.h>
1703 #include <linux/regset.h>
1704 #include <linux/hw_breakpoint.h>
1705 +#include <linux/utrace.h>
1707 +struct ptrace_context {
1708 + int options;
1710 + int signr;
1711 + siginfo_t *siginfo;
1713 + int stop_code;
1714 + unsigned long eventmsg;
1716 + enum utrace_resume_action resume;
1719 +#define PT_UTRACED 0x00001000
1721 +#define PTRACE_O_SYSEMU 0x100
1722 +#define PTRACE_O_DETACHED 0x200
1724 +#define PTRACE_EVENT_SYSCALL (1 << 16)
1725 +#define PTRACE_EVENT_SIGTRAP (2 << 16)
1726 +#define PTRACE_EVENT_SIGNAL (3 << 16)
1727 +/* events visible to user-space */
1728 +#define PTRACE_EVENT_MASK 0xFFFF
1731 * ptrace a task: make the debugger its new parent and
1732 @@ -80,13 +103,750 @@
1733 ptrace_untrace(child);
1736 +static inline bool ptrace_event_pending(struct ptrace_context *ctx)
1738 + return ctx->stop_code != 0;
1741 +static inline int get_stop_event(struct ptrace_context *ctx)
1743 + return ctx->stop_code >> 8;
1746 +static inline void set_stop_code(struct ptrace_context *ctx, int event)
1748 + ctx->stop_code = (event << 8) | SIGTRAP;
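A worked example of the encoding, derived from the definitions above:

/*
 * set_stop_code(ctx, PTRACE_EVENT_SYSCALL)
 *	=> ctx->stop_code == ((1 << 16) << 8) | SIGTRAP
 * get_stop_event(ctx)
 *	=> PTRACE_EVENT_SYSCALL
 * ctx->stop_code & PTRACE_EVENT_MASK
 *	=> SIGTRAP, the value do_ptrace_notify_stop() below copies into
 *	   ->exit_code; set_syscall_code() ORs in 0x80 when
 *	   PTRACE_O_TRACESYSGOOD is set, so the tracer sees SIGTRAP | 0x80.
 */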
1751 +static inline struct ptrace_context *
1752 +ptrace_context(struct utrace_engine *engine)
1754 + return engine->data;
1757 +static const struct utrace_engine_ops ptrace_utrace_ops; /* forward decl */
1759 +static struct utrace_engine *ptrace_lookup_engine(struct task_struct *tracee)
1761 + return utrace_attach_task(tracee, UTRACE_ATTACH_MATCH_OPS,
1762 + &ptrace_utrace_ops, NULL);
1765 +static int utrace_barrier_uninterruptible(struct task_struct *target,
1766 + struct utrace_engine *engine)
1768 + for (;;) {
1769 + int err = utrace_barrier(target, engine);
1771 + if (err != -ERESTARTSYS)
1772 + return err;
1774 + schedule_timeout_uninterruptible(1);
1778 +static struct utrace_engine *
1779 +ptrace_reuse_engine(struct task_struct *tracee)
1781 + struct utrace_engine *engine;
1782 + struct ptrace_context *ctx;
1783 + int err = -EPERM;
1785 + engine = ptrace_lookup_engine(tracee);
1786 + if (IS_ERR(engine))
1787 + return engine;
1789 + ctx = ptrace_context(engine);
1790 + if (unlikely(ctx->options == PTRACE_O_DETACHED)) {
1791 + /*
1792 + * Try to reuse this self-detaching engine.
1793 + * The only caller which can hit this case is ptrace_attach(),
1794 + * it holds ->cred_guard_mutex.
1795 + */
1796 + ctx->options = 0;
1797 + ctx->eventmsg = 0;
1799 + /* make sure we don't get unwanted reports */
1800 + err = utrace_set_events(tracee, engine, UTRACE_EVENT(QUIESCE));
1801 + if (!err || err == -EINPROGRESS) {
1802 + ctx->resume = UTRACE_RESUME;
1803 + /* synchronize with ptrace_report_signal() */
1804 + err = utrace_barrier_uninterruptible(tracee, engine);
1807 + if (!err) {
1808 + WARN_ON(engine->ops != &ptrace_utrace_ops &&
1809 + !tracee->exit_state);
1810 + return engine;
1813 + WARN_ON(engine->ops == &ptrace_utrace_ops);
1816 + utrace_engine_put(engine);
1817 + return ERR_PTR(err);
1820 +static struct utrace_engine *
1821 +ptrace_attach_engine(struct task_struct *tracee)
1823 + struct utrace_engine *engine;
1824 + struct ptrace_context *ctx;
1826 + if (unlikely(task_utrace_flags(tracee))) {
1827 + engine = ptrace_reuse_engine(tracee);
1828 + if (!IS_ERR(engine) || PTR_ERR(engine) == -EPERM)
1829 + return engine;
1832 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1833 + if (unlikely(!ctx))
1834 + return ERR_PTR(-ENOMEM);
1836 + ctx->resume = UTRACE_RESUME;
1838 + engine = utrace_attach_task(tracee, UTRACE_ATTACH_CREATE |
1839 + UTRACE_ATTACH_EXCLUSIVE |
1840 + UTRACE_ATTACH_MATCH_OPS,
1841 + &ptrace_utrace_ops, ctx);
1842 + if (unlikely(IS_ERR(engine))) {
1843 + if (engine != ERR_PTR(-ESRCH) &&
1844 + engine != ERR_PTR(-ERESTARTNOINTR))
1845 + engine = ERR_PTR(-EPERM);
1846 + kfree(ctx);
1849 + return engine;
1852 +static inline int ptrace_set_events(struct task_struct *target,
1853 + struct utrace_engine *engine,
1854 + unsigned long options)
1856 + struct ptrace_context *ctx = ptrace_context(engine);
1857 + /*
1858 + * We need QUIESCE for resume handling, CLONE to check
1859 + * for CLONE_PTRACE; other events are always reported.
1860 + */
1861 + unsigned long events = UTRACE_EVENT(QUIESCE) | UTRACE_EVENT(CLONE) |
1862 + UTRACE_EVENT(EXEC) | UTRACE_EVENT_SIGNAL_ALL;
1864 + ctx->options = options;
1865 + if (options & PTRACE_O_TRACEEXIT)
1866 + events |= UTRACE_EVENT(EXIT);
1868 + return utrace_set_events(target, engine, events);
1872 + * Attach a utrace engine for ptrace and set up its event mask.
1873 + * Returns error code or 0 on success.
1874 + */
1875 +static int ptrace_attach_task(struct task_struct *tracee, int options)
1877 + struct utrace_engine *engine;
1878 + int err;
1880 + engine = ptrace_attach_engine(tracee);
1881 + if (IS_ERR(engine))
1882 + return PTR_ERR(engine);
1883 + /*
1884 + * It can fail only if the tracee is dead; the caller
1885 + * must notice this before setting PT_UTRACED.
1886 + */
1887 + err = ptrace_set_events(tracee, engine, options);
1888 + WARN_ON(err && !tracee->exit_state);
1889 + utrace_engine_put(engine);
1890 + return 0;
1893 +static int ptrace_wake_up(struct task_struct *tracee,
1894 + struct utrace_engine *engine,
1895 + enum utrace_resume_action action,
1896 + bool force_wakeup)
1898 + if (force_wakeup) {
1899 + unsigned long flags;
1900 + /*
1901 + * Preserve the compatibility bug. Historically ptrace
1902 + * wakes up the tracee even if it should not. Clear
1903 + * SIGNAL_STOP_STOPPED for utrace_wakeup().
1904 + */
1905 + if (lock_task_sighand(tracee, &flags)) {
1906 + tracee->signal->flags &= ~SIGNAL_STOP_STOPPED;
1907 + unlock_task_sighand(tracee, &flags);
1911 + if (action != UTRACE_REPORT)
1912 + ptrace_context(engine)->stop_code = 0;
1914 + return utrace_control(tracee, engine, action);
1917 +static void ptrace_detach_task(struct task_struct *tracee, int sig)
1919 + /*
1920 + * If true, the caller is PTRACE_DETACH, otherwise
1921 + * the tracer detaches implicitly during exit.
1922 + */
1923 + bool explicit = (sig >= 0);
1924 + struct utrace_engine *engine = ptrace_lookup_engine(tracee);
1925 + enum utrace_resume_action action = UTRACE_DETACH;
1926 + struct ptrace_context *ctx;
1928 + if (unlikely(IS_ERR(engine)))
1929 + return;
1931 + ctx = ptrace_context(engine);
1933 + if (!explicit) {
1934 + int err;
1936 + /*
1937 + * We are going to detach, the tracee can be running.
1938 + * Ensure ptrace_report_signal() won't report a signal.
1939 + */
1940 + ctx->resume = UTRACE_DETACH;
1941 + err = utrace_barrier_uninterruptible(tracee, engine);
1943 + if (!err && ctx->siginfo) {
1944 + /*
1945 + * The tracee has already reported a signal
1946 + * before utrace_barrier().
1948 + * Resume it as we do in the PTRACE_EVENT_SIGNAL
1949 + * case below. The difference is that we can race
1950 + * with ptrace_report_signal() if the tracee is
1951 + * running, but this doesn't matter. In any case
1952 + * UTRACE_SIGNAL_REPORT must be pending and it
1953 + * can return nothing but UTRACE_DETACH.
1954 + */
1955 + action = UTRACE_RESUME;
1958 + } else if (sig) {
1959 + switch (get_stop_event(ctx)) {
1960 + case PTRACE_EVENT_SYSCALL:
1961 + send_sig_info(sig, SEND_SIG_PRIV, tracee);
1962 + break;
1964 + case PTRACE_EVENT_SIGNAL:
1965 + ctx->signr = sig;
1966 + ctx->resume = UTRACE_DETACH;
1967 + action = UTRACE_RESUME;
1968 + break;
1972 + ptrace_wake_up(tracee, engine, action, explicit);
1974 + if (action != UTRACE_DETACH)
1975 + ctx->options = PTRACE_O_DETACHED;
1977 + utrace_engine_put(engine);
1980 +static void ptrace_abort_attach(struct task_struct *tracee)
1982 + ptrace_detach_task(tracee, 0);
1985 +static u32 ptrace_report_exit(u32 action, struct utrace_engine *engine,
1986 + long orig_code, long *code)
1988 + struct ptrace_context *ctx = ptrace_context(engine);
1990 + WARN_ON(ptrace_event_pending(ctx) &&
1991 + !signal_group_exit(current->signal));
1993 + set_stop_code(ctx, PTRACE_EVENT_EXIT);
1994 + ctx->eventmsg = *code;
1996 + return UTRACE_STOP;
1999 +static void ptrace_clone_attach(struct task_struct *child,
2000 + int options)
2002 + struct task_struct *parent = current;
2003 + struct task_struct *tracer;
2004 + bool abort = true;
2006 + if (unlikely(ptrace_attach_task(child, options))) {
2007 + WARN_ON(1);
2008 + return;
2011 + write_lock_irq(&tasklist_lock);
2012 + tracer = parent->parent;
2013 + if (!(tracer->flags & PF_EXITING) && parent->ptrace) {
2014 + child->ptrace = parent->ptrace;
2015 + __ptrace_link(child, tracer);
2016 + abort = false;
2018 + write_unlock_irq(&tasklist_lock);
2019 + if (unlikely(abort)) {
2020 + ptrace_abort_attach(child);
2021 + return;
2024 + sigaddset(&child->pending.signal, SIGSTOP);
2025 + set_tsk_thread_flag(child, TIF_SIGPENDING);
2028 +static u32 ptrace_report_clone(u32 action, struct utrace_engine *engine,
2029 + unsigned long clone_flags,
2030 + struct task_struct *child)
2032 + struct ptrace_context *ctx = ptrace_context(engine);
2033 + int event = 0;
2035 + WARN_ON(ptrace_event_pending(ctx));
2037 + if (clone_flags & CLONE_UNTRACED) {
2038 + /* no events reported */
2039 + } else if (clone_flags & CLONE_VFORK) {
2040 + if (ctx->options & PTRACE_O_TRACEVFORK)
2041 + event = PTRACE_EVENT_VFORK;
2042 + else if (ctx->options & PTRACE_O_TRACEVFORKDONE)
2043 + event = PTRACE_EVENT_VFORK_DONE;
2044 + } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
2045 + if (ctx->options & PTRACE_O_TRACECLONE)
2046 + event = PTRACE_EVENT_CLONE;
2047 + } else if (ctx->options & PTRACE_O_TRACEFORK) {
2048 + event = PTRACE_EVENT_FORK;
2050 + /*
2051 + * Any of these reports implies auto-attaching the new child.
2052 + * So does CLONE_PTRACE, even with no event to report.
2053 + */
2054 + if ((event && event != PTRACE_EVENT_VFORK_DONE) ||
2055 + (clone_flags & CLONE_PTRACE))
2056 + ptrace_clone_attach(child, ctx->options);
2058 + if (!event)
2059 + return UTRACE_RESUME;
2061 + set_stop_code(ctx, event);
2062 + ctx->eventmsg = child->pid;
2063 + /*
2064 + * We shouldn't stop now, inside the do_fork() path.
2065 + * We will stop later, before return to user-mode.
2066 + */
2067 + if (event == PTRACE_EVENT_VFORK_DONE)
2068 + return UTRACE_REPORT;
2069 + else
2070 + return UTRACE_STOP;
2073 +static inline void set_syscall_code(struct ptrace_context *ctx)
2075 + set_stop_code(ctx, PTRACE_EVENT_SYSCALL);
2076 + if (ctx->options & PTRACE_O_TRACESYSGOOD)
2077 + ctx->stop_code |= 0x80;
2080 +static u32 ptrace_report_syscall_entry(u32 action, struct utrace_engine *engine,
2081 + struct pt_regs *regs)
2083 + struct ptrace_context *ctx = ptrace_context(engine);
2085 + if (action & UTRACE_SYSCALL_RESUMED) {
2086 + /*
2087 + * We already reported the first time.
2088 + * Nothing more to do now.
2089 + */
2090 + if (unlikely(ctx->options & PTRACE_O_SYSEMU))
2091 + return UTRACE_SYSCALL_ABORT | UTRACE_REPORT;
2092 + return utrace_syscall_action(action) | UTRACE_RESUME;
2095 + WARN_ON(ptrace_event_pending(ctx));
2097 + set_syscall_code(ctx);
2099 + if (unlikely(ctx->options & PTRACE_O_SYSEMU))
2100 + return UTRACE_SYSCALL_ABORT | UTRACE_REPORT;
2101 + /*
2102 + * Stop now to report. We will get another callback after
2103 + * we resume, with the UTRACE_SYSCALL_RESUMED flag set.
2104 + */
2105 + return UTRACE_SYSCALL_RUN | UTRACE_STOP;
2108 +static inline bool is_step_resume(enum utrace_resume_action resume)
2110 + return resume == UTRACE_BLOCKSTEP || resume == UTRACE_SINGLESTEP;
2113 +static u32 ptrace_report_syscall_exit(u32 action, struct utrace_engine *engine,
2114 + struct pt_regs *regs)
2116 + struct ptrace_context *ctx = ptrace_context(engine);
2118 + if (ptrace_event_pending(ctx))
2119 + return UTRACE_STOP;
2121 + if (is_step_resume(ctx->resume)) {
2122 + ctx->signr = SIGTRAP;
2123 + return UTRACE_INTERRUPT;
2126 + set_syscall_code(ctx);
2127 + return UTRACE_STOP;
2130 +static u32 ptrace_report_exec(u32 action, struct utrace_engine *engine,
2131 + const struct linux_binfmt *fmt,
2132 + const struct linux_binprm *bprm,
2133 + struct pt_regs *regs)
2135 + struct ptrace_context *ctx = ptrace_context(engine);
2137 + WARN_ON(ptrace_event_pending(ctx));
2139 + if (!(ctx->options & PTRACE_O_TRACEEXEC)) {
2140 + /*
2141 + * Old-fashioned ptrace'd exec just posts a plain signal.
2142 + */
2143 + send_sig(SIGTRAP, current, 0);
2144 + return UTRACE_RESUME;
2147 + set_stop_code(ctx, PTRACE_EVENT_EXEC);
2148 + return UTRACE_STOP;
2151 +static enum utrace_signal_action resume_signal(struct ptrace_context *ctx,
2152 + struct k_sigaction *return_ka)
2154 + siginfo_t *info = ctx->siginfo;
2155 + int signr = ctx->signr;
2157 + ctx->siginfo = NULL;
2158 + ctx->signr = 0;
2160 + /* Did the debugger cancel the sig? */
2161 + if (!signr)
2162 + return UTRACE_SIGNAL_IGN;
2163 + /*
2164 + * Update the siginfo structure if the signal has changed.
2165 + * If the debugger wanted something specific in the siginfo
2166 + * then it should have updated *info via PTRACE_SETSIGINFO.
2167 + */
2168 + if (info->si_signo != signr) {
2169 + info->si_signo = signr;
2170 + info->si_errno = 0;
2171 + info->si_code = SI_USER;
2172 + info->si_pid = task_pid_vnr(current->parent);
2173 + info->si_uid = task_uid(current->parent);
2176 + /* If the (new) signal is now blocked, requeue it. */
2177 + if (sigismember(&current->blocked, signr)) {
2178 + send_sig_info(signr, info, current);
2179 + return UTRACE_SIGNAL_IGN;
2182 + spin_lock_irq(&current->sighand->siglock);
2183 + *return_ka = current->sighand->action[signr - 1];
2184 + spin_unlock_irq(&current->sighand->siglock);
2186 + return UTRACE_SIGNAL_DELIVER;
2189 +static u32 ptrace_report_signal(u32 action, struct utrace_engine *engine,
2190 + struct pt_regs *regs,
2191 + siginfo_t *info,
2192 + const struct k_sigaction *orig_ka,
2193 + struct k_sigaction *return_ka)
2195 + struct ptrace_context *ctx = ptrace_context(engine);
2196 + enum utrace_resume_action resume = ctx->resume;
2198 + if (ptrace_event_pending(ctx)) {
2199 + action = utrace_signal_action(action);
2200 + WARN_ON(action != UTRACE_SIGNAL_REPORT);
2201 + return action | UTRACE_STOP;
2204 + switch (utrace_signal_action(action)) {
2205 + case UTRACE_SIGNAL_HANDLER:
2206 + if (WARN_ON(ctx->siginfo))
2207 + ctx->siginfo = NULL;
2209 + if (is_step_resume(resume)) {
2210 + set_stop_code(ctx, PTRACE_EVENT_SIGTRAP);
2211 + return UTRACE_STOP | UTRACE_SIGNAL_IGN;
2214 + case UTRACE_SIGNAL_REPORT:
2215 + if (!ctx->siginfo) {
2216 + if (ctx->signr) {
2217 + /* set by ptrace_resume(SYSCALL_EXIT) */
2218 + WARN_ON(ctx->signr != SIGTRAP);
2219 + user_single_step_siginfo(current, regs, info);
2220 + force_sig_info(SIGTRAP, info, current);
2223 + return resume | UTRACE_SIGNAL_IGN;
2226 + if (WARN_ON(ctx->siginfo != info))
2227 + return resume | UTRACE_SIGNAL_IGN;
2229 + return resume | resume_signal(ctx, return_ka);
2231 + default:
2232 + break;
2235 + WARN_ON(ctx->siginfo);
2237 + /* Raced with the exiting tracer? */
2238 + if (resume == UTRACE_DETACH)
2239 + return action;
2241 + ctx->siginfo = info;
2242 + /*
2243 + * ctx->siginfo points to the caller's stack.
2244 + * Make sure the subsequent UTRACE_SIGNAL_REPORT clears
2245 + * ->siginfo before return from get_signal_to_deliver().
2246 + */
2247 + if (utrace_control(current, engine, UTRACE_INTERRUPT))
2248 + WARN_ON(1);
2250 + ctx->signr = info->si_signo;
2251 + ctx->stop_code = (PTRACE_EVENT_SIGNAL << 8) | ctx->signr;
2253 + return UTRACE_STOP | UTRACE_SIGNAL_IGN;
2256 +static u32 ptrace_report_quiesce(u32 action, struct utrace_engine *engine,
2257 + unsigned long event)
2259 + struct ptrace_context *ctx = ptrace_context(engine);
2261 + if (ptrace_event_pending(ctx))
2262 + return UTRACE_STOP;
2264 + return event ? UTRACE_RESUME : ctx->resume;
2267 +static void ptrace_release(void *data)
2269 + kfree(data);
2272 +static const struct utrace_engine_ops ptrace_utrace_ops = {
2273 + .report_signal = ptrace_report_signal,
2274 + .report_quiesce = ptrace_report_quiesce,
2275 + .report_exec = ptrace_report_exec,
2276 + .report_exit = ptrace_report_exit,
2277 + .report_clone = ptrace_report_clone,
2278 + .report_syscall_entry = ptrace_report_syscall_entry,
2279 + .report_syscall_exit = ptrace_report_syscall_exit,
2280 + .release = ptrace_release,
2283 +static void ptrace_do_detach(struct task_struct *tracee, unsigned int data)
2285 + bool detach, release;
2287 + write_lock_irq(&tasklist_lock);
2288 + /*
2289 + * This tracee may already have been killed. Make sure de_thread() or
2290 + * our sub-thread doing do_wait() didn't do release_task() yet.
2291 + */
2292 + detach = tracee->ptrace != 0;
2293 + release = false;
2294 + if (likely(detach))
2295 + release = __ptrace_detach(current, tracee);
2296 + write_unlock_irq(&tasklist_lock);
2298 + if (unlikely(release))
2299 + release_task(tracee);
2300 + else if (likely(detach))
2301 + ptrace_detach_task(tracee, data);
2304 +static int ptrace_set_options(struct task_struct *tracee,
2305 + struct utrace_engine *engine, long data)
2307 + BUILD_BUG_ON(PTRACE_O_MASK & (PTRACE_O_SYSEMU | PTRACE_O_DETACHED));
2309 + ptrace_set_events(tracee, engine, data & PTRACE_O_MASK);
2310 + return (data & ~PTRACE_O_MASK) ? -EINVAL : 0;
2313 +static int ptrace_rw_siginfo(struct task_struct *tracee,
2314 + struct ptrace_context *ctx,
2315 + siginfo_t *info, bool write)
2317 + unsigned long flags;
2318 + int err;
2320 + switch (get_stop_event(ctx)) {
2321 + case 0: /* jctl stop */
2322 + return -EINVAL;
2324 + case PTRACE_EVENT_SIGNAL:
2325 + err = -ESRCH;
2326 + if (lock_task_sighand(tracee, &flags)) {
2327 + if (likely(task_is_traced(tracee))) {
2328 + if (write)
2329 + *ctx->siginfo = *info;
2330 + else
2331 + *info = *ctx->siginfo;
2332 + err = 0;
2334 + unlock_task_sighand(tracee, &flags);
2337 + return err;
2339 + default:
2340 + if (!write) {
2341 + memset(info, 0, sizeof(*info));
2342 + info->si_signo = SIGTRAP;
2343 + info->si_code = ctx->stop_code & PTRACE_EVENT_MASK;
2344 + info->si_pid = task_pid_vnr(tracee);
2345 + info->si_uid = task_uid(tracee);
2348 + return 0;
2352 +static void do_ptrace_notify_stop(struct ptrace_context *ctx,
2353 + struct task_struct *tracee)
2355 + /*
2356 + * This can race with SIGKILL, but we borrow this race from
2357 + * the old ptrace implementation. ->exit_code is only needed
2358 + * for wait_task_stopped()->task_stopped_code(); we should
2359 + * change it to use ptrace_context.
2360 + */
2361 + tracee->exit_code = ctx->stop_code & PTRACE_EVENT_MASK;
2362 + WARN_ON(!tracee->exit_code);
2364 + read_lock(&tasklist_lock);
2365 + /*
2366 + * Don't want to allow preemption here, because
2367 + * sys_ptrace() needs this task to be inactive.
2368 + */
2369 + preempt_disable();
2370 + /*
2371 + * It can be killed and then released by our subthread,
2372 + * or ptrace_attach() has not completed yet.
2373 + */
2374 + if (task_ptrace(tracee))
2375 + do_notify_parent_cldstop(tracee, CLD_TRAPPED);
2376 + read_unlock(&tasklist_lock);
2377 + preempt_enable_no_resched();
2380 +void ptrace_notify_stop(struct task_struct *tracee)
2382 + struct utrace_engine *engine = ptrace_lookup_engine(tracee);
2384 + if (IS_ERR(engine))
2385 + return;
2387 + do_ptrace_notify_stop(ptrace_context(engine), tracee);
2388 + utrace_engine_put(engine);
2391 +static int ptrace_resume_action(struct task_struct *tracee,
2392 + struct utrace_engine *engine, long request)
2394 + struct ptrace_context *ctx = ptrace_context(engine);
2395 + unsigned long events;
2396 + int action;
2398 + ctx->options &= ~PTRACE_O_SYSEMU;
2399 + events = engine->flags & ~UTRACE_EVENT_SYSCALL;
2400 + action = UTRACE_RESUME;
2402 + switch (request) {
2403 +#ifdef PTRACE_SINGLEBLOCK
2404 + case PTRACE_SINGLEBLOCK:
2405 + if (unlikely(!arch_has_block_step()))
2406 + return -EIO;
2407 + action = UTRACE_BLOCKSTEP;
2408 + events |= UTRACE_EVENT(SYSCALL_EXIT);
2409 + break;
2410 +#endif
2412 +#ifdef PTRACE_SINGLESTEP
2413 + case PTRACE_SINGLESTEP:
2414 + if (unlikely(!arch_has_single_step()))
2415 + return -EIO;
2416 + action = UTRACE_SINGLESTEP;
2417 + events |= UTRACE_EVENT(SYSCALL_EXIT);
2418 + break;
2419 +#endif
2421 +#ifdef PTRACE_SYSEMU
2422 + case PTRACE_SYSEMU_SINGLESTEP:
2423 + if (unlikely(!arch_has_single_step()))
2424 + return -EIO;
2425 + action = UTRACE_SINGLESTEP; /* and fall through */
2426 + case PTRACE_SYSEMU:
2427 + ctx->options |= PTRACE_O_SYSEMU;
2428 + events |= UTRACE_EVENT(SYSCALL_ENTRY);
2429 + break;
2430 +#endif
2432 + case PTRACE_SYSCALL:
2433 + events |= UTRACE_EVENT_SYSCALL;
2434 + break;
2436 + case PTRACE_CONT:
2437 + break;
2438 + default:
2439 + return -EIO;
2442 + if (events != engine->flags &&
2443 + utrace_set_events(tracee, engine, events))
2444 + return -ESRCH;
2446 + return action;
2449 +extern int ptrace_regset(struct task_struct *task, int req, unsigned int type,
2450 + struct iovec *kiov);
2453 * Check that we have indeed attached to the thing..
2455 int ptrace_check_attach(struct task_struct *child, int kill)
2457 + struct utrace_engine *engine;
2458 + struct utrace_examiner exam;
2459 int ret = -ESRCH;
2461 + engine = ptrace_lookup_engine(child);
2462 + if (IS_ERR(engine))
2463 + return ret;
2465 + if (child->parent != current)
2466 + goto out;
2468 + if (unlikely(kill))
2469 + ret = 0;
2471 + if (!task_is_stopped_or_traced(child))
2472 + goto out;
2473 + /*
2474 + * Make sure our engine has already stopped the child.
2475 + * Then wait for it to be off the CPU.
2476 + */
2477 + if (!utrace_control(child, engine, UTRACE_STOP) &&
2478 + !utrace_prepare_examine(child, engine, &exam))
2479 + ret = 0;
2481 * We take the read lock around doing both checks to close a
2482 * possible race where someone else was tracing our child and
2483 @@ -115,6 +875,9 @@
2485 /* All systems go.. */
2486 return ret;
2487 +out:
2488 + utrace_engine_put(engine);
2489 + return ret;
2492 int __ptrace_may_access(struct task_struct *task, unsigned int mode)
2493 @@ -167,7 +930,7 @@
2494 return !err;
2497 -static int ptrace_attach(struct task_struct *task)
2498 +int ptrace_attach(struct task_struct *task)
2500 int retval;
2502 @@ -194,6 +957,10 @@
2503 if (retval)
2504 goto unlock_creds;
2506 + retval = ptrace_attach_task(task, 0);
2507 + if (unlikely(retval))
2508 + goto unlock_creds;
2510 write_lock_irq(&tasklist_lock);
2511 retval = -EPERM;
2512 if (unlikely(task->exit_state))
2513 @@ -201,6 +968,7 @@
2514 if (task->ptrace)
2515 goto unlock_tasklist;
2517 + BUG_ON(task->ptrace);
2518 task->ptrace = PT_PTRACED;
2519 if (task_ns_capable(task, CAP_SYS_PTRACE))
2520 task->ptrace |= PT_PTRACE_CAP;
2521 @@ -223,11 +991,20 @@
2522 * Performs checks and sets PT_PTRACED.
2523 * Should be used by all ptrace implementations for PTRACE_TRACEME.
2525 -static int ptrace_traceme(void)
2526 +int ptrace_traceme(void)
2528 - int ret = -EPERM;
2529 + bool detach = true;
2530 + int ret;
2532 + ret = ptrace_attach_task(current, 0);
2533 + if (unlikely(ret))
2534 + return ret;
2536 + ret = -EPERM;
2538 write_lock_irq(&tasklist_lock);
2539 + BUG_ON(current->ptrace);
2540 + ret = security_ptrace_traceme(current->parent);
2541 /* Are we already being traced? */
2542 if (!current->ptrace) {
2543 ret = security_ptrace_traceme(current->parent);
2544 @@ -239,6 +1016,7 @@
2545 if (!ret && !(current->real_parent->flags & PF_EXITING)) {
2546 current->ptrace = PT_PTRACED;
2547 __ptrace_link(current, current->real_parent);
2548 + detach = false;
2551 write_unlock_irq(&tasklist_lock);
2552 @@ -274,7 +1052,7 @@
2553 * reap it now, in that case we must also wake up sub-threads sleeping in
2554 * do_wait().
2556 -static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
2557 +bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
2559 __ptrace_unlink(p);
2561 @@ -297,7 +1075,7 @@
2562 return false;
2565 -static int ptrace_detach(struct task_struct *child, unsigned int data)
2566 +int ptrace_detach(struct task_struct *child, unsigned int data)
2568 bool dead = false;
2570 @@ -324,6 +1102,8 @@
2571 if (unlikely(dead))
2572 release_task(child);
2574 + ptrace_do_detach(child, data);
2576 return 0;
2579 @@ -571,6 +1351,11 @@
2580 siginfo_t siginfo;
2581 void __user *datavp = (void __user *) data;
2582 unsigned long __user *datalp = datavp;
2583 + struct utrace_engine *engine = ptrace_lookup_engine(child);
2585 + if (unlikely(IS_ERR(engine)))
2586 + return -ESRCH;
2589 switch (request) {
2590 case PTRACE_PEEKTEXT:
2591 @@ -648,6 +1433,10 @@
2592 return ptrace_resume(child, request, data);
2594 case PTRACE_KILL:
2595 + /* Ugly historical behaviour. */
2596 + if (task_is_traced(child))
2597 + ptrace_resume(child, engine, SIGKILL);
2599 if (child->exit_state) /* already dead */
2600 return 0;
2601 return ptrace_resume(child, request, SIGKILL);
2602 @@ -673,9 +1462,12 @@
2604 #endif
2605 default:
2606 + ret = ptrace_resume(child, engine, data);
2607 break;
2610 + utrace_engine_put(engine);
2612 return ret;
2615 @@ -767,11 +1559,16 @@
2616 int compat_ptrace_request(struct task_struct *child, compat_long_t request,
2617 compat_ulong_t addr, compat_ulong_t data)
2619 + struct utrace_engine *engine = ptrace_lookup_engine(child);
2621 compat_ulong_t __user *datap = compat_ptr(data);
2622 compat_ulong_t word;
2623 siginfo_t siginfo;
2624 int ret;
2626 + if (unlikely(IS_ERR(engine)))
2627 + return -ESRCH;
2629 switch (request) {
2630 case PTRACE_PEEKTEXT:
2631 case PTRACE_PEEKDATA:
2632 @@ -838,7 +1635,7 @@
2633 default:
2634 ret = ptrace_request(child, request, addr, data);
2637 + utrace_engine_put(engine);
2638 return ret;
2641 diff -urN linux-2.6.39.1/kernel/signal.c linux-2.6.39.1b/kernel/signal.c
2642 --- linux-2.6.39.1/kernel/signal.c 2011-06-02 17:34:20.000000000 -0700
2643 +++ linux-2.6.39.1b/kernel/signal.c 2011-06-30 10:41:49.499763788 -0700
2644 @@ -1536,7 +1536,7 @@
2645 return ret;
2648 -static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
2649 +void do_notify_parent_cldstop(struct task_struct *tsk, int why)
2651 struct siginfo info;
2652 unsigned long flags;
2653 @@ -1808,7 +1808,7 @@
2654 static int ptrace_signal(int signr, siginfo_t *info,
2655 struct pt_regs *regs, void *cookie)
2657 - if (!task_ptrace(current))
2658 + if (!(task_ptrace(current) & PT_PTRACED))
2659 return signr;
2661 ptrace_signal_deliver(regs, cookie);
2662 diff -urN linux-2.6.39.1/kernel/utrace.c linux-2.6.39.1b/kernel/utrace.c
2663 --- linux-2.6.39.1/kernel/utrace.c 1969-12-31 17:00:00.000000000 -0700
2664 +++ linux-2.6.39.1b/kernel/utrace.c 2011-06-30 13:51:25.269187836 -0700
2665 @@ -0,0 +1,2456 @@
2667 + * utrace infrastructure interface for debugging user processes
2669 + * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved.
2671 + * This copyrighted material is made available to anyone wishing to use,
2672 + * modify, copy, or redistribute it subject to the terms and conditions
2673 + * of the GNU General Public License v.2.
2675 + * Red Hat Author: Roland McGrath.
2676 + */
2678 +#include <linux/utrace.h>
2679 +#include <linux/tracehook.h>
2680 +#include <linux/regset.h>
2681 +#include <asm/syscall.h>
2682 +#include <linux/ptrace.h>
2683 +#include <linux/err.h>
2684 +#include <linux/sched.h>
2685 +#include <linux/freezer.h>
2686 +#include <linux/module.h>
2687 +#include <linux/init.h>
2688 +#include <linux/slab.h>
2689 +#include <linux/seq_file.h>
2693 + * Per-thread structure private to utrace implementation.
2694 + * If task_struct.utrace_flags is nonzero, task_struct.utrace
2695 + * has always been allocated first. Once allocated, it is
2696 + * never freed until free_task().
2698 + * The common event reporting loops are done by the task making the
2699 + * report without ever taking any locks. To facilitate this, the two
2700 + * lists @attached and @attaching work together for smooth asynchronous
2701 + * attaching with low overhead. Modifying either list requires @lock.
2702 + * The @attaching list can be modified any time while holding @lock.
2703 + * New engines being attached always go on this list.
2705 + * The @attached list is what the task itself uses for its reporting
2706 + * loops. When the task itself is not quiescent, it can use the
2707 + * @attached list without taking any lock. Nobody may modify the list
2708 + * when the task is not quiescent. When it is quiescent, that means
2709 + * that it won't run again without taking @lock itself before using
2710 + * the list.
2712 + * At each place where we know the task is quiescent (or it's current),
2713 + * while holding @lock, we call splice_attaching(), below. This moves
2714 + * the @attaching list members on to the end of the @attached list.
2715 + * Since this happens at the start of any reporting pass, any new
2716 + * engines attached asynchronously go on the stable @attached list
2717 + * in time to have their callbacks seen.
2718 + */
2719 +struct utrace {
2720 + spinlock_t lock;
2721 + struct list_head attached, attaching;
2723 + struct task_struct *cloning;
2725 + struct utrace_engine *reporting;
2727 + enum utrace_resume_action resume:UTRACE_RESUME_BITS;
2728 + unsigned int signal_handler:1;
2729 + unsigned int vfork_stop:1; /* need utrace_stop() before vfork wait */
2730 + unsigned int death:1; /* in utrace_report_death() now */
2731 + unsigned int reap:1; /* release_task() has run */
2732 + unsigned int pending_attach:1; /* need splice_attaching() */
2735 +static struct kmem_cache *utrace_cachep;
2736 +static struct kmem_cache *utrace_engine_cachep;
2737 +static const struct utrace_engine_ops utrace_detached_ops; /* forward decl */
2739 +static int __init utrace_init(void)
2741 + utrace_cachep = KMEM_CACHE(utrace, SLAB_PANIC);
2742 + utrace_engine_cachep = KMEM_CACHE(utrace_engine, SLAB_PANIC);
2743 + return 0;
2745 +module_init(utrace_init);
2748 + * Set up @task.utrace for the first time. We can have races
2749 + * between two utrace_attach_task() calls here. The task_lock()
2750 + * governs installing the new pointer. If another one got in first,
2751 + * we just punt the new one we allocated.
2753 + * This returns false only in case of a memory allocation failure.
2754 + */
2755 +static bool utrace_task_alloc(struct task_struct *task)
2757 + struct utrace *utrace = kmem_cache_zalloc(utrace_cachep, GFP_KERNEL);
2758 + if (unlikely(!utrace))
2759 + return false;
2760 + spin_lock_init(&utrace->lock);
2761 + INIT_LIST_HEAD(&utrace->attached);
2762 + INIT_LIST_HEAD(&utrace->attaching);
2763 + utrace->resume = UTRACE_RESUME;
2764 + task_lock(task);
2765 + if (likely(!task->utrace)) {
2766 + /*
2767 + * This barrier makes sure the initialization of the struct
2768 + * precedes the installation of the pointer. This pairs
2769 + * with smp_read_barrier_depends() in task_utrace_struct().
2770 + */
2771 + smp_wmb();
2772 + task->utrace = utrace;
2774 + task_unlock(task);
2776 + if (unlikely(task->utrace != utrace))
2777 + kmem_cache_free(utrace_cachep, utrace);
2778 + return true;
2782 + * This is called via tracehook_free_task() from free_task()
2783 + * when @task is being deallocated.
2784 + */
2785 +void utrace_free_task(struct task_struct *task)
2787 + kmem_cache_free(utrace_cachep, task->utrace);
2791 + * This is called when the task is safely quiescent, i.e. it won't consult
2792 + * utrace->attached without the lock. Move any engines attached
2793 + * asynchronously from @utrace->attaching onto the @utrace->attached list.
2794 + */
2795 +static void splice_attaching(struct utrace *utrace)
2797 + lockdep_assert_held(&utrace->lock);
2798 + list_splice_tail_init(&utrace->attaching, &utrace->attached);
2799 + utrace->pending_attach = 0;
2803 + * This is the exported function used by the utrace_engine_put() inline.
2804 + */
2805 +void __utrace_engine_release(struct kref *kref)
2807 + struct utrace_engine *engine = container_of(kref, struct utrace_engine,
2808 + kref);
2809 + BUG_ON(!list_empty(&engine->entry));
2810 + if (engine->release)
2811 + (*engine->release)(engine->data);
2812 + kmem_cache_free(utrace_engine_cachep, engine);
2814 +EXPORT_SYMBOL_GPL(__utrace_engine_release);
2816 +static bool engine_matches(struct utrace_engine *engine, int flags,
2817 + const struct utrace_engine_ops *ops, void *data)
2819 + if ((flags & UTRACE_ATTACH_MATCH_OPS) && engine->ops != ops)
2820 + return false;
2821 + if ((flags & UTRACE_ATTACH_MATCH_DATA) && engine->data != data)
2822 + return false;
2823 + return engine->ops && engine->ops != &utrace_detached_ops;
2826 +static struct utrace_engine *find_matching_engine(
2827 + struct utrace *utrace, int flags,
2828 + const struct utrace_engine_ops *ops, void *data)
2830 + struct utrace_engine *engine;
2831 + list_for_each_entry(engine, &utrace->attached, entry)
2832 + if (engine_matches(engine, flags, ops, data))
2833 + return engine;
2834 + list_for_each_entry(engine, &utrace->attaching, entry)
2835 + if (engine_matches(engine, flags, ops, data))
2836 + return engine;
2837 + return NULL;
2841 + * Enqueue @engine, or maybe don't if UTRACE_ATTACH_EXCLUSIVE.
2842 + */
2843 +static int utrace_add_engine(struct task_struct *target,
2844 + struct utrace *utrace,
2845 + struct utrace_engine *engine,
2846 + int flags,
2847 + const struct utrace_engine_ops *ops,
2848 + void *data)
2850 + int ret;
2852 + spin_lock(&utrace->lock);
2854 + ret = -EEXIST;
2855 + if ((flags & UTRACE_ATTACH_EXCLUSIVE) &&
2856 + unlikely(find_matching_engine(utrace, flags, ops, data)))
2857 + goto unlock;
2859 + /*
2860 + * In case we had no engines before, make sure that
2861 + * utrace_flags is not zero. Since we did unlock+lock
2862 + * at least once after utrace_task_alloc() installed
2863 + * ->utrace, we have the necessary barrier which pairs
2864 + * with rmb() in task_utrace_struct().
2865 + */
2866 + ret = -ESRCH;
2867 + if (!target->utrace_flags) {
2868 + target->utrace_flags = UTRACE_EVENT(REAP);
2869 + /*
2870 + * If we race with tracehook_prepare_release_task()
2871 + * make sure that either it sees utrace_flags != 0
2872 + * or we see exit_state == EXIT_DEAD.
2873 + */
2874 + smp_mb();
2875 + if (unlikely(target->exit_state == EXIT_DEAD)) {
2876 + target->utrace_flags = 0;
2877 + goto unlock;
2881 + /*
2882 + * Put the new engine on the pending ->attaching list.
2883 + * Make sure it gets onto the ->attached list by the next
2884 + * time it's examined. Setting ->pending_attach ensures
2885 + * that start_report() takes the lock and splices the lists
2886 + * before the next new reporting pass.
2888 + * When target == current, it would be safe just to call
2889 + * splice_attaching() right here. But if we're inside a
2890 + * callback, that would mean the new engine also gets
2891 + * notified about the event that precipitated its own
2892 + * creation. This is not what the user wants.
2893 + */
2894 + list_add_tail(&engine->entry, &utrace->attaching);
2895 + utrace->pending_attach = 1;
2896 + utrace_engine_get(engine);
2897 + ret = 0;
2898 +unlock:
2899 + spin_unlock(&utrace->lock);
2901 + return ret;
2904 +/**
2905 + * utrace_attach_task - attach new engine, or look up an attached engine
2906 + * @target: thread to attach to
2907 + * @flags: flag bits combined with OR, see below
2908 + * @ops: callback table for new engine
2909 + * @data: engine private data pointer
2911 + * The caller must ensure that the @target thread does not get freed,
2912 + * i.e. hold a ref or be its parent. It is always safe to call this
2913 + * on @current, or on the @child pointer in a @report_clone callback.
2914 + * For most other cases, it's easier to use utrace_attach_pid() instead.
2916 + * UTRACE_ATTACH_CREATE:
2917 + * Create a new engine. If %UTRACE_ATTACH_CREATE is not specified, you
2918 + * only look up an existing engine already attached to the thread.
2920 + * UTRACE_ATTACH_EXCLUSIVE:
2921 + * Attempting to attach a second (matching) engine fails with -%EEXIST.
2923 + * UTRACE_ATTACH_MATCH_OPS: Only consider engines matching @ops.
2924 + * UTRACE_ATTACH_MATCH_DATA: Only consider engines matching @data.
2926 + * Calls with neither %UTRACE_ATTACH_MATCH_OPS nor %UTRACE_ATTACH_MATCH_DATA
2927 + * match the first among any engines attached to @target. That means that
2928 + * %UTRACE_ATTACH_EXCLUSIVE in such a call fails with -%EEXIST if there
2929 + * are any engines on @target at all.
2930 + */
2931 +struct utrace_engine *utrace_attach_task(
2932 + struct task_struct *target, int flags,
2933 + const struct utrace_engine_ops *ops, void *data)
2935 + struct utrace *utrace = task_utrace_struct(target);
2936 + struct utrace_engine *engine;
2937 + int ret;
2939 + if (!(flags & UTRACE_ATTACH_CREATE)) {
2940 + if (unlikely(!utrace))
2941 + return ERR_PTR(-ENOENT);
2942 + spin_lock(&utrace->lock);
2943 + engine = find_matching_engine(utrace, flags, ops, data);
2944 + if (engine)
2945 + utrace_engine_get(engine);
2946 + spin_unlock(&utrace->lock);
2947 + return engine ?: ERR_PTR(-ENOENT);
2950 + if (unlikely(!ops) || unlikely(ops == &utrace_detached_ops))
2951 + return ERR_PTR(-EINVAL);
2953 + if (unlikely(target->flags & PF_KTHREAD))
2954 + /*
2955 + * Silly kernel, utrace is for users!
2956 + */
2957 + return ERR_PTR(-EPERM);
2959 + if (!utrace) {
2960 + if (unlikely(!utrace_task_alloc(target)))
2961 + return ERR_PTR(-ENOMEM);
2962 + utrace = task_utrace_struct(target);
2965 + engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL);
2966 + if (unlikely(!engine))
2967 + return ERR_PTR(-ENOMEM);
2969 + /*
2970 + * Initialize the new engine structure. It starts out with one ref
2971 + * to return. utrace_add_engine() adds another for being attached.
2972 + */
2973 + kref_init(&engine->kref);
2974 + engine->flags = 0;
2975 + engine->ops = ops;
2976 + engine->data = data;
2977 + engine->release = ops->release;
2979 + ret = utrace_add_engine(target, utrace, engine, flags, ops, data);
2981 + if (unlikely(ret)) {
2982 + kmem_cache_free(utrace_engine_cachep, engine);
2983 + engine = ERR_PTR(ret);
2987 + return engine;
2989 +EXPORT_SYMBOL_GPL(utrace_attach_task);
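To make the flag matrix concrete, a sketch of the two usual modes (my_get_engine is an invented name; @ops stands for the caller's engine ops), echoing how ptrace_lookup_engine() and ptrace_attach_engine() use these flags earlier in this patch:

static struct utrace_engine *my_get_engine(struct task_struct *task,
					   const struct utrace_engine_ops *ops,
					   bool create)
{
	int flags = UTRACE_ATTACH_MATCH_OPS;

	if (create)	/* at most one engine per task for these ops */
		flags |= UTRACE_ATTACH_CREATE | UTRACE_ATTACH_EXCLUSIVE;
	return utrace_attach_task(task, flags, ops, NULL);
}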
2991 +/**
2992 + * utrace_attach_pid - attach new engine, or look up an attached engine
2993 + * @pid: &struct pid pointer representing thread to attach to
2994 + * @flags: flag bits combined with OR, see utrace_attach_task()
2995 + * @ops: callback table for new engine
2996 + * @data: engine private data pointer
2998 + * This is the same as utrace_attach_task(), but takes a &struct pid
2999 + * pointer rather than a &struct task_struct pointer. The caller must
3000 + * hold a ref on @pid, but does not need to worry about the task
3001 + * staying valid. If it's been reaped so that @pid points nowhere,
3002 + * then this call returns -%ESRCH.
3003 + */
3004 +struct utrace_engine *utrace_attach_pid(
3005 + struct pid *pid, int flags,
3006 + const struct utrace_engine_ops *ops, void *data)
3008 + struct utrace_engine *engine = ERR_PTR(-ESRCH);
3009 + struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
3010 + if (task) {
3011 + engine = utrace_attach_task(task, flags, ops, data);
3012 + put_task_struct(task);
3014 + return engine;
3016 +EXPORT_SYMBOL_GPL(utrace_attach_pid);
3019 + * When an engine is detached, the target thread may still see it and
3020 + * make callbacks until it quiesces. We install a special ops vector
3021 + * with these two callbacks. When the target thread quiesces, it can
3022 + * safely free the engine itself. For any event we will always get
3023 + * the report_quiesce() callback first, so we only need this one
3024 + * pointer to be set. The only exception is report_reap(), so we
3025 + * supply that callback too.
3026 + */
3027 +static u32 utrace_detached_quiesce(u32 action, struct utrace_engine *engine,
3028 + unsigned long event)
3030 + return UTRACE_DETACH;
3033 +static void utrace_detached_reap(struct utrace_engine *engine,
3034 + struct task_struct *task)
3038 +static const struct utrace_engine_ops utrace_detached_ops = {
3039 + .report_quiesce = &utrace_detached_quiesce,
3040 + .report_reap = &utrace_detached_reap
3044 + * The caller has to hold a ref on the engine. If the attached flag is
3045 + * true (all but utrace_barrier() calls), the engine is supposed to be
3046 + * attached. If the attached flag is false (utrace_barrier() only),
3047 + * then return -ERESTARTSYS for an engine marked for detach but not yet
3048 + * fully detached. The task pointer can be invalid if the engine is
3049 + * detached.
3051 + * Get the utrace lock for the target task.
3052 + * Returns the struct if locked, or ERR_PTR(-errno).
3054 + * This has to be robust against races with:
3055 + * utrace_control(target, UTRACE_DETACH) calls
3056 + * UTRACE_DETACH after reports
3057 + * utrace_report_death
3058 + * utrace_release_task
3059 + */
3060 +static struct utrace *get_utrace_lock(struct task_struct *target,
3061 + struct utrace_engine *engine,
3062 + bool attached)
3063 + __acquires(utrace->lock)
3065 + struct utrace *utrace;
3067 + rcu_read_lock();
3069 + /*
3070 + * If this engine was already detached, bail out before we look at
3071 + * the task_struct pointer at all. If it's detached after this
3072 + * check, then RCU is still keeping this task_struct pointer valid.
3074 + * The ops pointer is NULL when the engine is fully detached.
3075 + * It's &utrace_detached_ops when it's marked detached but still
3076 + * on the list. In the latter case, utrace_barrier() still works,
3077 + * since the target might be in the middle of an old callback.
3078 + */
3079 + if (unlikely(!engine->ops)) {
3080 + rcu_read_unlock();
3081 + return ERR_PTR(-ESRCH);
3084 + if (unlikely(engine->ops == &utrace_detached_ops)) {
3085 + rcu_read_unlock();
3086 + return attached ? ERR_PTR(-ESRCH) : ERR_PTR(-ERESTARTSYS);
3089 + utrace = task_utrace_struct(target);
3090 + spin_lock(&utrace->lock);
3091 + if (unlikely(utrace->reap) || unlikely(!engine->ops) ||
3092 + unlikely(engine->ops == &utrace_detached_ops)) {
3093 + /*
3094 + * By the time we got the utrace lock,
3095 + * it had been reaped or detached already.
3096 + */
3097 + spin_unlock(&utrace->lock);
3098 + utrace = ERR_PTR(-ESRCH);
3099 + if (!attached && engine->ops == &utrace_detached_ops)
3100 + utrace = ERR_PTR(-ERESTARTSYS);
3102 + rcu_read_unlock();
3104 + return utrace;
3108 + * Now that we don't hold any locks, run through any
3109 + * detached engines and free their references. Each
3110 + * engine had one implicit ref while it was attached.
3111 + */
3112 +static void put_detached_list(struct list_head *list)
3114 + struct utrace_engine *engine, *next;
3115 + list_for_each_entry_safe(engine, next, list, entry) {
3116 + list_del_init(&engine->entry);
3117 + utrace_engine_put(engine);
3122 + * We use an extra bit in utrace_engine.flags past the event bits,
3123 + * to record whether the engine is keeping the target thread stopped.
3125 + * This bit is set in task_struct.utrace_flags whenever it is set in any
3126 + * engine's flags. Only utrace_reset() resets it in utrace_flags.
3127 + */
3128 +#define ENGINE_STOP (1UL << _UTRACE_NEVENTS)
3130 +static void mark_engine_wants_stop(struct task_struct *task,
3131 + struct utrace_engine *engine)
3133 + engine->flags |= ENGINE_STOP;
3134 + task->utrace_flags |= ENGINE_STOP;
3137 +static void clear_engine_wants_stop(struct utrace_engine *engine)
3139 + engine->flags &= ~ENGINE_STOP;
3142 +static bool engine_wants_stop(struct utrace_engine *engine)
3144 + return (engine->flags & ENGINE_STOP) != 0;
3147 +/**
3148 + * utrace_set_events - choose which event reports a tracing engine gets
3149 + * @target: thread to affect
3150 + * @engine: attached engine to affect
3151 + * @events: new event mask
3153 + * This changes the set of events for which @engine wants callbacks made.
3155 + * This fails with -%EALREADY and does nothing if you try to clear
3156 + * %UTRACE_EVENT(%DEATH) when the @report_death callback may already have
3157 + * begun, or if you try to newly set %UTRACE_EVENT(%DEATH) or
3158 + * %UTRACE_EVENT(%QUIESCE) when @target is already dead or dying.
3160 + * This fails with -%ESRCH if you try to clear %UTRACE_EVENT(%REAP) when
3161 + * the @report_reap callback may already have begun, or when @target has
3162 + * already been detached, including forcible detach on reaping.
3164 + * If @target was stopped before the call, then after a successful call,
3165 + * no event callbacks not requested in @events will be made; if
3166 + * %UTRACE_EVENT(%QUIESCE) is included in @events, then a
3167 + * @report_quiesce callback will be made when @target resumes.
3169 + * If @target was not stopped and @events excludes some bits that were
3170 + * set before, this can return -%EINPROGRESS to indicate that @target
3171 + * may have been making some callback to @engine. When this returns
3172 + * zero, you can be sure that no event callbacks you've disabled in
3173 + * @events can be made. If @events only sets new bits that were not set
3174 + * before on @engine, then -%EINPROGRESS will never be returned.
3176 + * To synchronize after an -%EINPROGRESS return, see utrace_barrier().
3178 + * When @target is @current, -%EINPROGRESS is not returned. But note
3179 + * that a newly-created engine will not receive any callbacks related to
3180 + * an event notification already in progress. This call enables @events
3181 + * callbacks to be made as soon as @engine becomes eligible for any
3182 + * callbacks, see utrace_attach_task().
3184 + * These rules provide for coherent synchronization based on %UTRACE_STOP,
3185 + * even when %SIGKILL is breaking its normal simple rules.
3186 + */
3187 +int utrace_set_events(struct task_struct *target,
3188 + struct utrace_engine *engine,
3189 + unsigned long events)
3191 + struct utrace *utrace;
3192 + unsigned long old_flags, old_utrace_flags;
3193 + int ret = -EALREADY;
3195 + /*
3196 + * We just ignore the internal bit, so callers can use
3197 + * engine->flags to seed bitwise ops for our argument.
3198 + */
3199 + events &= ~ENGINE_STOP;
3201 + utrace = get_utrace_lock(target, engine, true);
3202 + if (unlikely(IS_ERR(utrace)))
3203 + return PTR_ERR(utrace);
3205 + old_utrace_flags = target->utrace_flags;
3206 + old_flags = engine->flags & ~ENGINE_STOP;
3208 + /*
3209 + * If utrace_report_death() is already in progress now,
3210 + * it's too late to clear the death event bits.
3211 + */
3212 + if (((old_flags & ~events) & _UTRACE_DEATH_EVENTS) && utrace->death)
3213 + goto unlock;
3215 + /*
3216 + * When setting these flags, it's essential that we really
3217 + * synchronize with exit_notify(). They cannot be set after
3218 + * exit_notify() takes the tasklist_lock. By holding the read
3219 + * lock here while setting the flags, we ensure that the calls
3220 + * to tracehook_notify_death() and tracehook_report_death() will
3221 + * see the new flags. This ensures that utrace_release_task()
3222 + * knows positively that utrace_report_death() will be called or
3223 + * that it won't.
3224 + */
3225 + if ((events & ~old_flags) & _UTRACE_DEATH_EVENTS) {
3226 + read_lock(&tasklist_lock);
3227 + if (unlikely(target->exit_state)) {
3228 + read_unlock(&tasklist_lock);
3229 + goto unlock;
3231 + target->utrace_flags |= events;
3232 + read_unlock(&tasklist_lock);
3235 + engine->flags = events | (engine->flags & ENGINE_STOP);
3236 + target->utrace_flags |= events;
3238 + if ((events & UTRACE_EVENT_SYSCALL) &&
3239 + !(old_utrace_flags & UTRACE_EVENT_SYSCALL))
3240 + set_tsk_thread_flag(target, TIF_SYSCALL_TRACE);
3242 + ret = 0;
3243 + if ((old_flags & ~events) && target != current &&
3244 + !task_is_stopped_or_traced(target) && !target->exit_state) {
3245 + /*
3246 + * This barrier ensures that our engine->flags changes
3247 + * have hit before we examine utrace->reporting,
3248 + * pairing with the barrier in start_callback(). If
3249 + * @target has not yet hit finish_callback() to clear
3250 + * utrace->reporting, we might be in the middle of a
3251 + * callback to @engine.
3252 + */
3253 + smp_mb();
3254 + if (utrace->reporting == engine)
3255 + ret = -EINPROGRESS;
3257 +unlock:
3258 + spin_unlock(&utrace->lock);
3260 + return ret;
3262 +EXPORT_SYMBOL_GPL(utrace_set_events);
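+/*
+ * For example, a sketch of requesting quiesce and syscall-entry
+ * reports on an attached engine, using utrace_barrier() to
+ * synchronize if a just-disabled callback might still be running:
+ *
+ *	int ret = utrace_set_events(task, engine,
+ *				    UTRACE_EVENT(QUIESCE) |
+ *				    UTRACE_EVENT(SYSCALL_ENTRY));
+ *	if (ret == -EINPROGRESS)
+ *		ret = utrace_barrier(task, engine);
+ */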
3265 + * Asynchronously mark an engine as being detached.
3267 + * This must work while the target thread races with us doing
3268 + * start_callback(), defined below. It uses smp_rmb() between checking
3269 + * @engine->flags and using @engine->ops. Here we change @engine->ops
3270 + * first, then use smp_wmb() before changing @engine->flags. This ensures
3271 + * it can check the old flags before using the old ops, or check the old
3272 + * flags before using the new ops, or check the new flags before using the
3273 + * new ops, but can never check the new flags before using the old ops.
3274 + * Hence, utrace_detached_ops might be used with any old flags in place.
3275 + * It has report_quiesce() and report_reap() callbacks to handle all cases.
3276 + */
3277 +static void mark_engine_detached(struct utrace_engine *engine)
3279 + engine->ops = &utrace_detached_ops;
3280 + smp_wmb();
3281 + engine->flags = UTRACE_EVENT(QUIESCE);
3285 + * Get @target to stop and return true if it is already stopped now.
3286 + * If we return false, it will make some event callback soonish.
3287 + * Called with @utrace locked.
3288 + */
3289 +static bool utrace_do_stop(struct task_struct *target, struct utrace *utrace)
3291 + if (task_is_stopped(target)) {
3292 + /*
3293 + * Stopped is considered quiescent; when it wakes up, it will
3294 + * go through utrace_finish_stop() before doing anything else.
3295 + */
3296 + spin_lock_irq(&target->sighand->siglock);
3297 + if (likely(task_is_stopped(target)))
3298 + __set_task_state(target, TASK_TRACED);
3299 + spin_unlock_irq(&target->sighand->siglock);
3300 + } else if (utrace->resume > UTRACE_REPORT) {
3301 + utrace->resume = UTRACE_REPORT;
3302 + set_notify_resume(target);
3305 + return task_is_traced(target);
3309 + * If the target is not dead it should not be in tracing
3310 + * stop any more. Wake it unless it's in job control stop.
3311 + */
3312 +static void utrace_wakeup(struct task_struct *target, struct utrace *utrace)
3314 + lockdep_assert_held(&utrace->lock);
3315 + spin_lock_irq(&target->sighand->siglock);
3316 + if (target->signal->flags & SIGNAL_STOP_STOPPED ||
3317 + target->signal->group_stop_count)
3318 + target->state = TASK_STOPPED;
3319 + else
3320 + wake_up_state(target, __TASK_TRACED);
3321 + spin_unlock_irq(&target->sighand->siglock);
3325 + * This is called when there might be some detached engines on the list or
3326 + * some stale bits in @task->utrace_flags. Clean them up and recompute the
3327 + * flags. Returns true if we're now fully detached.
3329 + * Called with @utrace->lock held, returns with it released.
3330 + * After this returns, @utrace might be freed if everything detached.
3331 + */
3332 +static bool utrace_reset(struct task_struct *task, struct utrace *utrace)
3333 + __releases(utrace->lock)
3335 + struct utrace_engine *engine, *next;
3336 + unsigned long flags = 0;
3337 + LIST_HEAD(detached);
3339 + splice_attaching(utrace);
3341 + /*
3342 + * Update the set of events of interest from the union
3343 + * of the interests of the remaining tracing engines.
3344 + * For any engine marked detached, remove it from the list.
3345 + * We'll collect them on the detached list.
3346 + */
3347 + list_for_each_entry_safe(engine, next, &utrace->attached, entry) {
3348 + if (engine->ops == &utrace_detached_ops) {
3349 + engine->ops = NULL;
3350 + list_move(&engine->entry, &detached);
3351 + } else {
3352 + flags |= engine->flags | UTRACE_EVENT(REAP);
3356 + if (task->exit_state) {
3357 + /*
3358 + * Once it's already dead, we never install any flags
3359 + * except REAP. When ->exit_state is set and events
3360 + * like DEATH are not set, then they never can be set.
3361 + * This ensures that utrace_release_task() knows
3362 + * positively that utrace_report_death() can never run.
3363 + */
3364 + BUG_ON(utrace->death);
3365 + flags &= UTRACE_EVENT(REAP);
3366 + } else if (!(flags & UTRACE_EVENT_SYSCALL) &&
3367 + test_tsk_thread_flag(task, TIF_SYSCALL_TRACE)) {
3368 + clear_tsk_thread_flag(task, TIF_SYSCALL_TRACE);
3371 + if (!flags) {
3372 + /*
3373 + * No more engines; clear out the utrace state.
3374 + */
3375 + utrace->resume = UTRACE_RESUME;
3376 + utrace->signal_handler = 0;
3379 + /*
3380 + * If no more engines want it stopped, wake it up.
3381 + */
3382 + if (task_is_traced(task) && !(flags & ENGINE_STOP)) {
3383 + /*
3384 + * It just resumes, so make sure single-step
3385 + * is not left set.
3386 + */
3387 + if (utrace->resume == UTRACE_RESUME)
3388 + user_disable_single_step(task);
3389 + utrace_wakeup(task, utrace);
3392 + /*
3393 + * In theory spin_lock() doesn't imply rcu_read_lock().
3394 + * Once we clear ->utrace_flags this task_struct can go away
3395 + * because the tracehook_prepare_release_task() path does not take
3396 + * utrace->lock when ->utrace_flags == 0.
3397 + */
3398 + rcu_read_lock();
3399 + task->utrace_flags = flags;
3400 + spin_unlock(&utrace->lock);
3401 + rcu_read_unlock();
3403 + put_detached_list(&detached);
3405 + return !flags;
3408 +void utrace_finish_stop(void)
3410 + /*
3411 + * If we were task_is_traced() and then SIGKILL'ed, make
3412 + * sure we do nothing until the tracer drops utrace->lock.
3413 + */
3414 + if (unlikely(__fatal_signal_pending(current))) {
3415 + struct utrace *utrace = task_utrace_struct(current);
3416 + spin_unlock_wait(&utrace->lock);
3421 + * Perform %UTRACE_STOP, i.e. block in TASK_TRACED until woken up.
3422 + * @task == current, @utrace == current->utrace, which is not locked.
3423 + * If we are woken up by SIGKILL, we return early even though some
3424 + * utrace engine may still want us to stay stopped.
3425 + */
3426 +static void utrace_stop(struct task_struct *task, struct utrace *utrace,
3427 + enum utrace_resume_action action)
3429 +relock:
3430 + spin_lock(&utrace->lock);
3432 + if (action < utrace->resume) {
3433 + /*
3434 + * Ensure a reporting pass when we're resumed.
3435 + */
3436 + utrace->resume = action;
3437 + if (action == UTRACE_INTERRUPT)
3438 + set_thread_flag(TIF_SIGPENDING);
3439 + else
3440 + set_thread_flag(TIF_NOTIFY_RESUME);
3443 + /*
3444 + * If the ENGINE_STOP bit is clear in utrace_flags, that means
3445 + * utrace_reset() ran after we processed some UTRACE_STOP return
3446 + * values from callbacks to get here. If all engines have detached
3447 + * or resumed us, we don't stop. This check doesn't require
3448 + * siglock, but it should follow the interrupt/report bookkeeping
3449 + * steps (this can matter for UTRACE_RESUME but not UTRACE_DETACH).
3450 + */
3451 + if (unlikely(!(task->utrace_flags & ENGINE_STOP))) {
3452 + utrace_reset(task, utrace);
3453 + if (task->utrace_flags & ENGINE_STOP)
3454 + goto relock;
3455 + return;
3458 + /*
3459 + * The siglock protects us against signals. As well as SIGKILL
3460 + * waking us up, we must synchronize with the signal bookkeeping
3461 + * for stop signals and SIGCONT.
3462 + */
3463 + spin_lock_irq(&task->sighand->siglock);
3465 + if (unlikely(__fatal_signal_pending(task))) {
3466 + spin_unlock_irq(&task->sighand->siglock);
3467 + spin_unlock(&utrace->lock);
3468 + return;
3471 + __set_current_state(TASK_TRACED);
3473 + /*
3474 + * If there is a group stop in progress,
3475 + * we must participate in the bookkeeping.
3476 + */
3477 + if (unlikely(task->signal->group_stop_count) &&
3478 + !--task->signal->group_stop_count)
3479 + task->signal->flags = SIGNAL_STOP_STOPPED;
3481 + spin_unlock_irq(&task->sighand->siglock);
3482 + spin_unlock(&utrace->lock);
3484 + /*
3485 + * If ptrace is among the reasons for this stop, do its
3486 + * notification now. This could not just be done in
3487 + * ptrace's own event report callbacks because it has to
3488 + * be done after we are in TASK_TRACED. This makes the
3489 + * synchronization with ptrace_do_wait() work right.
3491 + * It's only because of the bad old overloading of the do_wait()
3492 + * logic for handling ptrace stops that we need this special case
3493 + * here. One day we will clean up ptrace so it does not need to
3494 + * work this way. New things that are designed sensibly don't need
3495 + * a wakeup that synchronizes with tasklist_lock and ->state, so
3496 + * the proper utrace API does not try to support this weirdness.
3497 + */
3498 + ptrace_notify_stop(task);
3500 + schedule();
3502 + utrace_finish_stop();
3504 + /*
3505 + * While in TASK_TRACED, we were considered "frozen enough".
3506 + * Now that we woke up, it's crucial if we're supposed to be
3507 + * frozen that we freeze now before running anything substantial.
3508 + */
3509 + try_to_freeze();
3511 + /*
3512 + * While we were in TASK_TRACED, complete_signal() considered
3513 + * us "uninterested" in signal wakeups. Now make sure our
3514 + * TIF_SIGPENDING state is correct for normal running.
3515 + */
3516 + spin_lock_irq(&task->sighand->siglock);
3517 + recalc_sigpending();
3518 + spin_unlock_irq(&task->sighand->siglock);
3522 + * Called by release_task() with @reap set to true.
3523 + * Called by utrace_report_death() with @reap set to false.
3524 + * On reap, make report_reap callbacks and clean out @utrace
3525 + * unless still making callbacks. On death, update bookkeeping
3526 + * and handle the reap work if release_task() came in first.
3527 + */
3528 +void utrace_maybe_reap(struct task_struct *target, struct utrace *utrace,
3529 + bool reap)
3531 + struct utrace_engine *engine, *next;
3532 + struct list_head attached;
3534 + spin_lock(&utrace->lock);
3536 + if (reap) {
3537 + /*
3538 + * If the target will do some final callbacks but hasn't
3539 + * finished them yet, we know because it clears these event
3540 + * bits after it's done. Instead of cleaning up here and
3541 + * requiring utrace_report_death() to cope with it, we
3542 + * delay the REAP report and the teardown until after the
3543 + * target finishes its death reports.
3544 + */
3545 + utrace->reap = 1;
3547 + if (target->utrace_flags & _UTRACE_DEATH_EVENTS) {
3548 + spin_unlock(&utrace->lock);
3549 + return;
3551 + } else {
3552 + /*
3553 + * After we unlock with this flag clear, any competing
3554 + * utrace_control/utrace_set_events calls know that we've
3555 + * finished our callbacks and any detach bookkeeping.
3556 + */
3557 + utrace->death = 0;
3559 + if (!utrace->reap) {
3560 + /*
3561 + * We're just dead, not reaped yet. This will
3562 + * reset @target->utrace_flags so the later call
3563 + * with @reap set won't hit the check above.
3564 + */
3565 + utrace_reset(target, utrace);
3566 + return;
3570 + /*
3571 + * utrace_add_engine() checks ->utrace_flags != 0. Since
3572 + * @utrace->reap is set, nobody can set or clear UTRACE_EVENT(REAP)
3573 + * in @engine->flags or change @engine->ops and nobody can change
3574 + * @utrace->attached after we drop the lock.
3575 + */
3576 + target->utrace_flags = 0;
3578 + /*
3579 + * We clear out @utrace->attached before we drop the lock so
3580 + * that find_matching_engine() can't come across any old engine
3581 + * while we are busy tearing it down.
3582 + */
3583 + list_replace_init(&utrace->attached, &attached);
3584 + list_splice_tail_init(&utrace->attaching, &attached);
3586 + spin_unlock(&utrace->lock);
3588 + list_for_each_entry_safe(engine, next, &attached, entry) {
3589 + if (engine->flags & UTRACE_EVENT(REAP))
3590 + engine->ops->report_reap(engine, target);
3592 + engine->ops = NULL;
3593 + engine->flags = 0;
3594 + list_del_init(&engine->entry);
3596 + utrace_engine_put(engine);
3601 + * You can't do anything to a dead task but detach it.
3602 + * If release_task() has been called, you can't do that.
3604 + * On the exit path, DEATH and QUIESCE event bits are set only
3605 + * before utrace_report_death() has taken the lock. At that point,
3606 + * the death report will come soon, so disallow detach until it's
3607 + * done. This prevents us from racing with it detaching itself.
3609 + * Called only when @target->exit_state is nonzero.
3610 + */
3611 +static inline int utrace_control_dead(struct task_struct *target,
3612 + struct utrace *utrace,
3613 + enum utrace_resume_action action)
3615 + lockdep_assert_held(&utrace->lock);
3617 + if (action != UTRACE_DETACH || unlikely(utrace->reap))
3618 + return -ESRCH;
3620 + if (unlikely(utrace->death))
3621 + /*
3622 + * We have already started the death report. We can't
3623 + * prevent the report_death and report_reap callbacks,
3624 + * so tell the caller they will happen.
3625 + */
3626 + return -EALREADY;
3628 + return 0;
3631 +/**
3632 + * utrace_control - control a thread being traced by a tracing engine
3633 + * @target: thread to affect
3634 + * @engine: attached engine to affect
3635 + * @action: &enum utrace_resume_action for thread to do
3637 + * This is how a tracing engine asks a traced thread to do something.
3638 + * This call is controlled by the @action argument, which has the
3639 + * same meaning as the &enum utrace_resume_action value returned by
3640 + * event reporting callbacks.
3642 + * If @target is already dead (@target->exit_state nonzero),
3643 + * all actions except %UTRACE_DETACH fail with -%ESRCH.
3645 + * The following sections describe each option for the @action argument.
3647 + * UTRACE_DETACH:
3649 + * After this, the @engine data structure is no longer accessible,
3650 + * and the thread might be reaped. The thread will start running
3651 + * again if it was stopped and no longer has any attached engines
3652 + * that want it stopped.
3654 + * If the @report_reap callback may already have begun, this fails
3655 + * with -%ESRCH. If the @report_death callback may already have
3656 + * begun, this fails with -%EALREADY.
3658 + * If @target is not already stopped, then a callback to this engine
3659 + * might be in progress or about to start on another CPU. If so,
3660 + * then this returns -%EINPROGRESS; the detach happens as soon as
3661 + * the pending callback is finished. To synchronize after an
3662 + * -%EINPROGRESS return, see utrace_barrier().
3664 + * If @target is properly stopped before utrace_control() is called,
3665 + * then after successful return it's guaranteed that no more callbacks
3666 + * to the @engine->ops vector will be made.
3668 + * The only exception is %SIGKILL (and exec or group-exit by another
3669 + * thread in the group), which can cause asynchronous @report_death
3670 + * and/or @report_reap callbacks even when %UTRACE_STOP was used.
3671 + * (In that event, this fails with -%ESRCH or -%EALREADY, see above.)
3673 + * UTRACE_STOP:
3675 + * This asks that @target stop running. This returns 0 only if
3676 + * @target is already stopped, either for tracing or for job
3677 + * control. Then @target will remain stopped until another
3678 + * utrace_control() call is made on @engine; @target can be woken
3679 + * only by %SIGKILL (or equivalent, such as exec or termination by
3680 + * another thread in the same thread group).
3682 + * This returns -%EINPROGRESS if @target is not already stopped.
3683 + * Then the effect is like %UTRACE_REPORT. A @report_quiesce or
3684 + * @report_signal callback will be made soon. Your callback can
3685 + * then return %UTRACE_STOP to keep @target stopped.
3687 + * This does not interrupt system calls in progress, including ones
3688 + * that sleep for a long time. For that, use %UTRACE_INTERRUPT.
3689 + * To interrupt system calls and then keep @target stopped, your
3690 + * @report_signal callback can return %UTRACE_STOP.
3692 + * UTRACE_RESUME:
3694 + * Just let @target continue running normally, reversing the effect
3695 + * of a previous %UTRACE_STOP. If another engine is keeping @target
3696 + * stopped, then it remains stopped until all engines let it resume.
3697 + * If @target was not stopped, this has no effect.
3699 + * UTRACE_REPORT:
3701 + * This is like %UTRACE_RESUME, but also ensures that there will be
3702 + * a @report_quiesce or @report_signal callback made soon. If
3703 + * @target had been stopped, then there will be a callback before it
3704 + * resumes running normally. If another engine is keeping @target
3705 + * stopped, then there might be no callbacks until all engines let
3706 + * it resume.
3708 + * Since this is meaningless unless @report_quiesce callbacks will
3709 + * be made, it returns -%EINVAL if @engine lacks %UTRACE_EVENT(%QUIESCE).
3711 + * UTRACE_INTERRUPT:
3713 + * This is like %UTRACE_REPORT, but ensures that @target will make a
3714 + * @report_signal callback before it resumes or delivers signals.
3715 + * If @target was in a system call or about to enter one, work in
3716 + * progress will be interrupted as if by %SIGSTOP. If another
3717 + * engine is keeping @target stopped, then there might be no
3718 + * callbacks until all engines let it resume.
3720 + * This gives @engine an opportunity to introduce a forced signal
3721 + * disposition via its @report_signal callback.
3723 + * UTRACE_SINGLESTEP:
3725 + * It's invalid to use this unless arch_has_single_step() returned true.
3726 + * This is like %UTRACE_RESUME, but resumes for one user instruction only.
3728 + * Note that passing %UTRACE_SINGLESTEP or %UTRACE_BLOCKSTEP to
3729 + * utrace_control() or returning it from an event callback alone does
3730 + * not necessarily ensure that stepping will be enabled. If there are
3731 + * more callbacks made to any engine before returning to user mode,
3732 + * then the resume action is chosen only by the last set of callbacks.
3733 + * To be sure, enable %UTRACE_EVENT(%QUIESCE) and look for the
3734 + * @report_quiesce callback with a zero event mask, or the
3735 + * @report_signal callback with %UTRACE_SIGNAL_REPORT.
3737 + * Since this is not robust unless @report_quiesce callbacks will
3738 + * be made, it returns -%EINVAL if @engine lacks %UTRACE_EVENT(%QUIESCE).
3740 + * UTRACE_BLOCKSTEP:
3742 + * It's invalid to use this unless arch_has_block_step() returned true.
3743 + * This is like %UTRACE_SINGLESTEP, but resumes for one whole basic
3744 + * block of user instructions.
3746 + * Since this is not robust unless @report_quiesce callbacks will
3747 + * be made, it returns -%EINVAL if @engine lacks %UTRACE_EVENT(%QUIESCE).
3749 + * %UTRACE_BLOCKSTEP devolves to %UTRACE_SINGLESTEP when another
3750 + * tracing engine is using %UTRACE_SINGLESTEP at the same time.
3751 + */
3752 +int utrace_control(struct task_struct *target,
3753 + struct utrace_engine *engine,
3754 + enum utrace_resume_action action)
3756 + struct utrace *utrace;
3757 + bool reset;
3758 + int ret;
3760 + if (unlikely(action >= UTRACE_RESUME_MAX)) {
3761 + WARN(1, "invalid action argument to utrace_control()!");
3762 + return -EINVAL;
3765 + /*
3766 + * This is a sanity check for a programming error in the caller.
3767 + * Their request can only work properly in all cases by relying on
3768 + * a follow-up callback, but they didn't set one up! This check
3769 + * doesn't do locking, but it shouldn't matter. The caller must
3770 + * already be sure, by its own synchronization, that the callback
3771 + * is in place before it operates the interface.
3772 + */
3773 + if (action >= UTRACE_REPORT && action < UTRACE_RESUME &&
3774 + unlikely(!(engine->flags & UTRACE_EVENT(QUIESCE)))) {
3775 + WARN(1, "utrace_control() with no QUIESCE callback in place!");
3776 + return -EINVAL;
3779 + utrace = get_utrace_lock(target, engine, true);
3780 + if (unlikely(IS_ERR(utrace)))
3781 + return PTR_ERR(utrace);
3783 + reset = task_is_traced(target);
3784 + ret = 0;
3786 + /*
3787 + * ->exit_state can change under us; this doesn't matter.
3788 + * We do not care about ->exit_state in fact, but we do
3789 + * care about ->reap and ->death. If either flag is set,
3790 + * we must also see ->exit_state != 0.
3791 + */
3792 + if (unlikely(target->exit_state)) {
3793 + ret = utrace_control_dead(target, utrace, action);
3794 + if (ret) {
3795 + spin_unlock(&utrace->lock);
3796 + return ret;
3798 + reset = true;
3801 + switch (action) {
3802 + case UTRACE_STOP:
3803 + mark_engine_wants_stop(target, engine);
3804 + if (!reset && !utrace_do_stop(target, utrace))
3805 + ret = -EINPROGRESS;
3806 + reset = false;
3807 + break;
3809 + case UTRACE_DETACH:
3810 + if (engine_wants_stop(engine))
3811 + target->utrace_flags &= ~ENGINE_STOP;
3812 + mark_engine_detached(engine);
3813 + reset = reset || utrace_do_stop(target, utrace);
3814 + if (!reset) {
3815 + /*
3816 + * As in utrace_set_events(), this barrier ensures
3817 + * that our engine->flags changes have hit before we
3818 + * examine utrace->reporting, pairing with the barrier
3819 + * in start_callback(). If @target has not yet hit
3820 + * finish_callback() to clear utrace->reporting, we
3821 + * might be in the middle of a callback to @engine.
3822 + */
3823 + smp_mb();
3824 + if (utrace->reporting == engine)
3825 + ret = -EINPROGRESS;
3827 + break;
3829 + case UTRACE_RESUME:
3830 + clear_engine_wants_stop(engine);
3831 + break;
3833 + case UTRACE_BLOCKSTEP:
3834 + /*
3835 + * Resume from stopped, step one block.
3836 + * We fall through to treat it like UTRACE_SINGLESTEP.
3837 + */
3838 + if (unlikely(!arch_has_block_step())) {
3839 + WARN(1, "UTRACE_BLOCKSTEP when !arch_has_block_step()");
3840 + action = UTRACE_SINGLESTEP;
3843 + case UTRACE_SINGLESTEP:
3844 + /*
3845 + * Resume from stopped, step one instruction.
3846 + * We fall through to the UTRACE_REPORT case.
3847 + */
3848 + if (unlikely(!arch_has_single_step())) {
3849 + WARN(1,
3850 + "UTRACE_SINGLESTEP when !arch_has_single_step()");
3851 + reset = false;
3852 + ret = -EOPNOTSUPP;
3853 + break;
3856 + case UTRACE_REPORT:
3857 + /*
3858 + * Make the thread call tracehook_notify_resume() soon.
3859 + * But don't bother if it's already been interrupted.
3860 + * In that case, utrace_get_signal() will be reporting soon.
3861 + */
3862 + clear_engine_wants_stop(engine);
3863 + if (action < utrace->resume) {
3864 + utrace->resume = action;
3865 + set_notify_resume(target);
3867 + break;
3869 + case UTRACE_INTERRUPT:
3870 + /*
3871 + * Make the thread call tracehook_get_signal() soon.
3872 + */
3873 + clear_engine_wants_stop(engine);
3874 + if (utrace->resume == UTRACE_INTERRUPT)
3875 + break;
3876 + utrace->resume = UTRACE_INTERRUPT;
3878 + /*
3879 + * If it's not already stopped, interrupt it now. We need
3880 + * the siglock here in case it calls recalc_sigpending()
3881 + * and clears its own TIF_SIGPENDING. By taking the lock,
3882 + * we've serialized any later recalc_sigpending() after our
3883 + * setting of utrace->resume to force it on.
3884 + */
3885 + if (reset) {
3886 + /*
3887 + * This is really just to keep the invariant that
3888 + * TIF_SIGPENDING is set with UTRACE_INTERRUPT.
3889 + * When it's stopped, we know it's always going
3890 + * through utrace_get_signal() and will recalculate.
3891 + */
3892 + set_tsk_thread_flag(target, TIF_SIGPENDING);
3893 + } else {
3894 + struct sighand_struct *sighand;
3895 + unsigned long irqflags;
3896 + sighand = lock_task_sighand(target, &irqflags);
3897 + if (likely(sighand)) {
3898 + signal_wake_up(target, 0);
3899 + unlock_task_sighand(target, &irqflags);
3902 + break;
3904 + default:
3905 + BUG(); /* We checked it on entry. */
3908 + /*
3909 + * Let the thread resume running. If it's not stopped now,
3910 + * there is nothing more we need to do.
3911 + */
3912 + if (reset)
3913 + utrace_reset(target, utrace);
3914 + else
3915 + spin_unlock(&utrace->lock);
3917 + return ret;
3919 +EXPORT_SYMBOL_GPL(utrace_control);
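+/*
+ * For example, a sketch of detaching an engine and synchronizing with
+ * any callback still in progress, then dropping the ref returned by
+ * the original attach call:
+ *
+ *	int ret = utrace_control(task, engine, UTRACE_DETACH);
+ *	if (ret == -EINPROGRESS)
+ *		ret = utrace_barrier(task, engine);
+ *	utrace_engine_put(engine);
+ */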
3921 +/**
3922 + * utrace_barrier - synchronize with simultaneous tracing callbacks
3923 + * @target: thread to affect
3924 + * @engine: engine to affect (can be detached)
3926 + * This blocks while @target might be in the midst of making a callback to
3927 + * @engine. It can be interrupted by signals and will return -%ERESTARTSYS.
3928 + * A return value of zero means no callback from @target to @engine was
3929 + * in progress. Any effect of its return value (such as %UTRACE_STOP) has
3930 + * already been applied to @engine.
3932 + * It's not necessary to keep the @target pointer alive for this call.
3933 + * It's only necessary to hold a ref on @engine. This will return
3934 + * safely even if @target has been reaped and has no task refs.
3936 + * A successful return from utrace_barrier() guarantees its ordering
3937 + * with respect to utrace_set_events() and utrace_control() calls. If
3938 + * @target was not properly stopped, event callbacks just disabled might
3939 + * still be in progress; utrace_barrier() waits until there is no chance
3940 + * an unwanted callback can be in progress.
3941 + */
3942 +int utrace_barrier(struct task_struct *target, struct utrace_engine *engine)
3944 + struct utrace *utrace;
3945 + int ret = -ERESTARTSYS;
3947 + if (unlikely(target == current))
3948 + return 0;
3950 + do {
3951 + utrace = get_utrace_lock(target, engine, false);
3952 + if (unlikely(IS_ERR(utrace))) {
3953 + ret = PTR_ERR(utrace);
3954 + if (ret != -ERESTARTSYS)
3955 + break;
3956 + } else {
3957 + /*
3958 + * All engine state changes are done while
3959 + * holding the lock, i.e. before we get here.
3960 + * Since we have the lock, we only need to
3961 + * worry about @target making a callback.
3962 + * When it has entered start_callback() but
3963 + * not yet gotten to finish_callback(), we
3964 + * will see utrace->reporting == @engine.
3965 + * When @target doesn't take the lock, it uses
3966 + * barriers to order setting utrace->reporting
3967 + * before it examines the engine state.
3968 + */
3969 + if (utrace->reporting != engine)
3970 + ret = 0;
3971 + spin_unlock(&utrace->lock);
3972 + if (!ret)
3973 + break;
3975 + schedule_timeout_interruptible(1);
3976 + } while (!signal_pending(current));
3978 + return ret;
3980 +EXPORT_SYMBOL_GPL(utrace_barrier);
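+/*
+ * For example, a caller that must not block indefinitely can bubble
+ * the interrupted case back up, a sketch:
+ *
+ *	int ret = utrace_barrier(task, engine);
+ *	if (ret == -ERESTARTSYS)
+ *		return ret;	/* signal pending; caller can retry */
+ */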
3983 + * This is local state used for reporting loops, perhaps optimized away.
3984 + */
3985 +struct utrace_report {
3986 + u32 result;
3987 + enum utrace_resume_action action;
3988 + enum utrace_resume_action resume_action;
3989 + bool detaches;
3990 + bool spurious;
3993 +#define INIT_REPORT(var) \
3994 + struct utrace_report var = { \
3995 + .action = UTRACE_RESUME, \
3996 + .resume_action = UTRACE_RESUME, \
3997 + .spurious = true \
4001 + * We are now making the report, so clear the flag saying we need one.
4002 + * When there is a new attach, ->pending_attach is set just so we will
4003 + * know to do splice_attaching() here before the callback loop.
4004 + */
4005 +static enum utrace_resume_action start_report(struct utrace *utrace)
4007 + enum utrace_resume_action resume = utrace->resume;
4008 + if (utrace->pending_attach ||
4009 + (resume > UTRACE_INTERRUPT && resume < UTRACE_RESUME)) {
4010 + spin_lock(&utrace->lock);
4011 + splice_attaching(utrace);
4012 + resume = utrace->resume;
4013 + if (resume > UTRACE_INTERRUPT)
4014 + utrace->resume = UTRACE_RESUME;
4015 + spin_unlock(&utrace->lock);
4017 + return resume;
4020 +static inline void finish_report_reset(struct task_struct *task,
4021 + struct utrace *utrace,
4022 + struct utrace_report *report)
4024 + if (unlikely(report->spurious || report->detaches)) {
4025 + spin_lock(&utrace->lock);
4026 + if (utrace_reset(task, utrace))
4027 + report->action = UTRACE_RESUME;
4032 + * Complete a normal reporting pass, pairing with a start_report() call.
4033 + * This handles any UTRACE_DETACH or UTRACE_REPORT or UTRACE_INTERRUPT
4034 + * returns from engine callbacks. If @will_not_stop is true and any
4035 + * engine's last callback used UTRACE_STOP, we do UTRACE_REPORT here to
4036 + * ensure we stop before user mode. If there were no callbacks made, it
4037 + * will recompute @task->utrace_flags to avoid another false-positive.
4038 + */
4039 +static void finish_report(struct task_struct *task, struct utrace *utrace,
4040 + struct utrace_report *report, bool will_not_stop)
4042 + enum utrace_resume_action resume = report->action;
4044 + if (resume == UTRACE_STOP)
4045 + resume = will_not_stop ? UTRACE_REPORT : UTRACE_RESUME;
4047 + if (resume < utrace->resume) {
4048 + spin_lock(&utrace->lock);
4049 + utrace->resume = resume;
4050 + if (resume == UTRACE_INTERRUPT)
4051 + set_tsk_thread_flag(task, TIF_SIGPENDING);
4052 + else
4053 + set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
4054 + spin_unlock(&utrace->lock);
4057 + finish_report_reset(task, utrace, report);
4060 +static void finish_callback_report(struct task_struct *task,
4061 + struct utrace *utrace,
4062 + struct utrace_report *report,
4063 + struct utrace_engine *engine,
4064 + enum utrace_resume_action action)
4066 + if (action == UTRACE_DETACH) {
4067 + /*
4068 + * By holding the lock here, we make sure that
4069 + * utrace_barrier() (really get_utrace_lock()) sees the
4070 + * effect of this detach. Otherwise utrace_barrier() could
4071 + * return 0 after this callback had returned UTRACE_DETACH.
4072 + * This way, a 0 return is an unambiguous indicator that any
4073 + * callback returning UTRACE_DETACH has indeed caused detach.
4074 + */
4075 + spin_lock(&utrace->lock);
4076 + engine->ops = &utrace_detached_ops;
4077 + spin_unlock(&utrace->lock);
4080 + /*
4081 + * If utrace_control() was used, treat that like UTRACE_DETACH here.
4082 + */
4083 + if (engine->ops == &utrace_detached_ops) {
4084 + report->detaches = true;
4085 + return;
4088 + if (action < report->action)
4089 + report->action = action;
4091 + if (action != UTRACE_STOP) {
4092 + if (action < report->resume_action)
4093 + report->resume_action = action;
4095 + if (engine_wants_stop(engine)) {
4096 + spin_lock(&utrace->lock);
4097 + clear_engine_wants_stop(engine);
4098 + spin_unlock(&utrace->lock);
4101 + return;
4104 + if (!engine_wants_stop(engine)) {
4105 + spin_lock(&utrace->lock);
4106 + /*
4107 + * If utrace_control() came in and detached us
4108 + * before we got the lock, we must not stop now.
4109 + */
4110 + if (unlikely(engine->ops == &utrace_detached_ops))
4111 + report->detaches = true;
4112 + else
4113 + mark_engine_wants_stop(task, engine);
4114 + spin_unlock(&utrace->lock);
4119 + * Apply the return value of one engine callback to @report.
4120 + * Returns true if @engine detached and should not get any more callbacks.
4121 + */
4122 +static bool finish_callback(struct task_struct *task, struct utrace *utrace,
4123 + struct utrace_report *report,
4124 + struct utrace_engine *engine,
4125 + u32 ret)
4127 + report->result = ret & ~UTRACE_RESUME_MASK;
4128 + finish_callback_report(task, utrace, report, engine,
4129 + utrace_resume_action(ret));
4131 + /*
4132 + * Now that we have applied the effect of the return value,
4133 + * clear this so that utrace_barrier() can stop waiting.
4134 + * A subsequent utrace_control() can stop or resume @engine
4135 + * and know this was ordered after its callback's action.
4137 + * We don't need any barriers here because utrace_barrier()
4138 + * takes utrace->lock. If we touched engine->flags above,
4139 + * the lock guaranteed this change was before utrace_barrier()
4140 + * examined utrace->reporting.
4141 + */
4142 + utrace->reporting = NULL;
4144 + /*
4145 + * We've just done an engine callback. These are allowed to sleep,
4146 + * though all well-behaved ones restrict that to blocking kmalloc()
4147 + * or quickly-acquired mutex_lock() and the like. This is a good
4148 + * place to make sure tracing engines don't introduce too much
4149 + * latency under voluntary preemption.
4150 + */
4151 + might_sleep();
4153 + return engine->ops == &utrace_detached_ops;
4157 + * Start the callbacks for @engine to consider @event (a bit mask).
4158 + * This makes the report_quiesce() callback first. If @engine wants
4159 + * a specific callback for @event, we return the ops vector to use.
4160 + * If not, we return NULL. The return value from the ops->callback
4161 + * function called should be passed to finish_callback().
4162 + */
4163 +static const struct utrace_engine_ops *start_callback(
4164 + struct utrace *utrace, struct utrace_report *report,
4165 + struct utrace_engine *engine, struct task_struct *task,
4166 + unsigned long event)
4168 + const struct utrace_engine_ops *ops;
4169 + unsigned long want;
4171 + /*
4172 + * This barrier ensures that we've set utrace->reporting before
4173 + * we examine engine->flags or engine->ops. utrace_barrier()
4174 + * relies on this ordering to indicate that the effect of any
4175 + * utrace_control() and utrace_set_events() calls is in place
4176 + * by the time utrace->reporting can be seen to be NULL.
4177 + */
4178 + utrace->reporting = engine;
4179 + smp_mb();
4181 + /*
4182 + * This pairs with the barrier in mark_engine_detached().
4183 + * It makes sure that we never see the old ops vector with
4184 + * the new flags, in case the original vector had no report_quiesce.
4185 + */
4186 + want = engine->flags;
4187 + smp_rmb();
4188 + ops = engine->ops;
4190 + if ((want & UTRACE_EVENT(QUIESCE)) || ops == &utrace_detached_ops) {
4191 + if (finish_callback(task, utrace, report, engine,
4192 + (*ops->report_quiesce)(report->action,
4193 + engine, event)))
4194 + return NULL;
4196 + if (!event) {
4197 + /* We only got here to report QUIESCE */
4198 + report->spurious = false;
4199 + return NULL;
4202 + /*
4203 + * finish_callback() reset utrace->reporting after the
4204 + * quiesce callback. Now we set it again (as above)
4205 + * before re-examining engine->flags, which could have
4206 + * been changed synchronously by ->report_quiesce or
4207 + * asynchronously by utrace_control() or utrace_set_events().
4208 + */
4209 + utrace->reporting = engine;
4210 + smp_mb();
4211 + want = engine->flags;
4214 + if (want & ENGINE_STOP)
4215 + report->action = UTRACE_STOP;
4217 + if (want & event) {
4218 + report->spurious = false;
4219 + return ops;
4222 + utrace->reporting = NULL;
4223 + return NULL;
4227 + * Do a normal reporting pass for engines interested in @event.
4228 + * @callback is the name of the member in the ops vector, and remaining
4229 + * args are the extras it takes after the standard three args.
4230 + */
4231 +#define REPORT_CALLBACKS(rev, task, utrace, report, event, callback, ...) \
4232 + do { \
4233 + struct utrace_engine *engine; \
4234 + const struct utrace_engine_ops *ops; \
4235 + list_for_each_entry##rev(engine, &utrace->attached, entry) { \
4236 + ops = start_callback(utrace, report, engine, task, \
4237 + event); \
4238 + if (!ops) \
4239 + continue; \
4240 + finish_callback(task, utrace, report, engine, \
4241 + (*ops->callback)(__VA_ARGS__)); \
4242 + } \
4243 + } while (0)
4244 +#define REPORT(task, utrace, report, event, callback, ...) \
4245 + do { \
4246 + start_report(utrace); \
4247 + REPORT_CALLBACKS(, task, utrace, report, event, callback, \
4248 + (report)->action, engine, ## __VA_ARGS__); \
4249 + finish_report(task, utrace, report, true); \
4250 + } while (0)
4253 + * Called iff UTRACE_EVENT(EXEC) flag is set.
4254 + */
4255 +void utrace_report_exec(struct linux_binfmt *fmt, struct linux_binprm *bprm,
4256 + struct pt_regs *regs)
4258 + struct task_struct *task = current;
4259 + struct utrace *utrace = task_utrace_struct(task);
4260 + INIT_REPORT(report);
4262 + REPORT(task, utrace, &report, UTRACE_EVENT(EXEC),
4263 + report_exec, fmt, bprm, regs);
4266 +static u32 do_report_syscall_entry(struct pt_regs *regs,
4267 + struct task_struct *task,
4268 + struct utrace *utrace,
4269 + struct utrace_report *report,
4270 + u32 resume_report)
4272 + start_report(utrace);
4273 + REPORT_CALLBACKS(_reverse, task, utrace, report,
4274 + UTRACE_EVENT(SYSCALL_ENTRY), report_syscall_entry,
4275 + resume_report | report->result | report->action,
4276 + engine, regs);
4277 + finish_report(task, utrace, report, false);
4279 + if (report->action != UTRACE_STOP)
4280 + return 0;
4282 + utrace_stop(task, utrace, report->resume_action);
4284 + if (fatal_signal_pending(task)) {
4285 + /*
4286 + * We are continuing despite UTRACE_STOP because of a
4287 + * SIGKILL. Don't let the system call actually proceed.
4288 + */
4289 + report->result = UTRACE_SYSCALL_ABORT;
4290 + } else if (utrace->resume <= UTRACE_REPORT) {
4291 + /*
4292 + * If we've been asked for another report after our stop,
4293 + * go back to report (and maybe stop) again before we run
4294 + * the system call. The second (and later) reports are
4295 + * marked with the UTRACE_SYSCALL_RESUMED flag so that
4296 + * engines know this is a second report at the same
4297 + * entry. This gives them the chance to examine the
4298 + * registers anew after they might have been changed
4299 + * while we were stopped.
4300 + */
4301 + report->detaches = false;
4302 + report->spurious = true;
4303 + report->action = report->resume_action = UTRACE_RESUME;
4304 + return UTRACE_SYSCALL_RESUMED;
4307 + return 0;
4311 + * Called iff UTRACE_EVENT(SYSCALL_ENTRY) flag is set.
4312 + * Return true to prevent the system call.
4313 + */
4314 +bool utrace_report_syscall_entry(struct pt_regs *regs)
4316 + struct task_struct *task = current;
4317 + struct utrace *utrace = task_utrace_struct(task);
4318 + INIT_REPORT(report);
4319 + u32 resume_report = 0;
4321 + do {
4322 + resume_report = do_report_syscall_entry(regs, task, utrace,
4323 + &report, resume_report);
4324 + } while (resume_report);
4326 + return utrace_syscall_action(report.result) == UTRACE_SYSCALL_ABORT;
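+/*
+ * For example, an engine that vetoes system calls could return the
+ * abort action from its entry callback.  A sketch, with
+ * my_report_syscall_entry a hypothetical callback:
+ *
+ *	static u32 my_report_syscall_entry(u32 action,
+ *					   struct utrace_engine *engine,
+ *					   struct pt_regs *regs)
+ *	{
+ *		return UTRACE_SYSCALL_ABORT | UTRACE_RESUME;
+ *	}
+ */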
4330 + * Called iff UTRACE_EVENT(SYSCALL_EXIT) flag is set.
4331 + */
4332 +void utrace_report_syscall_exit(struct pt_regs *regs)
4334 + struct task_struct *task = current;
4335 + struct utrace *utrace = task_utrace_struct(task);
4336 + INIT_REPORT(report);
4338 + REPORT(task, utrace, &report, UTRACE_EVENT(SYSCALL_EXIT),
4339 + report_syscall_exit, regs);
4343 + * Called iff UTRACE_EVENT(CLONE) flag is set.
4344 + * This notification call blocks the wake_up_new_task call on the child.
4345 + * So we must not quiesce here. tracehook_report_clone_complete will do
4346 + * a quiescence check momentarily.
4347 + */
4348 +void utrace_report_clone(unsigned long clone_flags, struct task_struct *child)
4350 + struct task_struct *task = current;
4351 + struct utrace *utrace = task_utrace_struct(task);
4352 + INIT_REPORT(report);
4354 + /*
4355 + * We don't use the REPORT() macro here, because we need
4356 + * to clear utrace->cloning before finish_report().
4357 + * After finish_report(), utrace can be a stale pointer
4358 + * in cases when report.action is still UTRACE_RESUME.
4359 + */
4360 + start_report(utrace);
4361 + utrace->cloning = child;
4363 + REPORT_CALLBACKS(, task, utrace, &report,
4364 + UTRACE_EVENT(CLONE), report_clone,
4365 + report.action, engine, clone_flags, child);
4367 + utrace->cloning = NULL;
4368 + finish_report(task, utrace, &report, !(clone_flags & CLONE_VFORK));
4370 + /*
4371 + * For a vfork, we will go into an uninterruptible block waiting
4372 + * for the child. We need UTRACE_STOP to happen before this, not
4373 + * after. For CLONE_VFORK, utrace_finish_vfork() will be called.
4374 + */
4375 + if (report.action == UTRACE_STOP && (clone_flags & CLONE_VFORK)) {
4376 + spin_lock(&utrace->lock);
4377 + utrace->vfork_stop = 1;
4378 + spin_unlock(&utrace->lock);
4383 + * We're called after utrace_report_clone() for a CLONE_VFORK.
4384 + * If UTRACE_STOP was left from the clone report, we stop here.
4385 + * After this, we'll enter the uninterruptible wait_for_completion()
4386 + * waiting for the child.
4387 + */
4388 +void utrace_finish_vfork(struct task_struct *task)
4390 + struct utrace *utrace = task_utrace_struct(task);
4392 + if (utrace->vfork_stop) {
4393 + spin_lock(&utrace->lock);
4394 + utrace->vfork_stop = 0;
4395 + spin_unlock(&utrace->lock);
4396 + utrace_stop(task, utrace, UTRACE_RESUME); /* XXX */
4401 + * Called iff UTRACE_EVENT(JCTL) flag is set.
4403 + * Called with siglock held.
4404 + */
4405 +void utrace_report_jctl(int notify, int what)
4407 + struct task_struct *task = current;
4408 + struct utrace *utrace = task_utrace_struct(task);
4409 + INIT_REPORT(report);
4411 + spin_unlock_irq(&task->sighand->siglock);
4413 + REPORT(task, utrace, &report, UTRACE_EVENT(JCTL),
4414 + report_jctl, what, notify);
4416 + spin_lock_irq(&task->sighand->siglock);
4420 + * Called iff UTRACE_EVENT(EXIT) flag is set.
4421 + */
4422 +void utrace_report_exit(long *exit_code)
4424 + struct task_struct *task = current;
4425 + struct utrace *utrace = task_utrace_struct(task);
4426 + INIT_REPORT(report);
4427 + long orig_code = *exit_code;
4429 + REPORT(task, utrace, &report, UTRACE_EVENT(EXIT),
4430 + report_exit, orig_code, exit_code);
4432 + if (report.action == UTRACE_STOP)
4433 + utrace_stop(task, utrace, report.resume_action);
4437 + * Called iff UTRACE_EVENT(DEATH) or UTRACE_EVENT(QUIESCE) flag is set.
4439 + * It is always possible that we are racing with utrace_release_task here.
4440 + * For this reason, utrace_release_task checks for the event bits that get
4441 + * us here, and delays its cleanup for us to do.
4442 + */
4443 +void utrace_report_death(struct task_struct *task, struct utrace *utrace,
4444 + bool group_dead, int signal)
4446 + INIT_REPORT(report);
4448 + BUG_ON(!task->exit_state);
4450 + /*
4451 + * We are presently considered "quiescent"--which is accurate
4452 + * inasmuch as we won't run any more user instructions ever again.
4453 + * But for utrace_control and utrace_set_events to be robust, they
4454 + * must be sure whether or not we will run any more callbacks. If
4455 + * a call comes in before we do, taking the lock here synchronizes
4456 + * us so we don't run any callbacks just disabled. Calls that come
4457 + * in while we're running the callbacks will see the exit.death
4458 + * flag and know that we are not yet fully quiescent for purposes
4459 + * of detach bookkeeping.
4460 + */
4461 + spin_lock(&utrace->lock);
4462 + BUG_ON(utrace->death);
4463 + utrace->death = 1;
4464 + utrace->resume = UTRACE_RESUME;
4465 + splice_attaching(utrace);
4466 + spin_unlock(&utrace->lock);
4468 + REPORT_CALLBACKS(, task, utrace, &report, UTRACE_EVENT(DEATH),
4469 + report_death, engine, group_dead, signal);
4471 + utrace_maybe_reap(task, utrace, false);
4475 + * Finish the last reporting pass before returning to user mode.
4476 + */
4477 +static void finish_resume_report(struct task_struct *task,
4478 + struct utrace *utrace,
4479 + struct utrace_report *report)
4481 + finish_report_reset(task, utrace, report);
4483 + switch (report->action) {
4484 + case UTRACE_STOP:
4485 + utrace_stop(task, utrace, report->resume_action);
4486 + break;
4488 + case UTRACE_INTERRUPT:
4489 + if (!signal_pending(task))
4490 + set_tsk_thread_flag(task, TIF_SIGPENDING);
4491 + break;
4493 + case UTRACE_BLOCKSTEP:
4494 + if (likely(arch_has_block_step())) {
4495 + user_enable_block_step(task);
4496 + break;
4499 + /*
4500 + * This means some callback is to blame for failing
4501 + * to check arch_has_block_step() itself. Warn and
4502 + * then fall through to treat it as SINGLESTEP.
4503 + */
4504 + WARN(1, "UTRACE_BLOCKSTEP when !arch_has_block_step()");
4506 + case UTRACE_SINGLESTEP:
4507 + if (likely(arch_has_single_step())) {
4508 + user_enable_single_step(task);
4509 + } else {
4510 + /*
4511 + * This means some callback is to blame for failing
4512 + * to check arch_has_single_step() itself. Warn
4513 + * about it so the offending module gets fixed.
4514 + */
4515 + WARN(1,
4516 + "UTRACE_SINGLESTEP when !arch_has_single_step()");
4518 + break;
4520 + case UTRACE_REPORT:
4521 + case UTRACE_RESUME:
4522 + default:
4523 + user_disable_single_step(task);
4524 + break;
4529 + * This is called when TIF_NOTIFY_RESUME had been set (and is now clear).
4530 + * We are close to user mode, and this is the place to report or stop.
4531 + * When we return, we're going to user mode or into the signals code.
4532 + */
4533 +void utrace_resume(struct task_struct *task, struct pt_regs *regs)
4535 + struct utrace *utrace = task_utrace_struct(task);
4536 + INIT_REPORT(report);
4537 + struct utrace_engine *engine;
4539 + /*
4540 + * Some machines get here with interrupts disabled. The same arch
4541 + * code path leads to calling into get_signal_to_deliver(), which
4542 + * implicitly reenables them by virtue of spin_unlock_irq.
4543 + */
4544 + local_irq_enable();
4546 + /*
4547 + * If this flag is still set, it's because a signal handler was
4548 + * set up but no report_signal followed it. Clear the flag
4549 + * before we return to user mode so it doesn't confuse us later.
4550 + */
4551 + if (unlikely(utrace->signal_handler)) {
4552 + spin_lock(&utrace->lock);
4553 + utrace->signal_handler = 0;
4554 + spin_unlock(&utrace->lock);
4557 + /*
4558 + * Update our bookkeeping even if there are no callbacks made here.
4559 + */
4560 + report.action = start_report(utrace);
4562 + switch (report.action) {
4563 + case UTRACE_RESUME:
4564 + /*
4565 + * Anything we might have done was already handled by
4566 + * utrace_get_signal(), or this is an entirely spurious
4567 + * call. (The arch might use TIF_NOTIFY_RESUME for other
4568 + * purposes as well as calling us.)
4569 + */
4570 + return;
4571 + case UTRACE_REPORT:
4572 + if (unlikely(!(task->utrace_flags & UTRACE_EVENT(QUIESCE))))
4573 + break;
4574 + /*
4575 + * Do a simple reporting pass, with no specific
4576 + * callback after report_quiesce.
4577 + */
4578 + report.action = UTRACE_RESUME;
4579 + list_for_each_entry(engine, &utrace->attached, entry)
4580 + start_callback(utrace, &report, engine, task, 0);
4581 + break;
4582 + default:
4583 + /*
4584 + * Even if this report was truly spurious, there is no need
4585 + * for utrace_reset() now. TIF_NOTIFY_RESUME was already
4586 + * cleared--it doesn't stay spuriously set.
4587 + */
4588 + report.spurious = false;
4589 + break;
4592 + /*
4593 + * Finish the report and either stop or get ready to resume.
4594 + * If utrace->resume was not UTRACE_REPORT, this applies its
4595 + * effect now (i.e. step or interrupt).
4596 + */
4597 + finish_resume_report(task, utrace, &report);
4601 + * Return true if current has forced signal_pending().
4603 + * This is called only when current->utrace_flags is nonzero, so we know
4604 + * that current->utrace must be set. It's not inlined in tracehook.h
4605 + * just so that struct utrace can stay opaque outside this file.
4606 + */
4607 +bool utrace_interrupt_pending(void)
4609 + return task_utrace_struct(current)->resume == UTRACE_INTERRUPT;
4613 + * Take the siglock and push @info back on our queue.
4614 + * Returns with @task->sighand->siglock held.
4615 + */
4616 +static void push_back_signal(struct task_struct *task, siginfo_t *info)
4617 + __acquires(task->sighand->siglock)
4619 + struct sigqueue *q;
4621 + if (unlikely(!info->si_signo)) { /* Oh, a wise guy! */
4622 + spin_lock_irq(&task->sighand->siglock);
4623 + return;
4626 + q = sigqueue_alloc();
4627 + if (likely(q)) {
4628 + q->flags = 0;
4629 + copy_siginfo(&q->info, info);
4632 + spin_lock_irq(&task->sighand->siglock);
4634 + sigaddset(&task->pending.signal, info->si_signo);
4635 + if (likely(q))
4636 + list_add(&q->list, &task->pending.list);
4638 + set_tsk_thread_flag(task, TIF_SIGPENDING);
4642 + * This is the hook from the signals code, called with the siglock held.
4643 + * Here is the ideal place to stop. We also dequeue and intercept signals.
4644 + */
4645 +int utrace_get_signal(struct task_struct *task, struct pt_regs *regs,
4646 + siginfo_t *info, struct k_sigaction *return_ka)
4647 + __releases(task->sighand->siglock)
4648 + __acquires(task->sighand->siglock)
4650 + struct utrace *utrace;
4651 + struct k_sigaction *ka;
4652 + INIT_REPORT(report);
4653 + struct utrace_engine *engine;
4654 + const struct utrace_engine_ops *ops;
4655 + unsigned long event, want;
4656 + u32 ret;
4657 + int signr;
4659 + utrace = task_utrace_struct(task);
4660 + if (utrace->resume < UTRACE_RESUME ||
4661 + utrace->pending_attach || utrace->signal_handler) {
4662 + enum utrace_resume_action resume;
4664 + /*
4665 + * We've been asked for an explicit report before we
4666 + * even check for pending signals.
4667 + */
4669 + spin_unlock_irq(&task->sighand->siglock);
4671 + spin_lock(&utrace->lock);
4673 + splice_attaching(utrace);
4675 + report.result = utrace->signal_handler ?
4676 + UTRACE_SIGNAL_HANDLER : UTRACE_SIGNAL_REPORT;
4677 + utrace->signal_handler = 0;
4679 + resume = utrace->resume;
4680 + utrace->resume = UTRACE_RESUME;
4682 + spin_unlock(&utrace->lock);
4684 + /*
4685 + * Make sure signal_pending() only returns true
4686 + * if there are real signals pending.
4687 + */
4688 + if (signal_pending(task)) {
4689 + spin_lock_irq(&task->sighand->siglock);
4690 + recalc_sigpending();
4691 + spin_unlock_irq(&task->sighand->siglock);
4692 + }
4694 + if (resume > UTRACE_REPORT) {
4695 + /*
4696 + * We only got here to process utrace->resume.
4697 + * Despite no callbacks, this report is not spurious.
4698 + */
4699 + report.action = resume;
4700 + report.spurious = false;
4701 + finish_resume_report(task, utrace, &report);
4702 + return -1;
4703 + } else if (!(task->utrace_flags & UTRACE_EVENT(QUIESCE))) {
4704 + /*
4705 + * We only got here to clear utrace->signal_handler.
4706 + */
4707 + return -1;
4708 + }
4710 + /*
4711 + * Do a reporting pass for no signal, just for EVENT(QUIESCE).
4712 + * The engine callbacks can fill in *info and *return_ka.
4713 + * We'll pass NULL for the @orig_ka argument to indicate
4714 + * that there was no original signal.
4715 + */
4716 + event = 0;
4717 + ka = NULL;
4718 + memset(return_ka, 0, sizeof *return_ka);
4719 + } else if (!(task->utrace_flags & UTRACE_EVENT_SIGNAL_ALL) ||
4720 + unlikely(task->signal->group_stop_count)) {
4721 + /*
4722 + * If no engine is interested in intercepting signals or
4723 + * we must stop, let the caller just dequeue them normally
4724 + * or participate in group-stop.
4725 + */
4726 + return 0;
4727 + } else {
4728 + /*
4729 + * Steal the next signal so we can let tracing engines
4730 + * examine it. From the signal number and sigaction,
4731 + * determine what normal delivery would do. If no
4732 + * engine perturbs it, we'll do that by returning the
4733 + * signal number after setting *return_ka.
4734 + */
4735 + signr = dequeue_signal(task, &task->blocked, info);
4736 + if (signr == 0)
4737 + return signr;
4738 + BUG_ON(signr != info->si_signo);
4740 + ka = &task->sighand->action[signr - 1];
4741 + *return_ka = *ka;
4743 + /*
4744 + * We are never allowed to interfere with SIGKILL.
4745 + * Just punt after filling in *return_ka for our caller.
4746 + */
4747 + if (signr == SIGKILL)
4748 + return signr;
4750 + if (ka->sa.sa_handler == SIG_IGN) {
4751 + event = UTRACE_EVENT(SIGNAL_IGN);
4752 + report.result = UTRACE_SIGNAL_IGN;
4753 + } else if (ka->sa.sa_handler != SIG_DFL) {
4754 + event = UTRACE_EVENT(SIGNAL);
4755 + report.result = UTRACE_SIGNAL_DELIVER;
4756 + } else if (sig_kernel_coredump(signr)) {
4757 + event = UTRACE_EVENT(SIGNAL_CORE);
4758 + report.result = UTRACE_SIGNAL_CORE;
4759 + } else if (sig_kernel_ignore(signr)) {
4760 + event = UTRACE_EVENT(SIGNAL_IGN);
4761 + report.result = UTRACE_SIGNAL_IGN;
4762 + } else if (signr == SIGSTOP) {
4763 + event = UTRACE_EVENT(SIGNAL_STOP);
4764 + report.result = UTRACE_SIGNAL_STOP;
4765 + } else if (sig_kernel_stop(signr)) {
4766 + event = UTRACE_EVENT(SIGNAL_STOP);
4767 + report.result = UTRACE_SIGNAL_TSTP;
4768 + } else {
4769 + event = UTRACE_EVENT(SIGNAL_TERM);
4770 + report.result = UTRACE_SIGNAL_TERM;
4771 + }
4773 + /*
4774 + * Now that we know what event type this signal is, we
4775 + * can short-circuit if no engine cares about it.
4776 + */
4777 + if ((task->utrace_flags & (event | UTRACE_EVENT(QUIESCE))) == 0)
4778 + return signr;
4780 + /*
4781 + * We have some interested engines, so tell them about
4782 + * the signal and let them change its disposition.
4783 + */
4784 + spin_unlock_irq(&task->sighand->siglock);
4785 + }
4787 + /*
4788 + * This reporting pass chooses what signal disposition we'll act on.
4789 + */
4790 + list_for_each_entry(engine, &utrace->attached, entry) {
4791 + /*
4792 + * See start_callback() comment about this barrier.
4793 + */
4794 + utrace->reporting = engine;
4795 + smp_mb();
4797 + /*
4798 + * This pairs with the barrier in mark_engine_detached(),
4799 + * see start_callback() comments.
4800 + */
4801 + want = engine->flags;
4802 + smp_rmb();
4803 + ops = engine->ops;
4805 + if ((want & (event | UTRACE_EVENT(QUIESCE))) == 0) {
4806 + utrace->reporting = NULL;
4807 + continue;
4808 + }
4810 + if (ops->report_signal)
4811 + ret = (*ops->report_signal)(
4812 + report.result | report.action, engine,
4813 + regs, info, ka, return_ka);
4814 + else
4815 + ret = (report.result | (*ops->report_quiesce)(
4816 + report.action, engine, event));
4818 + /*
4819 + * Avoid a tight loop of reporting again and again if some
4820 + * engine keeps asking for it with UTRACE_INTERRUPT or UTRACE_REPORT.
4821 + */
4822 + switch (utrace_resume_action(ret)) {
4823 + default:
4824 + break;
4825 + case UTRACE_INTERRUPT:
4826 + case UTRACE_REPORT:
4827 + ret = (ret & ~UTRACE_RESUME_MASK) | UTRACE_RESUME;
4828 + break;
4829 + }
4831 + finish_callback(task, utrace, &report, engine, ret);
4832 + }
4834 + /*
4835 + * We express the chosen action to the signals code in terms
4836 + * of a representative signal whose default action does it.
4837 + * Our caller uses our return value (signr) to decide what to
4838 + * do, but uses info->si_signo as the signal number to report.
4839 + */
4840 + switch (utrace_signal_action(report.result)) {
4841 + case UTRACE_SIGNAL_TERM:
4842 + signr = SIGTERM;
4843 + break;
4845 + case UTRACE_SIGNAL_CORE:
4846 + signr = SIGQUIT;
4847 + break;
4849 + case UTRACE_SIGNAL_STOP:
4850 + signr = SIGSTOP;
4851 + break;
4853 + case UTRACE_SIGNAL_TSTP:
4854 + signr = SIGTSTP;
4855 + break;
4857 + case UTRACE_SIGNAL_DELIVER:
4858 + signr = info->si_signo;
4860 + if (return_ka->sa.sa_handler == SIG_DFL) {
4861 + /*
4862 + * We'll do signr's normal default action.
4863 + * For ignore, we'll fall through below.
4864 + * For stop/death, break out; the code below relocks and returns signr.
4865 + */
4866 + if (likely(signr) && !sig_kernel_ignore(signr))
4867 + break;
4868 + } else if (return_ka->sa.sa_handler != SIG_IGN &&
4869 + likely(signr)) {
4870 + /*
4871 + * Complete the bookkeeping after the report.
4872 + * The handler will run. If an engine wanted to
4873 + * stop or step, then make sure we do another
4874 + * report after signal handler setup.
4875 + */
4876 + if (report.action != UTRACE_RESUME)
4877 + report.action = UTRACE_INTERRUPT;
4878 + finish_report(task, utrace, &report, true);
4880 + if (unlikely(report.result & UTRACE_SIGNAL_HOLD))
4881 + push_back_signal(task, info);
4882 + else
4883 + spin_lock_irq(&task->sighand->siglock);
4885 + /*
4886 + * We do the SA_ONESHOT work here since the
4887 + * normal path will only touch *return_ka now.
4888 + */
4889 + if (unlikely(return_ka->sa.sa_flags & SA_ONESHOT)) {
4890 + return_ka->sa.sa_flags &= ~SA_ONESHOT;
4891 + if (likely(valid_signal(signr))) {
4892 + ka = &task->sighand->action[signr - 1];
4893 + ka->sa.sa_handler = SIG_DFL;
4894 + }
4895 + }
4897 + return signr;
4898 + }
4900 + /* Fall through for an ignored signal. */
4902 + case UTRACE_SIGNAL_IGN:
4903 + case UTRACE_SIGNAL_REPORT:
4904 + default:
4905 + /*
4906 + * If the signal is being ignored, then we are on the way
4907 + * directly back to user mode. We can stop here, or step,
4908 + * as in utrace_resume(), above. After we've dealt with that,
4909 + * our caller will relock and come back through here.
4910 + */
4911 + finish_resume_report(task, utrace, &report);
4913 + if (unlikely(fatal_signal_pending(task))) {
4914 + /*
4915 + * The only reason we woke up now was because of a
4916 + * SIGKILL. Don't do normal dequeuing in case it
4917 + * might get a signal other than SIGKILL. That would
4918 + * perturb the death state so it might differ from
4919 + * what the debugger would have allowed to happen.
4920 + * Instead, pluck out just the SIGKILL to be sure
4921 + * we'll die immediately with nothing else different
4922 + * from the quiescent state the debugger wanted us in.
4923 + */
4924 + sigset_t sigkill_only;
4925 + siginitsetinv(&sigkill_only, sigmask(SIGKILL));
4926 + spin_lock_irq(&task->sighand->siglock);
4927 + signr = dequeue_signal(task, &sigkill_only, info);
4928 + BUG_ON(signr != SIGKILL);
4929 + *return_ka = task->sighand->action[SIGKILL - 1];
4930 + return signr;
4931 + }
4933 + if (unlikely(report.result & UTRACE_SIGNAL_HOLD)) {
4934 + push_back_signal(task, info);
4935 + spin_unlock_irq(&task->sighand->siglock);
4936 + }
4938 + return -1;
4939 + }
4941 + /*
4942 + * Complete the bookkeeping after the report.
4943 + * This sets utrace->resume if UTRACE_STOP was used.
4944 + */
4945 + finish_report(task, utrace, &report, true);
4947 + return_ka->sa.sa_handler = SIG_DFL;
4949 + /*
4950 + * If this signal is fatal, si_signo gets through as exit_code.
4951 + * We can't allow a completely bogus value there or else core
4952 + * kernel code can freak out. (If an engine wants to control
4953 + * the exit_code value exactly, it can do so in report_exit.)
4954 + * We'll produce a big complaint in dmesg, but won't crash.
4955 + * That's nicer for debugging your utrace engine.
4956 + */
4957 + if (unlikely(info->si_signo & 0x80)) {
4958 + WARN(1, "utrace engine left bogus si_signo value!");
4959 + info->si_signo = SIGTRAP;
4960 + }
4962 + if (unlikely(report.result & UTRACE_SIGNAL_HOLD))
4963 + push_back_signal(task, info);
4964 + else
4965 + spin_lock_irq(&task->sighand->siglock);
4967 + if (sig_kernel_stop(signr))
4968 + task->signal->flags |= SIGNAL_STOP_DEQUEUED;
4970 + return signr;
4971 +}
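/*
 * Sketch of how the signals code consumes utrace_get_signal(), assuming
 * the tracehook.h half of this patch (reconstructed, not quoted):
 * get_signal_to_deliver() calls through a wrapper like this with the
 * siglock held.  A return of 0 means dequeue normally, -1 means nothing
 * to deliver now (loop again), and a positive value is signr with
 * *return_ka already filled in.
 */
static inline int tracehook_get_signal(struct task_struct *task,
				       struct pt_regs *regs,
				       siginfo_t *info,
				       struct k_sigaction *return_ka)
{
	if (unlikely(task_utrace_flags(task)))
		return utrace_get_signal(task, regs, info, return_ka);
	return 0;
}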
4973 +/*
4974 + * This gets called after a signal handler has been set up.
4975 + * We set a flag so the next report knows it happened.
4976 + * If we're already stepping, make sure we do a report_signal.
4977 + * If not, make sure we get into utrace_resume() where we can
4978 + * clear the signal_handler flag before resuming.
4979 + */
4980 +void utrace_signal_handler(struct task_struct *task, int stepping)
4981 +{
4982 + struct utrace *utrace = task_utrace_struct(task);
4984 + spin_lock(&utrace->lock);
4986 + utrace->signal_handler = 1;
4987 + if (utrace->resume > UTRACE_INTERRUPT) {
4988 + if (stepping) {
4989 + utrace->resume = UTRACE_INTERRUPT;
4990 + set_tsk_thread_flag(task, TIF_SIGPENDING);
4991 + } else if (utrace->resume == UTRACE_RESUME) {
4992 + set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
4993 + }
4994 + }
4996 + spin_unlock(&utrace->lock);
4997 +}
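/*
 * Sketch of the expected caller, assuming the tracehook.h half of this
 * patch: each arch's signal-frame setup code reports back through a
 * wrapper along these lines once the handler frame is in place (the
 * exact ptrace interplay in the real header may differ).
 */
static inline void tracehook_signal_handler(int sig, siginfo_t *info,
					    const struct k_sigaction *ka,
					    struct pt_regs *regs,
					    int stepping)
{
	if (task_utrace_flags(current))
		utrace_signal_handler(current, stepping);
	if (stepping)
		ptrace_notify(SIGTRAP);
}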
4999 +/**
5000 + * utrace_prepare_examine - prepare to examine thread state
5001 + * @target: thread of interest, a &struct task_struct pointer
5002 + * @engine: engine pointer returned by utrace_attach_task()
5003 + * @exam: temporary state, a &struct utrace_examiner pointer
5005 + * This call prepares to safely examine the thread @target using
5006 + * &struct user_regset calls, or direct access to thread-synchronous fields.
5008 + * When @target is current, this call is superfluous. When @target is
5009 + * another thread, it must be held stopped via %UTRACE_STOP by @engine.
5011 + * This call may block the caller until @target stays stopped, so it must
5012 + * be called only after the caller is sure @target is about to unschedule.
5013 + * This means a zero return from a utrace_control() call on @engine giving
5014 + * %UTRACE_STOP, or a report_quiesce() or report_signal() callback to
5015 + * @engine that used %UTRACE_STOP in its return value.
5017 + * Returns -%ESRCH if @target is dead or -%EINVAL if %UTRACE_STOP was
5018 + * not used. If @target has started running again despite %UTRACE_STOP
5019 + * (for %SIGKILL or a spurious wakeup), this call returns -%EAGAIN.
5021 + * When this call returns zero, it's safe to use &struct user_regset
5022 + * calls and task_user_regset_view() on @target and to examine some of
5023 + * its fields directly. When the examination is complete, a
5024 + * utrace_finish_examine() call must follow to check whether it was
5025 + * completed safely.
5026 + */
5027 +int utrace_prepare_examine(struct task_struct *target,
5028 + struct utrace_engine *engine,
5029 + struct utrace_examiner *exam)
5030 +{
5031 + int ret = 0;
5033 + if (unlikely(target == current))
5034 + return 0;
5036 + rcu_read_lock();
5037 + if (unlikely(!engine_wants_stop(engine)))
5038 + ret = -EINVAL;
5039 + else if (unlikely(target->exit_state))
5040 + ret = -ESRCH;
5041 + else {
5042 + exam->state = target->state;
5043 + if (unlikely(exam->state == TASK_RUNNING))
5044 + ret = -EAGAIN;
5045 + else
5046 + get_task_struct(target);
5047 + }
5048 + rcu_read_unlock();
5050 + if (likely(!ret)) {
5051 + exam->ncsw = wait_task_inactive(target, exam->state);
5052 + put_task_struct(target);
5053 + if (unlikely(!exam->ncsw))
5054 + ret = -EAGAIN;
5055 + }
5057 + return ret;
5058 +}
5059 +EXPORT_SYMBOL_GPL(utrace_prepare_examine);
5061 +/**
5062 + * utrace_finish_examine - complete an examination of thread state
5063 + * @target: thread of interest, a &struct task_struct pointer
5064 + * @engine: engine pointer returned by utrace_attach_task()
5065 + * @exam: pointer passed to utrace_prepare_examine() call
5067 + * This call completes an examination on the thread @target begun by a
5068 + * paired utrace_prepare_examine() call with the same arguments that
5069 + * returned success (zero).
5071 + * When @target is current, this call is superfluous. When @target is
5072 + * another thread, this returns zero if @target has remained unscheduled
5073 + * since the paired utrace_prepare_examine() call returned zero.
5075 + * When this returns an error, any examination done since the paired
5076 + * utrace_prepare_examine() call is unreliable and the data extracted
5077 + * should be discarded. The error is -%EINVAL if @engine is not
5078 + * keeping @target stopped, or -%EAGAIN if @target woke up unexpectedly.
5079 + */
5080 +int utrace_finish_examine(struct task_struct *target,
5081 + struct utrace_engine *engine,
5082 + struct utrace_examiner *exam)
5083 +{
5084 + int ret = 0;
5086 + if (unlikely(target == current))
5087 + return 0;
5089 + rcu_read_lock();
5090 + if (unlikely(!engine_wants_stop(engine)))
5091 + ret = -EINVAL;
5092 + else if (unlikely(target->state != exam->state))
5093 + ret = -EAGAIN;
5094 + else
5095 + get_task_struct(target);
5096 + rcu_read_unlock();
5098 + if (likely(!ret)) {
5099 + unsigned long ncsw = wait_task_inactive(target, exam->state);
5100 + if (unlikely(ncsw != exam->ncsw))
5101 + ret = -EAGAIN;
5102 + put_task_struct(target);
5103 + }
5105 + return ret;
5106 +}
5107 +EXPORT_SYMBOL_GPL(utrace_finish_examine);
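/*
 * A hypothetical engine-side use of the pair above, reading
 * general-purpose registers from a thread held stopped by this engine
 * (demo_* and the regset index are illustrative; a real engine would
 * pick the regset it needs from the view):
 */
static int demo_read_gprs(struct task_struct *target,
			  struct utrace_engine *engine,
			  void *buf, unsigned int size)
{
	const struct user_regset_view *view = task_user_regset_view(target);
	const struct user_regset *regset = &view->regsets[0];
	struct utrace_examiner exam;
	int ret, err;

	ret = utrace_prepare_examine(target, engine, &exam);
	if (ret)
		return ret;

	ret = regset->get(target, regset, 0, size, buf, NULL);

	err = utrace_finish_examine(target, engine, &exam);
	if (err)
		ret = err;	/* target ran; the data read is unreliable */
	return ret;
}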
5109 +/*
5110 + * This is declared in linux/regset.h and defined in machine-dependent
5111 + * code. We put the export here to ensure no machine forgets it.
5112 + */
5113 +EXPORT_SYMBOL_GPL(task_user_regset_view);
5115 +/*
5116 + * Called with rcu_read_lock() held.
5117 + */
5118 +void task_utrace_proc_status(struct seq_file *m, struct task_struct *p)
5119 +{
5120 + seq_printf(m, "Utrace:\t%lx\n", p->utrace_flags);