kernel/rcu/tasks.h

   1 /* SPDX-License-Identifier: GPL-2.0+ */
   2 /*
   3  * Task-based RCU implementations.
   4  *
   5  * Copyright (C) 2020 Paul E. McKenney
   6  */
   7
   8 #ifdef CONFIG_TASKS_RCU_GENERIC
   9
  10 ////////////////////////////////////////////////////////////////////////
  11 //
  12 // Generic data structures.
  13
  14 struct rcu_tasks;
/* Waits for one grace period of the given flavor (->gp_func). */
typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);
/* Invoked before the task-list scan of a grace period (->pregp_func). */
typedef void (*pregp_func_t)(void);
/* Invoked for each thread during the scan, given the holdout list @hop. */
typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop);
/* Invoked once after the task-list scan, before the holdout list is polled. */
typedef void (*postscan_func_t)(struct list_head *hop);
/* Polls holdout list @hop; @ndrpt requests a stall report, @frptp flags the first report. */
typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
/* Invoked once the holdout list has become empty (->postgp_func). */
typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  21
/**
 * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
 * @cbs_head: Head of callback list.
 * @cbs_tail: Tail pointer for callback list.
 * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
 * @cbs_lock: Lock protecting callback list.
 * @gp_state: Grace period's most recent state transition (debugging).
 * @gp_jiffies: Time of last @gp_state transition.
 * @gp_start: Most recent grace-period start in jiffies.
 * @n_gps: Number of grace periods completed since boot.
 * @n_ipis: Number of IPIs sent to encourage grace periods to end.
 * @n_ipis_fails: Number of IPI-send failures.
 * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
 * @gp_func: This flavor's grace-period-wait function.
 * @pregp_func: This flavor's pre-grace-period function (optional).
 * @pertask_func: This flavor's per-task scan function (optional).
 * @postscan_func: This flavor's post-task scan function (optional).
 * @holdouts_func: This flavor's holdout-list scan function (optional).
 * @postgp_func: This flavor's post-grace-period function (optional).
 * @call_func: This flavor's call_rcu()-equivalent function.
 * @name: This flavor's textual name.
 * @kname: This flavor's kthread name.
 *
 * The hooks marked optional are invoked unconditionally by
 * rcu_tasks_wait_gp(), so a flavor using that function as its @gp_func
 * must supply all of them.
 */
struct rcu_tasks {
        struct rcu_head *cbs_head;
        struct rcu_head **cbs_tail;
        struct wait_queue_head cbs_wq;
        raw_spinlock_t cbs_lock;
        int gp_state;
        unsigned long gp_jiffies;
        unsigned long gp_start;
        unsigned long n_gps;
        unsigned long n_ipis;
        unsigned long n_ipis_fails;
        struct task_struct *kthread_ptr;
        rcu_tasks_gp_func_t gp_func;
        pregp_func_t pregp_func;
        pertask_func_t pertask_func;
        postscan_func_t postscan_func;
        holdouts_func_t holdouts_func;
        postgp_func_t postgp_func;
        call_rcu_func_t call_func;
        char *name;
        char *kname;
};
  67
/*
 * Statically define and initialize a struct rcu_tasks named @rt_name.
 *
 * @gp is stored in ->gp_func, @call in ->call_func, and @n in ->name.
 * The kthread name (->kname) is the stringified @rt_name.  The callback
 * list starts out empty, with ->cbs_tail referencing ->cbs_head.  The
 * remaining hooks are left NULL for the flavor's init code to fill in.
 */
#define DEFINE_RCU_TASKS(rt_name, gp, call, n)                          \
static struct rcu_tasks rt_name =                                       \
{                                                                       \
        .cbs_tail = &rt_name.cbs_head,                                  \
        .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq),        \
        .cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock),         \
        .gp_func = gp,                                                  \
        .call_func = call,                                              \
        .name = n,                                                      \
        .kname = #rt_name,                                              \
}
  79
/*
 * Track exiting tasks in order to allow them to be waited for.
 * Readers are entered in exit_tasks_rcu_start() and left in
 * exit_tasks_rcu_finish(); rcu_tasks_postscan() waits on them.
 */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);

/* Avoid IPIing CPUs early in the grace period.  Default: HZ/2 jiffies. */
#define RCU_TASK_IPI_DELAY (HZ / 2)
static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
module_param(rcu_task_ipi_delay, int, 0644);

/*
 * Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall.
 * The default of HZ * 60 * 10 jiffies is ten minutes.
 */
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644);
  92
  93 /* RCU tasks grace-period state for debugging. */
  94 #define RTGS_INIT                0
  95 #define RTGS_WAIT_WAIT_CBS       1
  96 #define RTGS_WAIT_GP             2
  97 #define RTGS_PRE_WAIT_GP         3
  98 #define RTGS_SCAN_TASKLIST       4
  99 #define RTGS_POST_SCAN_TASKLIST  5
 100 #define RTGS_WAIT_SCAN_HOLDOUTS  6
 101 #define RTGS_SCAN_HOLDOUTS       7
 102 #define RTGS_POST_GP             8
 103 #define RTGS_WAIT_READERS        9
 104 #define RTGS_INVOKE_CBS         10
 105 #define RTGS_WAIT_CBS           11
 106 static const char * const rcu_tasks_gp_state_names[] = {
 107         "RTGS_INIT",
 108         "RTGS_WAIT_WAIT_CBS",
 109         "RTGS_WAIT_GP",
 110         "RTGS_PRE_WAIT_GP",
 111         "RTGS_SCAN_TASKLIST",
 112         "RTGS_POST_SCAN_TASKLIST",
 113         "RTGS_WAIT_SCAN_HOLDOUTS",
 114         "RTGS_SCAN_HOLDOUTS",
 115         "RTGS_POST_GP",
 116         "RTGS_WAIT_READERS",
 117         "RTGS_INVOKE_CBS",
 118         "RTGS_WAIT_CBS",
 119 };
 120
 121 ////////////////////////////////////////////////////////////////////////
 122 //
 123 // Generic code.
 124
 125 /* Record grace-period phase and time. */
 126 static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
 127 {
 128         rtp->gp_state = newstate;
 129         rtp->gp_jiffies = jiffies;
 130 }
 131
 132 /* Return state name. */
 133 static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
 134 {
 135         int i = data_race(rtp->gp_state); // Let KCSAN detect update races
 136         int j = READ_ONCE(i); // Prevent the compiler from reading twice
 137
 138         if (j >= ARRAY_SIZE(rcu_tasks_gp_state_names))
 139                 return "???";
 140         return rcu_tasks_gp_state_names[j];
 141 }
 142
 143 // Enqueue a callback for the specified flavor of Tasks RCU.
 144 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 145                                    struct rcu_tasks *rtp)
 146 {
 147         unsigned long flags;
 148         bool needwake;
 149
 150         rhp->next = NULL;
 151         rhp->func = func;
 152         raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
 153         needwake = !rtp->cbs_head;
 154         WRITE_ONCE(*rtp->cbs_tail, rhp);
 155         rtp->cbs_tail = &rhp->next;
 156         raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
 157         /* We can't create the thread unless interrupts are enabled. */
 158         if (needwake && READ_ONCE(rtp->kthread_ptr))
 159                 wake_up(&rtp->cbs_wq);
 160 }
 161
 162 // Wait for a grace period for the specified flavor of Tasks RCU.
 163 static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
 164 {
 165         /* Complain if the scheduler has not started.  */
 166         RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
 167                          "synchronize_rcu_tasks called too soon");
 168
 169         /* Wait for the grace period. */
 170         wait_rcu_gp(rtp->call_func);
 171 }
 172
/*
 * RCU-tasks kthread that detects grace periods and invokes callbacks.
 * @arg is the struct rcu_tasks describing the flavor that this kthread
 * serves.  This function never returns.
 */
static int __noreturn rcu_tasks_kthread(void *arg)
{
        unsigned long flags;
        struct rcu_head *list;
        struct rcu_head *next;
        struct rcu_tasks *rtp = arg;

        /* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
        housekeeping_affine(current, HK_FLAG_RCU);
        /* Publishing ->kthread_ptr enables wakeups from call_rcu_tasks_generic(). */
        WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!

        /*
         * Each pass through the following loop makes one check for
         * newly arrived callbacks, and, if there are some, waits for
         * one RCU-tasks grace period and then invokes the callbacks.
         * This loop is terminated by the system going down.  ;-)
         */
        for (;;) {

                /*
                 * Pick up any new callbacks: detach the entire list under
                 * the lock, leaving the flavor's list empty.
                 */
                raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
                smp_mb__after_spinlock(); // Order updates vs. GP.
                list = rtp->cbs_head;
                rtp->cbs_head = NULL;
                rtp->cbs_tail = &rtp->cbs_head;
                raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);

                /* If there were none, wait a bit and start over. */
                if (!list) {
                        wait_event_interruptible(rtp->cbs_wq,
                                                 READ_ONCE(rtp->cbs_head));
                        /*
                         * An interruptible wait can return with the list
                         * still empty, so complain about any pending signal
                         * and back off for HZ/10 jiffies rather than spin.
                         */
                        if (!rtp->cbs_head) {
                                WARN_ON(signal_pending(current));
                                set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
                                schedule_timeout_interruptible(HZ/10);
                        }
                        continue;
                }

                // Wait for one grace period.
                set_tasks_gp_state(rtp, RTGS_WAIT_GP);
                rtp->gp_start = jiffies;
                rtp->gp_func(rtp);
                rtp->n_gps++;

                /*
                 * Invoke the callbacks, each with bottom halves disabled.
                 * ->next is fetched before the call because the callback
                 * might free or reuse its rcu_head.
                 */
                set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
                while (list) {
                        next = list->next;
                        local_bh_disable();
                        list->func(list);
                        local_bh_enable();
                        list = next;
                        cond_resched();
                }
                /* Paranoid sleep to keep this from entering a tight loop */
                schedule_timeout_uninterruptible(HZ/10);

                set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
        }
}
 235
 236 /* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
 237 static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 238 {
 239         struct task_struct *t;
 240
 241         t = kthread_run(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
 242         if (WARN_ONCE(IS_ERR(t), "%s: Could not start %s grace-period kthread, OOM is now expected behavior\n", __func__, rtp->name))
 243                 return;
 244         smp_mb(); /* Ensure others see full kthread. */
 245 }
 246
 247 #ifndef CONFIG_TINY_RCU
 248
/*
 * Print any non-default Tasks RCU settings, followed by one line for
 * each Tasks RCU variant that is built into this kernel.
 */
static void __init rcu_tasks_bootup_oddness(void)
{
#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
        if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
                pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
#endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
#ifdef CONFIG_TASKS_RCU
        pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RUDE_RCU
        pr_info("\tRude variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
        pr_info("\tTracing variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
 268
 269 #endif /* #ifndef CONFIG_TINY_RCU */
 270
 271 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
 272 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 273 {
 274         pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
 275                 rtp->kname,
 276                 tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
 277                 jiffies - data_race(rtp->gp_jiffies),
 278                 data_race(rtp->n_gps),
 279                 data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
 280                 ".k"[!!data_race(rtp->kthread_ptr)],
 281                 ".C"[!!data_race(rtp->cbs_head)],
 282                 s);
 283 }
 284
 285 static void exit_tasks_rcu_finish_trace(struct task_struct *t);
 286
 287 #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
 288
 289 ////////////////////////////////////////////////////////////////////////
 290 //
 291 // Shared code between task-list-scanning variants of Tasks RCU.
 292
 293 /* Wait for one RCU-tasks grace period. */
 294 static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 295 {
 296         struct task_struct *g, *t;
 297         unsigned long lastreport;
 298         LIST_HEAD(holdouts);
 299         int fract;
 300
 301         set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
 302         rtp->pregp_func();
 303
 304         /*
 305          * There were callbacks, so we need to wait for an RCU-tasks
 306          * grace period.  Start off by scanning the task list for tasks
 307          * that are not already voluntarily blocked.  Mark these tasks
 308          * and make a list of them in holdouts.
 309          */
 310         set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST);
 311         rcu_read_lock();
 312         for_each_process_thread(g, t)
 313                 rtp->pertask_func(t, &holdouts);
 314         rcu_read_unlock();
 315
 316         set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST);
 317         rtp->postscan_func(&holdouts);
 318
 319         /*
 320          * Each pass through the following loop scans the list of holdout
 321          * tasks, removing any that are no longer holdouts.  When the list
 322          * is empty, we are done.
 323          */
 324         lastreport = jiffies;
 325
 326         /* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
 327         fract = 10;
 328
 329         for (;;) {
 330                 bool firstreport;
 331                 bool needreport;
 332                 int rtst;
 333
 334                 if (list_empty(&holdouts))
 335                         break;
 336
 337                 /* Slowly back off waiting for holdouts */
 338                 set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
 339                 schedule_timeout_interruptible(HZ/fract);
 340
 341                 if (fract > 1)
 342                         fract--;
 343
 344                 rtst = READ_ONCE(rcu_task_stall_timeout);
 345                 needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
 346                 if (needreport)
 347                         lastreport = jiffies;
 348                 firstreport = true;
 349                 WARN_ON(signal_pending(current));
 350                 set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
 351                 rtp->holdouts_func(&holdouts, needreport, &firstreport);
 352         }
 353
 354         set_tasks_gp_state(rtp, RTGS_POST_GP);
 355         rtp->postgp_func(rtp);
 356 }
 357
 358 #endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
 359
 360 #ifdef CONFIG_TASKS_RCU
 361
 362 ////////////////////////////////////////////////////////////////////////
 363 //
 364 // Simple variant of RCU whose quiescent states are voluntary context
 365 // switch, cond_resched_rcu_qs(), user-space execution, and idle.
 366 // As such, grace periods can take one good long time.  There are no
 367 // read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
 368 // because this implementation is intended to get the system into a safe
 369 // state for some of the manipulations involved in tracing and the like.
 370 // Finally, this implementation does not support high call_rcu_tasks()
 371 // rates from multiple CPUs.  If this is required, per-CPU callback lists
 372 // will be needed.
 373
 374 /* Pre-grace-period preparation. */
 375 static void rcu_tasks_pregp_step(void)
 376 {
 377         /*
 378          * Wait for all pre-existing t->on_rq and t->nvcsw transitions
 379          * to complete.  Invoking synchronize_rcu() suffices because all
 380          * these transitions occur with interrupts disabled.  Without this
 381          * synchronize_rcu(), a read-side critical section that started
 382          * before the grace period might be incorrectly seen as having
 383          * started after the grace period.
 384          *
 385          * This synchronize_rcu() also dispenses with the need for a
 386          * memory barrier on the first store to t->rcu_tasks_holdout,
 387          * as it forces the store to happen after the beginning of the
 388          * grace period.
 389          */
 390         synchronize_rcu();
 391 }
 392
 393 /* Per-task initial processing. */
 394 static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
 395 {
 396         if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
 397                 get_task_struct(t);
 398                 t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
 399                 WRITE_ONCE(t->rcu_tasks_holdout, true);
 400                 list_add(&t->rcu_tasks_holdout_list, hop);
 401         }
 402 }
 403
 404 /* Processing between scanning taskslist and draining the holdout list. */
 405 void rcu_tasks_postscan(struct list_head *hop)
 406 {
 407         /*
 408          * Wait for tasks that are in the process of exiting.  This
 409          * does only part of the job, ensuring that all tasks that were
 410          * previously exiting reach the point where they have disabled
 411          * preemption, allowing the later synchronize_rcu() to finish
 412          * the job.
 413          */
 414         synchronize_srcu(&tasks_rcu_exit_srcu);
 415 }
 416
 417 /* See if tasks are still holding out, complain if so. */
 418 static void check_holdout_task(struct task_struct *t,
 419                                bool needreport, bool *firstreport)
 420 {
 421         int cpu;
 422
 423         if (!READ_ONCE(t->rcu_tasks_holdout) ||
 424             t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
 425             !READ_ONCE(t->on_rq) ||
 426             (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
 427              !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
 428                 WRITE_ONCE(t->rcu_tasks_holdout, false);
 429                 list_del_init(&t->rcu_tasks_holdout_list);
 430                 put_task_struct(t);
 431                 return;
 432         }
 433         rcu_request_urgent_qs_task(t);
 434         if (!needreport)
 435                 return;
 436         if (*firstreport) {
 437                 pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
 438                 *firstreport = false;
 439         }
 440         cpu = task_cpu(t);
 441         pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
 442                  t, ".I"[is_idle_task(t)],
 443                  "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
 444                  t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
 445                  t->rcu_tasks_idle_cpu, cpu);
 446         sched_show_task(t);
 447 }
 448
 449 /* Scan the holdout lists for tasks no longer holding out. */
 450 static void check_all_holdout_tasks(struct list_head *hop,
 451                                     bool needreport, bool *firstreport)
 452 {
 453         struct task_struct *t, *t1;
 454
 455         list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
 456                 check_holdout_task(t, needreport, firstreport);
 457                 cond_resched();
 458         }
 459 }
 460
 461 /* Finish off the Tasks-RCU grace period. */
 462 static void rcu_tasks_postgp(struct rcu_tasks *rtp)
 463 {
 464         /*
 465          * Because ->on_rq and ->nvcsw are not guaranteed to have a full
 466          * memory barriers prior to them in the schedule() path, memory
 467          * reordering on other CPUs could cause their RCU-tasks read-side
 468          * critical sections to extend past the end of the grace period.
 469          * However, because these ->nvcsw updates are carried out with
 470          * interrupts disabled, we can use synchronize_rcu() to force the
 471          * needed ordering on all such CPUs.
 472          *
 473          * This synchronize_rcu() also confines all ->rcu_tasks_holdout
 474          * accesses to be within the grace period, avoiding the need for
 475          * memory barriers for ->rcu_tasks_holdout accesses.
 476          *
 477          * In addition, this synchronize_rcu() waits for exiting tasks
 478          * to complete their final preempt_disable() region of execution,
 479          * cleaning up after the synchronize_srcu() above.
 480          */
 481         synchronize_rcu();
 482 }
 483
/* Declared early because the flavor definition below needs its address. */
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");

/**
 * call_rcu_tasks() - Queue a callback for invocation after a task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_tasks() assumes
 * that the read-side critical sections end at a voluntary context
 * switch (not a preemption!), cond_resched_tasks_rcu_qs(), entry into
 * idle, or transition to usermode execution.  As such, there are no
 * read-side primitives analogous to rcu_read_lock() and rcu_read_unlock()
 * because this primitive is intended to determine that all tasks have
 * passed through a safe state, not so much for data-structure
 * synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
{
        call_rcu_tasks_generic(rhp, func, &rcu_tasks);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);
 510
 511 /**
 512  * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
 513  *
 514  * Control will return to the caller some time after a full rcu-tasks
 515  * grace period has elapsed, in other words after all currently
 516  * executing rcu-tasks read-side critical sections have elapsed.  These
 517  * read-side critical sections are delimited by calls to schedule(),
 518  * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
 519  * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
 520  *
 521  * This is a very specialized primitive, intended only for a few uses in
 522  * tracing and other situations requiring manipulation of function
 523  * preambles and profiling hooks.  The synchronize_rcu_tasks() function
 524  * is not (yet) intended for heavy use from multiple CPUs.
 525  *
 526  * See the description of synchronize_rcu() for more detailed information
 527  * on memory ordering guarantees.
 528  */
 529 void synchronize_rcu_tasks(void)
 530 {
 531         synchronize_rcu_tasks_generic(&rcu_tasks);
 532 }
 533 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
 534
 535 /**
 536  * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
 537  *
 538  * Although the current implementation is guaranteed to wait, it is not
 539  * obligated to, for example, if there are no pending callbacks.
 540  */
 541 void rcu_barrier_tasks(void)
 542 {
 543         /* There is only one callback queue, so this is easy.  ;-) */
 544         synchronize_rcu_tasks();
 545 }
 546 EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
 547
 548 static int __init rcu_spawn_tasks_kthread(void)
 549 {
 550         rcu_tasks.pregp_func = rcu_tasks_pregp_step;
 551         rcu_tasks.pertask_func = rcu_tasks_pertask;
 552         rcu_tasks.postscan_func = rcu_tasks_postscan;
 553         rcu_tasks.holdouts_func = check_all_holdout_tasks;
 554         rcu_tasks.postgp_func = rcu_tasks_postgp;
 555         rcu_spawn_tasks_kthread_generic(&rcu_tasks);
 556         return 0;
 557 }
 558 core_initcall(rcu_spawn_tasks_kthread);
 559
 560 static void show_rcu_tasks_classic_gp_kthread(void)
 561 {
 562         show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
 563 }
 564
 565 /* Do the srcu_read_lock() for the above synchronize_srcu().  */
 566 void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
 567 {
 568         preempt_disable();
 569         current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
 570         preempt_enable();
 571 }
 572
 573 /* Do the srcu_read_unlock() for the above synchronize_srcu().  */
 574 void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
 575 {
 576         struct task_struct *t = current;
 577
 578         preempt_disable();
 579         __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
 580         preempt_enable();
 581         exit_tasks_rcu_finish_trace(t);
 582 }
 583
 584 #else /* #ifdef CONFIG_TASKS_RCU */
/* Stubs for kernels built without CONFIG_TASKS_RCU. */
static void show_rcu_tasks_classic_gp_kthread(void) { }
void exit_tasks_rcu_start(void) { }
/* The Tasks Trace RCU exit processing is still done. */
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
 588 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 589
 590 #ifdef CONFIG_TASKS_RUDE_RCU
 591
 592 ////////////////////////////////////////////////////////////////////////
 593 //
 594 // "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
 595 // passing an empty function to schedule_on_each_cpu().  This approach
 596 // provides an asynchronous call_rcu_tasks_rude() API and batching
 597 // of concurrent calls to the synchronous synchronize_rcu_rude() API.
 598 // This sends IPIs far and wide and induces otherwise unnecessary context
 599 // switches on all online CPUs, whether idle or not.
 600
 601 // Empty function to allow workqueues to force a context switch.
 602 static void rcu_tasks_be_rude(struct work_struct *work)
 603 {
 604 }
 605
 606 // Wait for one rude RCU-tasks grace period.
 607 static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
 608 {
 609         rtp->n_ipis += cpumask_weight(cpu_online_mask);
 610         schedule_on_each_cpu(rcu_tasks_be_rude);
 611 }
 612
/* Declared early because the flavor definition below needs its address. */
void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
                 "RCU Tasks Rude");

/**
 * call_rcu_tasks_rude() - Queue a callback for invocation after a rude task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_tasks_rude()
 * assumes that the read-side critical sections end at context switch,
 * cond_resched_tasks_rcu_qs(), or transition to usermode execution.
 * As such, there are no read-side primitives analogous to
 * rcu_read_lock() and rcu_read_unlock() because this primitive is
 * intended to determine that all tasks have passed through a safe
 * state, not so much for data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
{
        call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks_rude);
 640
 641 /**
 642  * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
 643  *
 644  * Control will return to the caller some time after a rude rcu-tasks
 645  * grace period has elapsed, in other words after all currently
 646  * executing rcu-tasks read-side critical sections have elapsed.  These
 647  * read-side critical sections are delimited by calls to schedule(),
 648  * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
 649  * anyway) cond_resched().
 650  *
 651  * This is a very specialized primitive, intended only for a few uses in
 652  * tracing and other situations requiring manipulation of function preambles
 653  * and profiling hooks.  The synchronize_rcu_tasks_rude() function is not
 654  * (yet) intended for heavy use from multiple CPUs.
 655  *
 656  * See the description of synchronize_rcu() for more detailed information
 657  * on memory ordering guarantees.
 658  */
 659 void synchronize_rcu_tasks_rude(void)
 660 {
 661         synchronize_rcu_tasks_generic(&rcu_tasks_rude);
 662 }
 663 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
 664
 665 /**
 666  * rcu_barrier_tasks_rude - Wait for in-flight call_rcu_tasks_rude() callbacks.
 667  *
 668  * Although the current implementation is guaranteed to wait, it is not
 669  * obligated to, for example, if there are no pending callbacks.
 670  */
 671 void rcu_barrier_tasks_rude(void)
 672 {
 673         /* There is only one callback queue, so this is easy.  ;-) */
 674         synchronize_rcu_tasks_rude();
 675 }
 676 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
 677
 678 static int __init rcu_spawn_tasks_rude_kthread(void)
 679 {
 680         rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
 681         return 0;
 682 }
 683 core_initcall(rcu_spawn_tasks_rude_kthread);
 684
 685 static void show_rcu_tasks_rude_gp_kthread(void)
 686 {
 687         show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
 688 }
 689
 690 #else /* #ifdef CONFIG_TASKS_RUDE_RCU */
/* Stub for kernels built without CONFIG_TASKS_RUDE_RCU. */
static void show_rcu_tasks_rude_gp_kthread(void) {}
 692 #endif /* #else #ifdef CONFIG_TASKS_RUDE_RCU */
 693
 694 ////////////////////////////////////////////////////////////////////////
 695 //
 696 // Tracing variant of Tasks RCU.  This variant is designed to be used
 697 // to protect tracing hooks, including those of BPF.  This variant
 698 // therefore:
 699 //
 700 // 1.   Has explicit read-side markers to allow finite grace periods
 701 //      in the face of in-kernel loops for PREEMPT=n builds.
 702 //
 703 // 2.   Protects code in the idle loop, exception entry/exit, and
 704 //      CPU-hotplug code paths, similar to the capabilities of SRCU.
 705 //
 706 // 3.   Avoids expensive read-side instruction, having overhead similar
 707 //      to that of Preemptible RCU.
 708 //
 709 // There are of course downsides.  The grace-period code can send IPIs to
 710 // CPUs, even when those CPUs are in the idle loop or in nohz_full userspace.
 711 // It is necessary to scan the full tasklist, much as for Tasks RCU.  There
 712 // is a single callback queue guarded by a single lock, again, much as for
 713 // Tasks RCU.  If needed, these downsides can be at least partially remedied.
 714 //
 715 // Perhaps most important, this variant of RCU does not affect the vanilla
 716 // flavors, rcu_preempt and rcu_sched.  The fact that RCU Tasks Trace
 717 // readers can operate from idle, offline, and exception entry/exit in no
 718 // way allows rcu_preempt and rcu_sched readers to also do so.
 719
// The lockdep state must be outside of #ifdef to be useful.
// It is therefore defined whenever CONFIG_DEBUG_LOCK_ALLOC is set,
// independent of CONFIG_TASKS_TRACE_RCU.
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_trace_key;
struct lockdep_map rcu_trace_lock_map =
        STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_trace", &rcu_lock_trace_key);
EXPORT_SYMBOL_GPL(rcu_trace_lock_map);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 727
 728 #ifdef CONFIG_TASKS_TRACE_RCU
 729
// Number of waited-for readers.  Decremented by
// rcu_read_unlock_trace_special() for each reader that had .need_qs set.
atomic_t trc_n_readers_need_end;
// Wait queue that is awakened, via irq_work, when the above count
// reaches zero.
DECLARE_WAIT_QUEUE_HEAD(trc_wait);

// Record outstanding IPIs to each CPU.  No point in sending two...
static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);

// The number of detections of task quiescent state relying on
// heavyweight readers executing explicit memory barriers.
unsigned long n_heavy_reader_attempts;
unsigned long n_heavy_reader_updates;
unsigned long n_heavy_reader_ofl_updates;

// Declared early because the flavor definition below needs its address.
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
                 "RCU Tasks Trace");
 745
 746 /*
 747  * This irq_work handler allows rcu_read_unlock_trace() to be invoked
 748  * while the scheduler locks are held.
      *
      * Instead of calling wake_up() itself, rcu_read_unlock_trace_special()
      * queues rcu_tasks_trace_iw, and the wakeup of anything sleeping on
      * trc_wait is then issued from this handler.  @iwp is not used.
 749  */
 750 static void rcu_read_unlock_iw(struct irq_work *iwp)
 751 {
 752         wake_up(&trc_wait);
 753 }
 754 static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
 755
 756 /*
      * If we are the last reader, wake up the grace-period kthread.
      *
      * @t: task whose read-side state is being updated.
      * @nesting: value to store into @t->trc_reader_nesting.
      *
      * If @t was flagged ->trc_reader_special.b.need_qs, the flag is cleared
      * and trc_n_readers_need_end is decremented; when that count reaches
      * zero, the wakeup is handed off to irq_work (rcu_tasks_trace_iw).
      */
 757 void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
 758 {
             // Snapshot .need_qs once; the snapshot drives both uses below.
 759         int nq = t->trc_reader_special.b.need_qs;
 760
 761         if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
 762             t->trc_reader_special.b.need_mb)
 763                 smp_mb(); // Pairs with update-side barriers.
 764         // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers.
 765         if (nq)
 766                 WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
 767         WRITE_ONCE(t->trc_reader_nesting, nesting);
             // Only a reader that was being waited on gives back a count.
 768         if (nq && atomic_dec_and_test(&trc_n_readers_need_end))
 769                 irq_work_queue(&rcu_tasks_trace_iw);
 770 }
 771 EXPORT_SYMBOL_GPL(rcu_read_unlock_trace_special);
 772
 773 /*
      * Queue @t on the holdout list headed by @bhp.  A task that is already
      * on a holdout list is left alone; otherwise a task_struct reference
      * is taken before the task is linked in.
      */
 774 static void trc_add_holdout(struct task_struct *t, struct list_head *bhp)
 775 {
             if (!list_empty(&t->trc_holdout_list))
                     return; // Already queued, nothing more to do.

             get_task_struct(t);
             list_add(&t->trc_holdout_list, bhp);
 780 }
 781
 782 /*
      * Unlink @t from the holdout list it is on and drop the task_struct
      * reference that trc_add_holdout() took.  Does nothing when @t is not
      * on a holdout list.
      */
 783 static void trc_del_holdout(struct task_struct *t)
 784 {
             if (list_empty(&t->trc_holdout_list))
                     return; // Not queued, so there is no reference to drop.

             list_del_init(&t->trc_holdout_list);
             put_task_struct(t);
 788 }
 790
 791 /*
      * IPI handler to check task state.
      *
      * @t_in: the task that the IPI sender expected to be running on this CPU.
      *
      * The sender, trc_wait_for_one_reader(), incremented
      * trc_n_readers_need_end before sending.  Each path below either gives
      * that count back right away, or sets .need_qs so that the count is
      * given back by rcu_read_unlock_trace_special() later on.
      */
 792 static void trc_read_check_handler(void *t_in)
 793 {
 794         struct task_struct *t = current;
 795         struct task_struct *texp = t_in;
 796
 797         // If the task is no longer running on this CPU, leave.
 798         if (unlikely(texp != t)) {
 799                 if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
 800                         wake_up(&trc_wait);
 801                 goto reset_ipi; // Already on holdout list, so will check later.
 802         }
 803
 804         // If the task is not in a read-side critical section, and
 805         // if this is the last reader, awaken the grace-period kthread.
 806         if (likely(!t->trc_reader_nesting)) {
 807                 if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
 808                         wake_up(&trc_wait);
 809                 // Mark as checked after decrement to avoid false
 810                 // positives on the above WARN_ON_ONCE().
 811                 WRITE_ONCE(t->trc_reader_checked, true);
 812                 goto reset_ipi;
 813         }
 814         WRITE_ONCE(t->trc_reader_checked, true);
 815
 816         // Get here if the task is in a read-side critical section.  Set
 817         // its state so that it will awaken the grace-period kthread upon
 818         // exit from that critical section.
 819         WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
 820         WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
 821
 822 reset_ipi:
 823         // Allow future IPIs to be sent on CPU and for task.
 824         // Also order this IPI handler against any later manipulations of
 825         // the intended task.
             // Note that the per-task reset is applied to texp, the task the
             // IPI was aimed at, which need not be the current task.
 826         smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
 827         smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
 828 }
 829
 830 /*
      * Callback function for scheduler to check locked-down task.
      *
      * @t: task to inspect.
      * @arg: unused.
      *
      * Returns false when @t is currently running and could not be shown
      * to be in a quiescent state, leaving the caller to deal with it some
      * other way.  Otherwise marks @t as checked, takes it off the holdout
      * list and returns true; if @t turned out to be inside a read-side
      * critical section, it is additionally counted in
      * trc_n_readers_need_end and flagged .need_qs before returning.
      */
 831 static bool trc_inspect_reader(struct task_struct *t, void *arg)
 832 {
 833         int cpu = task_cpu(t);
 834         bool in_qs = false;
 835         bool ofl = cpu_is_offline(cpu);
 836
 837         if (task_curr(t)) {
                     // Logical AND: both operands are truth values.
 838                 WARN_ON_ONCE(ofl && !is_idle_task(t));
 839
 840                 // If no chance of heavyweight readers, do it the hard way.
 841                 if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
 842                         return false;
 843
 844                 // If heavyweight readers are enabled on the remote task,
 845                 // we can inspect its state despite its currently running.
 846                 // However, we cannot safely change its state.
 847                 n_heavy_reader_attempts++;
 848                 if (!ofl && // Check for "running" idle tasks on offline CPUs.
 849                     !rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting))
 850                         return false; // No quiescent state, do it the hard way.
 851                 n_heavy_reader_updates++;
 852                 if (ofl)
 853                         n_heavy_reader_ofl_updates++;
 854                 in_qs = true;
 855         } else {
 856                 in_qs = likely(!t->trc_reader_nesting);
 857         }
 858
 859         // Mark as checked.  Because this is called from the grace-period
 860         // kthread, also remove the task from the holdout list.
 861         t->trc_reader_checked = true;
 862         trc_del_holdout(t);
 863
 864         if (in_qs)
 865                 return true;  // Already in quiescent state, done!!!
 866
 867         // The task is in a read-side critical section, so set up its
 868         // state so that it will awaken the grace-period kthread upon exit
 869         // from that critical section.
 870         atomic_inc(&trc_n_readers_need_end); // One more to wait on.
 871         WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
 872         WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
 873         return true;
 874 }
 875
 876 /*
      * Attempt to extract the state for the specified task.
      *
      * @t: task to check.
      * @bhp: holdout list on which @t is placed if it cannot be checked now.
      *
      * In order: give up if an IPI to @t is still in flight, handle the
      * trivial case of @t being the current task, try to inspect @t via
      * try_invoke_on_locked_down_task(), and finally queue @t as a holdout
      * and, if it is running and the IPI delay has elapsed, send it an IPI.
      */
 877 static void trc_wait_for_one_reader(struct task_struct *t,
 878                                     struct list_head *bhp)
 879 {
 880         int cpu;
 881
 882         // If a previous IPI is still in flight, let it complete.
 883         if (smp_load_acquire(&t->trc_ipi_to_cpu) != -1) // Order IPI
 884                 return;
 885
 886         // The current task had better be in a quiescent state.
 887         if (t == current) {
 888                 t->trc_reader_checked = true;
 889                 trc_del_holdout(t);
 890                 WARN_ON_ONCE(t->trc_reader_nesting);
 891                 return;
 892         }
 893
 894         // Attempt to nail down the task for inspection.
 895         get_task_struct(t);
 896         if (try_invoke_on_locked_down_task(t, trc_inspect_reader, NULL)) {
 897                 put_task_struct(t);
 898                 return;
 899         }
 900         put_task_struct(t);
 901
 902         // If currently running, send an IPI, either way, add to list.
 903         trc_add_holdout(t, bhp);
 904         if (task_curr(t) && time_after(jiffies, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
 905                 // The task is currently running, so try IPIing it.
 906                 cpu = task_cpu(t);
 907
 908                 // If there is already an IPI outstanding, let it happen.
 909                 if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
 910                         return;
 911
                     // Account for the IPI before sending it: one more
                     // waited-for reader, and both the per-CPU and per-task
                     // markers record the IPI as outstanding.
 912                 atomic_inc(&trc_n_readers_need_end);
 913                 per_cpu(trc_ipi_to_cpu, cpu) = true;
 914                 t->trc_ipi_to_cpu = cpu;
 915                 rcu_tasks_trace.n_ipis++;
 916                 if (smp_call_function_single(cpu,
 917                                              trc_read_check_handler, t, 0)) {
 918                         // Just in case there is some other reason for
 919                         // failure than the target CPU being offline.
 920                         rcu_tasks_trace.n_ipis_fails++;
 921                         per_cpu(trc_ipi_to_cpu, cpu) = false;
                             // No handler will run to reset the per-task
                             // marker, so reset it here.  Leaving a CPU
                             // number behind would make every later scan
                             // treat an IPI as still in flight and skip
                             // this task.
 922                         t->trc_ipi_to_cpu = -1;
 923                         if (atomic_dec_and_test(&trc_n_readers_need_end)) {
 924                                 WARN_ON_ONCE(1);
 925                                 wake_up(&trc_wait);
 926                         }
 927                 }
 928         }
 929 }
 930
 931 /*
      * Set up for a new RCU-tasks-trace grace period: prime the reader
      * count, sanity-check the per-CPU IPI flags, and block CPU hotplug.
      */
 932 static void rcu_tasks_trace_pregp_step(void)
 933 {
             int c;

             // Start the count at one rather than zero to allow for
             // fast-acting IPIs; rcu_tasks_trace_postgp() removes this
             // extra count.
 937         atomic_set(&trc_n_readers_need_end, 1);

             // No IPI from an earlier grace period should still be pending,
             // but complain if one is.
             for_each_possible_cpu(c) {
                     WARN_ON_ONCE(per_cpu(trc_ipi_to_cpu, c));
             }

             // Hold off CPU hotplug for the duration of the tasklist scan.
             // This also waits for all readers in CPU-hotplug code paths.
 945         cpus_read_lock();
 946 }
 947
 948 /*
      * Do first-round processing for the specified task.
      *
      * @t: task to examine.
      * @hop: holdout list handed on to trc_wait_for_one_reader().
      *
      * Resets @t's per-grace-period state (.need_qs, ->trc_reader_checked
      * and ->trc_ipi_to_cpu) and then makes the first attempt to check it.
      */
 949 static void rcu_tasks_trace_pertask(struct task_struct *t,
 950                                     struct list_head *hop)
 951 {
 952         WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
 953         WRITE_ONCE(t->trc_reader_checked, false);
 954         t->trc_ipi_to_cpu = -1; // -1: no IPI in flight for this task.
 955         trc_wait_for_one_reader(t, hop);
 956 }
 957
 958 /*
      * Runs between the tasklist scan and the holdout scans: applies the
      * per-task first-round processing to every possible CPU's idle task,
      * lets CPU hotplug proceed again, and waits out late-exiting tasks.
      * @hop is the holdout list handed on to rcu_tasks_trace_pertask().
 961  */
 962 static void rcu_tasks_trace_postscan(struct list_head *hop)
 963 {
             int c;

             // Pick up the idle tasks, one per possible CPU.
             for_each_possible_cpu(c) {
                     rcu_tasks_trace_pertask(idle_task(c), hop);
             }

             // The tasklist scan is over, so CPU hotplug may resume.
 970         cpus_read_unlock();

             // Tasks late in exit might already have passed the call to
             // exit_tasks_rcu_finish(), so wait for them to finish exiting.
             // Any task exiting after this wait sets ->trc_reader_checked.
 974         synchronize_rcu();
 976 }
 977
 978 /*
      * Show the state of a task stalling the current RCU tasks trace GP.
      *
      * @t: the stalled task.
      * @firstreport: in/out flag; when set, a one-time header line is
      *               printed and the flag is cleared.
      *
      * The per-task line has the form
      *   P<pid>: <I|.><i|.><N|.> nesting: <n><N| > cpu: <cpu>
      * where "I" means an IPI is recorded as in flight to the task, "i"
      * that it is an idle task, the first "N" that tick_nohz_full_cpu()
      * is true for its CPU, and the trailing "N" that .need_qs is set.
      */
 979 static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
 980 {
 981         int cpu;
 982
 983         if (*firstreport) {
 984                 pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
 985                 *firstreport = false;
 986         }
 987         // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
 988         cpu = task_cpu(t);
             // CPU numbers start at zero and ->trc_ipi_to_cpu uses -1 for
             // "none", so both tests below must accept CPU 0.
 989         pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
 990                  t->pid,
 991                  ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
 992                  ".i"[is_idle_task(t)],
 993                  ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
 994                  t->trc_reader_nesting,
 995                  " N"[!!t->trc_reader_special.b.need_qs],
 996                  cpu);
 997         sched_show_task(t);
 998 }
 999
1000 /*
      * Print one line for each possible CPU whose trc_ipi_to_cpu flag is
      * still set, that is, each CPU with an IPI recorded as outstanding.
      */
1001 static void show_stalled_ipi_trace(void)
1002 {
             int c;

             for_each_possible_cpu(c) {
                     if (!per_cpu(trc_ipi_to_cpu, c))
                             continue; // Nothing outstanding on this CPU.
                     pr_alert("\tIPI outstanding to CPU %d\n", c);
             }
1008 }
1009
1010 /*
      * Do one scan of the holdout list.
      *
      * @hop: holdout list to scan.
      * @needreport: true if stalled tasks should be reported on this pass.
      * @firstreport: in/out flag, cleared by show_stalled_task_trace() once
      *               it has printed its header for at least one task.
      *
      * Each task that is neither checked nor awaiting an IPI is re-examined.
      * Tasks found to be checked are dropped from the list; the rest are
      * reported when @needreport is set.
      */
1011 static void check_all_holdout_tasks_trace(struct list_head *hop,
1012                                           bool needreport, bool *firstreport)
1013 {
1014         struct task_struct *g, *t;
1015
1016         // Disable CPU hotplug across the holdout list scan.
1017         cpus_read_lock();
1018
1019         list_for_each_entry_safe(t, g, hop, trc_holdout_list) {
1020                 // If safe and needed, try to check the current task.
1021                 if (READ_ONCE(t->trc_ipi_to_cpu) == -1 &&
1022                     !READ_ONCE(t->trc_reader_checked))
1023                         trc_wait_for_one_reader(t, hop);
1024
1025                 // If check succeeded, remove this task from the list.
1026                 if (READ_ONCE(t->trc_reader_checked))
1027                         trc_del_holdout(t);
1028                 else if (needreport)
1029                         show_stalled_task_trace(t, firstreport);
1030         }
1031
1032         // Re-enable CPU hotplug now that the holdout list scan has completed.
1033         cpus_read_unlock();
1034
1035         if (needreport) {
                     // Test the flag's value, not the pointer: the flag is
                     // still set only if no task was reported above.
1036                 if (*firstreport)
1037                         pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n");
1038                 show_stalled_ipi_trace();
1039         }
1040 }
1041
1042 /*
      * Wait for grace period to complete and provide ordering.
      *
      * @rtp: the rcu_tasks structure whose grace-period state is updated.
      *
      * Drops the extra count installed by rcu_tasks_trace_pregp_step() and
      * then sleeps on trc_wait until trc_n_readers_need_end reaches zero.
      * Each time the wait times out, the tasks still flagged .need_qs are
      * reported as stalled and the wait is repeated.
      */
1043 static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
1044 {
1045         bool firstreport;
1046         struct task_struct *g, *t;
1047         LIST_HEAD(holdouts);
1048         long ret;
1049
1050         // Remove the safety count.
1051         smp_mb__before_atomic();  // Order vs. earlier atomics
1052         atomic_dec(&trc_n_readers_need_end);
1053         smp_mb__after_atomic();  // Order vs. later atomics
1054
1055         // Wait for readers.
1056         set_tasks_gp_state(rtp, RTGS_WAIT_READERS);
1057         for (;;) {
1058                 ret = wait_event_idle_exclusive_timeout(
1059                                 trc_wait,
1060                                 atomic_read(&trc_n_readers_need_end) == 0,
1061                                 READ_ONCE(rcu_task_stall_timeout));
1062                 if (ret)
1063                         break;  // Count reached zero.
1064                 // Stall warning time, so make a list of the offenders.
                     // NOTE(review): this tasklist walk is not bracketed by
                     // rcu_read_lock() in this function -- confirm that the
                     // needed protection is provided elsewhere.
1065                 for_each_process_thread(g, t)
1066                         if (READ_ONCE(t->trc_reader_special.b.need_qs))
1067                                 trc_add_holdout(t, &holdouts);
1068                 firstreport = true;
1069                 list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list) {
1070                         if (READ_ONCE(t->trc_reader_special.b.need_qs))
1071                                 show_stalled_task_trace(t, &firstreport);
                             // Unlink every task, even one whose .need_qs
                             // cleared since the scan above.  Otherwise it
                             // would stay linked to this on-stack list head
                             // and keep its task_struct reference.
1072                         trc_del_holdout(t);
1073                 }
1074                 if (firstreport)
1075                         pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/taskslist mismatch?)\n");
1076                 show_stalled_ipi_trace();
1077                 pr_err("\t%d holdouts\n", atomic_read(&trc_n_readers_need_end));
1078         }
1079         smp_mb(); // Caller's code must be ordered after wakeup.
1080                   // Pairs with pretty much every ordering primitive.
1081 }
1082
1083 /*
      * Report any needed quiescent state for this exiting task.
      *
      * Marks @t as checked, warns if it is still inside a read-side
      * critical section (nonzero ->trc_reader_nesting) and forces the
      * nesting count to zero.  If .need_qs is unexpectedly still set, warns
      * and releases it through rcu_read_unlock_trace_special().
      */
1084 static void exit_tasks_rcu_finish_trace(struct task_struct *t)
1085 {
1086         WRITE_ONCE(t->trc_reader_checked, true);
1087         WARN_ON_ONCE(t->trc_reader_nesting);
1088         WRITE_ONCE(t->trc_reader_nesting, 0);
1089         if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
1090                 rcu_read_unlock_trace_special(t, 0);
1091 }
1092
1093 /**
1094  * call_rcu_tasks_trace() - Queue a callback trace task-based grace period
1095  * @rhp: structure to be used for queueing the RCU updates.
1096  * @func: actual callback function to be invoked after the grace period
1097  *
1098  * The callback function will be invoked some time after a trace rcu-tasks
1099  * grace period elapses, in other words after all currently executing
1100  * trace rcu-tasks read-side critical sections have completed.  These
1101  * read-side critical sections are delimited by calls to
1102  * rcu_read_lock_trace() and rcu_read_unlock_trace().
1107  *
1108  * See the description of call_rcu() for more detailed information on
1109  * memory ordering guarantees.
1110  */
1111 void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
1112 {
1113         call_rcu_tasks_generic(rhp, func, &rcu_tasks_trace);
1114 }
1115 EXPORT_SYMBOL_GPL(call_rcu_tasks_trace);
1116
1117 /**
1118  * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
1119  *
1120  * Control will return to the caller some time after a trace rcu-tasks
1121  * grace period has elapsed, in other words after all currently executing
1122  * trace rcu-tasks read-side critical sections have elapsed.  These
1123  * read-side critical sections are delimited by calls to
1124  * rcu_read_lock_trace() and rcu_read_unlock_trace().
1126  *
1127  * This is a very specialized primitive, intended only for a few uses in
1128  * tracing and other situations requiring manipulation of function preambles
1129  * and profiling hooks.  The synchronize_rcu_tasks_trace() function is not
1130  * (yet) intended for heavy use from multiple CPUs.
1131  *
1132  * See the description of synchronize_rcu() for more detailed information
1133  * on memory ordering guarantees.
1134  */
1135 void synchronize_rcu_tasks_trace(void)
1136 {
             // Waiting for a grace period from within a reader is flagged
             // by lockdep via rcu_trace_lock_map.
1137         RCU_LOCKDEP_WARN(lock_is_held(&rcu_trace_lock_map), "Illegal synchronize_rcu_tasks_trace() in RCU Tasks Trace read-side critical section");
1138         synchronize_rcu_tasks_generic(&rcu_tasks_trace);
1139 }
1140 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
1141
1142 /**
1143  * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
1144  *
1145  * Although the current implementation is guaranteed to wait, it is not
1146  * obligated to, for example, if there are no pending callbacks.
      *
      * The wait is currently implemented by simply calling
      * synchronize_rcu_tasks_trace().
1147  */
1148 void rcu_barrier_tasks_trace(void)
1149 {
1150         /* There is only one callback queue, so this is easy.  ;-) */
1151         synchronize_rcu_tasks_trace();
1152 }
1153 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
1154
1155 static int __init rcu_spawn_tasks_trace_kthread(void)
1156 {
             struct rcu_tasks *rtp = &rcu_tasks_trace;

             // Install the Tasks Trace hook for each grace-period phase,
             // then hand the flavor to the generic kthread spawner.
             // Registered as a core_initcall(); always returns 0.
             rtp->pregp_func = rcu_tasks_trace_pregp_step;
             rtp->pertask_func = rcu_tasks_trace_pertask;
             rtp->postscan_func = rcu_tasks_trace_postscan;
             rtp->holdouts_func = check_all_holdout_tasks_trace;
             rtp->postgp_func = rcu_tasks_trace_postgp;
             rcu_spawn_tasks_kthread_generic(rtp);
1163         return 0;
1164 }
1165 core_initcall(rcu_spawn_tasks_trace_kthread);
1166
1167 static void show_rcu_tasks_trace_gp_kthread(void)
1168 {
             // Formats the Tasks Trace specific fields -- the current
             // waited-for reader count and the three heavyweight-reader
             // counters -- and passes them to the generic show function.
1169         char buf[64];
1170
             // Bounded formatting: one int plus three unsigned longs can in
             // principle need more than sizeof(buf) bytes, so truncate
             // rather than overrun the on-stack buffer.
1171         snprintf(buf, sizeof(buf), "N%d h:%lu/%lu/%lu",
                     atomic_read(&trc_n_readers_need_end),
1172                 data_race(n_heavy_reader_ofl_updates),
1173                 data_race(n_heavy_reader_updates),
1174                 data_race(n_heavy_reader_attempts));
1175         show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
1176 }
1177
1178 #else /* #ifdef CONFIG_TASKS_TRACE_RCU */
     // Empty stand-ins used when CONFIG_TASKS_TRACE_RCU is not set, so that
     // callers of these two functions need no #ifdef of their own.
1179 static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
1180 static inline void show_rcu_tasks_trace_gp_kthread(void) {}
1181 #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
1182
1183 void show_rcu_tasks_gp_kthreads(void)
1184 {
             // Invoke the per-flavor show function for each Tasks RCU
             // variant in turn: classic, rude, then trace.
1185         show_rcu_tasks_classic_gp_kthread();
1186         show_rcu_tasks_rude_gp_kthread();
1187         show_rcu_tasks_trace_gp_kthread();
1188 }
1189
1190 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
     // Empty stand-ins used when CONFIG_TASKS_RCU_GENERIC is not set.
1191 static inline void rcu_tasks_bootup_oddness(void) {}
1192 void show_rcu_tasks_gp_kthreads(void) {}
1193 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */