kernel/rcutree_plugin.h

   1 /*
   2  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
   3  * Internal non-public definitions that provide either classic
   4  * or preemptable semantics.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19  *
  20  * Copyright Red Hat, 2009
  21  * Copyright IBM Corporation, 2009
  22  *
  23  * Author: Ingo Molnar <mingo@elte.hu>
  24  *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  25  */
  26
  27 #include <linux/delay.h>
  28 #include <linux/stop_machine.h>
  29
  30 /*
  31  * Check the RCU kernel configuration parameters and print informative
  32  * messages about anything out of the ordinary.  If you like #ifdef, you
  33  * will love this function.
  34  */
  35 static void __init rcu_bootup_announce_oddness(void)
  36 {
  37 #ifdef CONFIG_RCU_TRACE
  38         printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
  39 #endif
  40 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
  41         printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
  42                CONFIG_RCU_FANOUT);
  43 #endif
  44 #ifdef CONFIG_RCU_FANOUT_EXACT
  45         printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
  46 #endif
  47 #ifdef CONFIG_RCU_FAST_NO_HZ
  48         printk(KERN_INFO
  49                "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
  50 #endif
  51 #ifdef CONFIG_PROVE_RCU
  52         printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
  53 #endif
  54 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
  55         printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
  56 #endif
  57 #ifndef CONFIG_RCU_CPU_STALL_DETECTOR
  58         printk(KERN_INFO
  59                "\tRCU-based detection of stalled CPUs is disabled.\n");
  60 #endif
  61 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
  62         printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
  63 #endif
  64 #if NUM_RCU_LVL_4 != 0
  65         printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
  66 #endif
  67 }
  68
  69 #ifdef CONFIG_TREE_PREEMPT_RCU
  70
   71 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); /* global state used by every rcu_preempt function in this file */
   72 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); /* one rcu_data per CPU for the rcu_preempt flavor */
   73
   74 static int rcu_preempted_readers_exp(struct rcu_node *rnp); /* forward declaration: used by rcu_read_unlock_special() before its definition further down */
  75
  76 /*
  77  * Tell them what RCU they are running.
  78  */
  79 static void __init rcu_bootup_announce(void)
  80 {
  81         printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
  82         rcu_bootup_announce_oddness();
  83 }
  84
  85 /*
  86  * Return the number of RCU-preempt batches processed thus far
  87  * for debug and statistics.
  88  */
  89 long rcu_batches_completed_preempt(void)
  90 {
  91         return rcu_preempt_state.completed;
  92 }
  93 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  94
  95 /*
  96  * Return the number of RCU batches processed thus far for debug & stats.
  97  */
  98 long rcu_batches_completed(void)
  99 {
 100         return rcu_batches_completed_preempt();
 101 }
 102 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 103
 104 /*
 105  * Force a quiescent state for preemptible RCU.
 106  */
 107 void rcu_force_quiescent_state(void)
 108 {
 109         force_quiescent_state(&rcu_preempt_state, 0);
 110 }
 111 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 112
 113 /*
 114  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
 115  * that this just means that the task currently running on the CPU is
 116  * not in a quiescent state.  There might be any number of tasks blocked
 117  * while in an RCU read-side critical section.
 118  *
 119  * Unlike the other rcu_*_qs() functions, callers to this function
 120  * must disable irqs in order to protect the assignment to
 121  * ->rcu_read_unlock_special.
 122  */
 123 static void rcu_preempt_qs(int cpu)
 124 {
 125         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 126
 127         rdp->passed_quiesc_completed = rdp->gpnum - 1;
 128         barrier();
 129         rdp->passed_quiesc = 1;
 130         current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 131 }
 132
 133 /*
 134  * We have entered the scheduler, and the current task might soon be
 135  * context-switched away from.  If this task is in an RCU read-side
 136  * critical section, we will no longer be able to rely on the CPU to
 137  * record that fact, so we enqueue the task on the appropriate entry
 138  * of the blocked_tasks[] array.  The task will dequeue itself when
 139  * it exits the outermost enclosing RCU read-side critical section.
 140  * Therefore, the current grace period cannot be permitted to complete
 141  * until the blocked_tasks[] entry indexed by the low-order bit of
 142  * rnp->gpnum empties.
 143  *
 144  * Caller must disable preemption.
 145  */
 146 static void rcu_preempt_note_context_switch(int cpu)
 147 {
 148         struct task_struct *t = current;
 149         unsigned long flags;
 150         int phase;
 151         struct rcu_data *rdp;
 152         struct rcu_node *rnp;
 153
 154         if (t->rcu_read_lock_nesting &&
 155             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 156
 157                 /* Possibly blocking in an RCU read-side critical section. */
 158                 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
 159                 rnp = rdp->mynode;
 160                 raw_spin_lock_irqsave(&rnp->lock, flags);
 161                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 162                 t->rcu_blocked_node = rnp;
 163
 164                 /*
 165                  * If this CPU has already checked in, then this task
 166                  * will hold up the next grace period rather than the
 167                  * current grace period.  Queue the task accordingly.
 168                  * If the task is queued for the current grace period
 169                  * (i.e., this CPU has not yet passed through a quiescent
 170                  * state for the current grace period), then as long
 171                  * as that task remains queued, the current grace period
 172                  * cannot end.
 173                  *
 174                  * But first, note that the current CPU must still be
 175                  * on line!
 176                  */
 177                 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 178                 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 179                 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 180                 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
 181                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 182         }
 183
 184         /*
 185          * Either we were not in an RCU read-side critical section to
 186          * begin with, or we have now recorded that critical section
 187          * globally.  Either way, we can now note a quiescent state
 188          * for this CPU.  Again, if we were in an RCU read-side critical
 189          * section, and if that critical section was blocking the current
 190          * grace period, then the fact that the task has been enqueued
 191          * means that we continue to block the current grace period.
 192          */
 193         local_irq_save(flags);
 194         rcu_preempt_qs(cpu);
 195         local_irq_restore(flags);
 196 }
 197
 198 /*
 199  * Tree-preemptable RCU implementation for rcu_read_lock().
 200  * Just increment ->rcu_read_lock_nesting, shared state will be updated
 201  * if we block.
 202  */
 203 void __rcu_read_lock(void)
 204 {
 205         current->rcu_read_lock_nesting++;
 206         barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
 207 }
 208 EXPORT_SYMBOL_GPL(__rcu_read_lock);
 209
 210 /*
 211  * Check for preempted RCU readers blocking the current grace period
 212  * for the specified rcu_node structure.  If the caller needs a reliable
 213  * answer, it must hold the rcu_node's ->lock.
 214  */
 215 static int rcu_preempted_readers(struct rcu_node *rnp)
 216 {
 217         int phase = rnp->gpnum & 0x1;
 218
 219         return !list_empty(&rnp->blocked_tasks[phase]) ||
 220                !list_empty(&rnp->blocked_tasks[phase + 2]);
 221 }
 222
 223 /*
 224  * Record a quiescent state for all tasks that were previously queued
 225  * on the specified rcu_node structure and that were blocking the current
 226  * RCU grace period.  The caller must hold the specified rnp->lock with
 227  * irqs disabled, and this lock is released upon return, but irqs remain
 228  * disabled.
 229  */
 230 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 231         __releases(rnp->lock)
 232 {
 233         unsigned long mask;
 234         struct rcu_node *rnp_p;
 235
 236         if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
 237                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 238                 return;  /* Still need more quiescent states! */
 239         }
 240
 241         rnp_p = rnp->parent;
 242         if (rnp_p == NULL) {
 243                 /*
 244                  * Either there is only one rcu_node in the tree,
 245                  * or tasks were kicked up to root rcu_node due to
 246                  * CPUs going offline.
 247                  */
 248                 rcu_report_qs_rsp(&rcu_preempt_state, flags);
 249                 return;
 250         }
 251
 252         /* Report up the rest of the hierarchy. */
 253         mask = rnp->grpmask;
 254         raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
 255         raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
 256         rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
 257 }
 258
 259 /*
 260  * Handle special cases during rcu_read_unlock(), such as needing to
 261  * notify RCU core processing or task having blocked during the RCU
 262  * read-side critical section.
 263  */
 264 static void rcu_read_unlock_special(struct task_struct *t)
 265 {
 266         int empty;
 267         int empty_exp;
 268         unsigned long flags;
 269         struct rcu_node *rnp;
 270         int special;
 271
 272         /* NMI handlers cannot block and cannot safely manipulate state. */
 273         if (in_nmi())
 274                 return;
 275
 276         local_irq_save(flags);
 277
 278         /*
 279          * If RCU core is waiting for this CPU to exit critical section,
 280          * let it know that we have done so.
 281          */
 282         special = t->rcu_read_unlock_special;
 283         if (special & RCU_READ_UNLOCK_NEED_QS) {
 284                 rcu_preempt_qs(smp_processor_id());
 285         }
 286
 287         /* Hardware IRQ handlers cannot block. */
 288         if (in_irq()) {
 289                 local_irq_restore(flags);
 290                 return;
 291         }
 292
 293         /* Clean up if blocked during RCU read-side critical section. */
 294         if (special & RCU_READ_UNLOCK_BLOCKED) {
 295                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 296
 297                 /*
 298                  * Remove this task from the list it blocked on.  The
 299                  * task can migrate while we acquire the lock, but at
 300                  * most one time.  So at most two passes through loop.
 301                  */
 302                 for (;;) {
 303                         rnp = t->rcu_blocked_node;
 304                         raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
 305                         if (rnp == t->rcu_blocked_node)
 306                                 break;
 307                         raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 308                 }
 309                 empty = !rcu_preempted_readers(rnp);
 310                 empty_exp = !rcu_preempted_readers_exp(rnp);
 311                 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 312                 list_del_init(&t->rcu_node_entry);
 313                 t->rcu_blocked_node = NULL;
 314
 315                 /*
 316                  * If this was the last task on the current list, and if
 317                  * we aren't waiting on any CPUs, report the quiescent state.
 318                  * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
 319                  */
 320                 if (empty)
 321                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 322                 else
 323                         rcu_report_unblock_qs_rnp(rnp, flags);
 324
 325                 /*
 326                  * If this was the last task on the expedited lists,
 327                  * then we need to report up the rcu_node hierarchy.
 328                  */
 329                 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
 330                         rcu_report_exp_rnp(&rcu_preempt_state, rnp);
 331         } else {
 332                 local_irq_restore(flags);
 333         }
 334 }
 335
 336 /*
 337  * Tree-preemptable RCU implementation for rcu_read_unlock().
 338  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 339  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 340  * invoke rcu_read_unlock_special() to clean up after a context switch
 341  * in an RCU read-side critical section and other special cases.
 342  */
 343 void __rcu_read_unlock(void)
 344 {
 345         struct task_struct *t = current;
 346
 347         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
 348         --t->rcu_read_lock_nesting;
 349         barrier();  /* decrement before load of ->rcu_read_unlock_special */
 350         if (t->rcu_read_lock_nesting == 0 &&
 351             unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 352                 rcu_read_unlock_special(t);
 353 #ifdef CONFIG_PROVE_LOCKING
 354         WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
 355 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 356 }
 357 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 358
 359 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 360
 361 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
 362
 363 /*
 364  * Dump detailed information for all tasks blocking the current RCU
 365  * grace period on the specified rcu_node structure.
 366  */
 367 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 368 {
 369         unsigned long flags;
 370         struct list_head *lp;
 371         int phase;
 372         struct task_struct *t;
 373
 374         if (rcu_preempted_readers(rnp)) {
 375                 raw_spin_lock_irqsave(&rnp->lock, flags);
 376                 phase = rnp->gpnum & 0x1;
 377                 lp = &rnp->blocked_tasks[phase];
 378                 list_for_each_entry(t, lp, rcu_node_entry)
 379                         sched_show_task(t);
 380                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 381         }
 382 }
 383
 384 /*
 385  * Dump detailed information for all tasks blocking the current RCU
 386  * grace period.
 387  */
 388 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 389 {
 390         struct rcu_node *rnp = rcu_get_root(rsp);
 391
 392         rcu_print_detail_task_stall_rnp(rnp);
 393         rcu_for_each_leaf_node(rsp, rnp)
 394                 rcu_print_detail_task_stall_rnp(rnp);
 395 }
 396
 397 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 398
 399 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 400 {
 401 }
 402
 403 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 404
 405 /*
 406  * Scan the current list of tasks blocked within RCU read-side critical
 407  * sections, printing out the tid of each.
 408  */
 409 static void rcu_print_task_stall(struct rcu_node *rnp)
 410 {
 411         struct list_head *lp;
 412         int phase;
 413         struct task_struct *t;
 414
 415         if (rcu_preempted_readers(rnp)) {
 416                 phase = rnp->gpnum & 0x1;
 417                 lp = &rnp->blocked_tasks[phase];
 418                 list_for_each_entry(t, lp, rcu_node_entry)
 419                         printk(" P%d", t->pid);
 420         }
 421 }
 422
 423 /*
 424  * Suppress preemptible RCU's CPU stall warnings by pushing the
 425  * time of the next stall-warning message comfortably far into the
 426  * future.
 427  */
 428 static void rcu_preempt_stall_reset(void)
 429 {
 430         rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
 431 }
 432
 433 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 434
 435 /*
 436  * Check that the list of blocked tasks for the newly completed grace
 437  * period is in fact empty.  It is a serious bug to complete a grace
 438  * period that still has RCU readers blocked!  This function must be
 439  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
 440  * must be held by the caller.
 441  */
 442 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 443 {
 444         WARN_ON_ONCE(rcu_preempted_readers(rnp));
 445         WARN_ON_ONCE(rnp->qsmask);
 446 }
 447
 448 #ifdef CONFIG_HOTPLUG_CPU
 449
 450 /*
 451  * Handle tasklist migration for case in which all CPUs covered by the
 452  * specified rcu_node have gone offline.  Move them up to the root
 453  * rcu_node.  The reason for not just moving them to the immediate
 454  * parent is to remove the need for rcu_read_unlock_special() to
 455  * make more than two attempts to acquire the target rcu_node's lock.
 456  * Returns true if there were tasks blocking the current RCU grace
 457  * period.
 458  *
 459  * Returns 1 if there was previously a task blocking the current grace
 460  * period on the specified rcu_node structure.
 461  *
 462  * The caller must hold rnp->lock with irqs disabled.
 463  */
 464 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 465                                      struct rcu_node *rnp,
 466                                      struct rcu_data *rdp)
 467 {
 468         int i;
 469         struct list_head *lp;
 470         struct list_head *lp_root;
 471         int retval = 0;
 472         struct rcu_node *rnp_root = rcu_get_root(rsp);
 473         struct task_struct *tp;
 474
 475         if (rnp == rnp_root) {
 476                 WARN_ONCE(1, "Last CPU thought to be offlined?");
 477                 return 0;  /* Shouldn't happen: at least one CPU online. */
 478         }
 479         WARN_ON_ONCE(rnp != rdp->mynode &&
 480                      (!list_empty(&rnp->blocked_tasks[0]) ||
 481                       !list_empty(&rnp->blocked_tasks[1]) ||
 482                       !list_empty(&rnp->blocked_tasks[2]) ||
 483                       !list_empty(&rnp->blocked_tasks[3])));
 484
 485         /*
 486          * Move tasks up to root rcu_node.  Rely on the fact that the
 487          * root rcu_node can be at most one ahead of the rest of the
 488          * rcu_nodes in terms of gp_num value.  This fact allows us to
 489          * move the blocked_tasks[] array directly, element by element.
 490          */
 491         if (rcu_preempted_readers(rnp))
 492                 retval |= RCU_OFL_TASKS_NORM_GP;
 493         if (rcu_preempted_readers_exp(rnp))
 494                 retval |= RCU_OFL_TASKS_EXP_GP;
 495         for (i = 0; i < 4; i++) {
 496                 lp = &rnp->blocked_tasks[i];
 497                 lp_root = &rnp_root->blocked_tasks[i];
 498                 while (!list_empty(lp)) {
 499                         tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
 500                         raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 501                         list_del(&tp->rcu_node_entry);
 502                         tp->rcu_blocked_node = rnp_root;
 503                         list_add(&tp->rcu_node_entry, lp_root);
 504                         raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 505                 }
 506         }
 507         return retval;
 508 }
 509
 510 /*
 511  * Do CPU-offline processing for preemptable RCU.
 512  */
 513 static void rcu_preempt_offline_cpu(int cpu)
 514 {
 515         __rcu_offline_cpu(cpu, &rcu_preempt_state);
 516 }
 517
 518 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 519
 520 /*
 521  * Check for a quiescent state from the current CPU.  When a task blocks,
 522  * the task is recorded in the corresponding CPU's rcu_node structure,
 523  * which is checked elsewhere.
 524  *
 525  * Caller must disable hard irqs.
 526  */
 527 static void rcu_preempt_check_callbacks(int cpu)
 528 {
 529         struct task_struct *t = current;
 530
 531         if (t->rcu_read_lock_nesting == 0) {
 532                 rcu_preempt_qs(cpu);
 533                 return;
 534         }
 535         if (per_cpu(rcu_preempt_data, cpu).qs_pending)
 536                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 537 }
 538
 539 /*
 540  * Process callbacks for preemptable RCU.
 541  */
 542 static void rcu_preempt_process_callbacks(void)
 543 {
 544         __rcu_process_callbacks(&rcu_preempt_state,
 545                                 &__get_cpu_var(rcu_preempt_data));
 546 }
 547
 548 /*
 549  * Queue a preemptable-RCU callback for invocation after a grace period.
 550  */
 551 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 552 {
 553         __call_rcu(head, func, &rcu_preempt_state);
 554 }
 555 EXPORT_SYMBOL_GPL(call_rcu);
 556
 557 /**
 558  * synchronize_rcu - wait until a grace period has elapsed.
 559  *
 560  * Control will return to the caller some time after a full grace
 561  * period has elapsed, in other words after all currently executing RCU
 562  * read-side critical sections have completed.  Note, however, that
 563  * upon return from synchronize_rcu(), the caller might well be executing
 564  * concurrently with new RCU read-side critical sections that began while
 565  * synchronize_rcu() was waiting.  RCU read-side critical sections are
 566  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
 567  */
 568 void synchronize_rcu(void)
 569 {
 570         struct rcu_synchronize rcu;
 571
 572         if (!rcu_scheduler_active)
 573                 return;
 574
 575         init_rcu_head_on_stack(&rcu.head);
 576         init_completion(&rcu.completion);
 577         /* Will wake me after RCU finished. */
 578         call_rcu(&rcu.head, wakeme_after_rcu);
 579         /* Wait for it. */
 580         wait_for_completion(&rcu.completion);
 581         destroy_rcu_head_on_stack(&rcu.head);
 582 }
 583 EXPORT_SYMBOL_GPL(synchronize_rcu);
 584
  585 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); /* synchronize_rcu_expedited() sleeps here; woken by rcu_report_exp_rnp() at the root */
  586 static long sync_rcu_preempt_exp_count; /* incremented once per completed expedited grace period */
  587 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); /* taken by synchronize_rcu_expedited() around an expedited grace period */
 588
 589 /*
 590  * Return non-zero if there are any tasks in RCU read-side critical
 591  * sections blocking the current preemptible-RCU expedited grace period.
 592  * If there is no preemptible-RCU expedited grace period currently in
 593  * progress, returns zero unconditionally.
 594  */
 595 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 596 {
 597         return !list_empty(&rnp->blocked_tasks[2]) ||
 598                !list_empty(&rnp->blocked_tasks[3]);
 599 }
 600
 601 /*
 602  * return non-zero if there is no RCU expedited grace period in progress
 603  * for the specified rcu_node structure, in other words, if all CPUs and
 604  * tasks covered by the specified rcu_node structure have done their bit
 605  * for the current expedited grace period.  Works only for preemptible
 606  * RCU -- other RCU implementation use other means.
 607  *
 608  * Caller must hold sync_rcu_preempt_exp_mutex.
 609  */
 610 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 611 {
 612         return !rcu_preempted_readers_exp(rnp) &&
 613                ACCESS_ONCE(rnp->expmask) == 0;
 614 }
 615
 616 /*
 617  * Report the exit from RCU read-side critical section for the last task
 618  * that queued itself during or before the current expedited preemptible-RCU
 619  * grace period.  This event is reported either to the rcu_node structure on
 620  * which the task was queued or to one of that rcu_node structure's ancestors,
 621  * recursively up the tree.  (Calm down, calm down, we do the recursion
 622  * iteratively!)
 623  *
 624  * Caller must hold sync_rcu_preempt_exp_mutex.
 625  */
 626 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 627 {
 628         unsigned long flags;
 629         unsigned long mask;
 630
 631         raw_spin_lock_irqsave(&rnp->lock, flags);
 632         for (;;) {
 633                 if (!sync_rcu_preempt_exp_done(rnp))
 634                         break;
 635                 if (rnp->parent == NULL) {
 636                         wake_up(&sync_rcu_preempt_exp_wq);
 637                         break;
 638                 }
 639                 mask = rnp->grpmask;
 640                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 641                 rnp = rnp->parent;
 642                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
 643                 rnp->expmask &= ~mask;
 644         }
 645         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 646 }
 647
 648 /*
 649  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 650  * grace period for the specified rcu_node structure.  If there are no such
 651  * tasks, report it up the rcu_node hierarchy.
 652  *
 653  * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
 654  */
 655 static void
 656 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 657 {
 658         int must_wait;
 659
 660         raw_spin_lock(&rnp->lock); /* irqs already disabled */
 661         list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
 662         list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
 663         must_wait = rcu_preempted_readers_exp(rnp);
 664         raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 665         if (!must_wait)
 666                 rcu_report_exp_rnp(rsp, rnp);
 667 }
 668
 669 /*
 670  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
 671  * is to invoke synchronize_sched_expedited() to push all the tasks to
 672  * the ->blocked_tasks[] lists, move all entries from the first set of
 673  * ->blocked_tasks[] lists to the second set, and finally wait for this
 674  * second set to drain.
 675  */
 676 void synchronize_rcu_expedited(void)
 677 {
 678         unsigned long flags;
 679         struct rcu_node *rnp;
 680         struct rcu_state *rsp = &rcu_preempt_state;
 681         long snap;
 682         int trycount = 0;
 683
 684         smp_mb(); /* Caller's modifications seen first by other CPUs. */
 685         snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
 686         smp_mb(); /* Above access cannot bleed into critical section. */
 687
 688         /*
 689          * Acquire lock, falling back to synchronize_rcu() if too many
 690          * lock-acquisition failures.  Of course, if someone does the
 691          * expedited grace period for us, just leave.
 692          */
 693         while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
 694                 if (trycount++ < 10)
 695                         udelay(trycount * num_online_cpus());
 696                 else {
 697                         synchronize_rcu();
 698                         return;
 699                 }
 700                 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 701                         goto mb_ret; /* Others did our work for us. */
 702         }
 703         if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 704                 goto unlock_mb_ret; /* Others did our work for us. */
 705
 706         /* force all RCU readers onto blocked_tasks[]. */
 707         synchronize_sched_expedited();
 708
 709         raw_spin_lock_irqsave(&rsp->onofflock, flags);
 710
 711         /* Initialize ->expmask for all non-leaf rcu_node structures. */
 712         rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
 713                 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
 714                 rnp->expmask = rnp->qsmaskinit;
 715                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 716         }
 717
 718         /* Snapshot current state of ->blocked_tasks[] lists. */
 719         rcu_for_each_leaf_node(rsp, rnp)
 720                 sync_rcu_preempt_exp_init(rsp, rnp);
 721         if (NUM_RCU_NODES > 1)
 722                 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
 723
 724         raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 725
 726         /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
 727         rnp = rcu_get_root(rsp);
 728         wait_event(sync_rcu_preempt_exp_wq,
 729                    sync_rcu_preempt_exp_done(rnp));
 730
 731         /* Clean up and exit. */
 732         smp_mb(); /* ensure expedited GP seen before counter increment. */
 733         ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
 734 unlock_mb_ret:
 735         mutex_unlock(&sync_rcu_preempt_exp_mutex);
 736 mb_ret:
 737         smp_mb(); /* ensure subsequent action seen after grace period. */
 738 }
 739 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 740
 741 /*
 742  * Check to see if there is any immediate preemptable-RCU-related work
 743  * to be done.
 744  */
 745 static int rcu_preempt_pending(int cpu)
 746 {
 747         return __rcu_pending(&rcu_preempt_state,
 748                              &per_cpu(rcu_preempt_data, cpu));
 749 }
 750
 751 /*
 752  * Does preemptable RCU need the CPU to stay out of dynticks mode?
 753  */
 754 static int rcu_preempt_needs_cpu(int cpu)
 755 {
 756         return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 757 }
 758
 759 /**
 760  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 761  */
 762 void rcu_barrier(void)
 763 {
 764         _rcu_barrier(&rcu_preempt_state, call_rcu);
 765 }
 766 EXPORT_SYMBOL_GPL(rcu_barrier);
 767
 768 /*
 769  * Initialize preemptable RCU's per-CPU data.
 770  */
 771 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 772 {
 773         rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
 774 }
 775
 776 /*
 777  * Move preemptable RCU's callbacks from dying CPU to other online CPU.
 778  */
 779 static void rcu_preempt_send_cbs_to_online(void)
 780 {
 781         rcu_send_cbs_to_online(&rcu_preempt_state);
 782 }
 783
 784 /*
 785  * Initialize preemptable RCU's state structures.
 786  */
 787 static void __init __rcu_init_preempt(void)
 788 {
 789         rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
 790 }
 791
 792 /*
 793  * Check for a task exiting while in a preemptable-RCU read-side
 794  * critical section, clean up if so.  No need to issue warnings,
 795  * as debug_check_no_locks_held() already does this if lockdep
 796  * is enabled.
 797  */
 798 void exit_rcu(void)
 799 {
 800         struct task_struct *t = current;
 801
 802         if (t->rcu_read_lock_nesting == 0)
 803                 return;
 804         t->rcu_read_lock_nesting = 1;
 805         rcu_read_unlock();
 806 }
 807
 808 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 809
/*
 * Tell them what RCU they are running: print the banner for the
 * non-preemptable build, then let rcu_bootup_announce_oddness() report
 * any out-of-the-ordinary configuration settings.
 */
static void __init rcu_bootup_announce(void)
{
        printk(KERN_INFO "Hierarchical RCU implementation.\n");
        rcu_bootup_announce_oddness();
}
 818
/*
 * Return the number of RCU batches processed thus far for debug & stats.
 * Without preemptable RCU, this is whatever rcu_batches_completed_sched()
 * reports.
 */
long rcu_batches_completed(void)
{
        return rcu_batches_completed_sched();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
 827
/*
 * Force a quiescent state for RCU, which, because there is no preemptible
 * RCU, becomes the same as rcu-sched: the request is forwarded unchanged
 * to rcu_sched_force_quiescent_state().
 */
void rcu_force_quiescent_state(void)
{
        rcu_sched_force_quiescent_state();
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 837
/*
 * Because preemptable RCU does not exist, we never have to check for
 * CPUs being in quiescent states.  Empty stub; @cpu is ignored.
 */
static void rcu_preempt_note_context_switch(int cpu)
{
}
 845
/*
 * Because preemptable RCU does not exist, there are never any preempted
 * RCU readers.  Always returns 0; @rnp is ignored.
 */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
        return 0;
}
 854
 855 #ifdef CONFIG_HOTPLUG_CPU
 856
/*
 * Because preemptible RCU does not exist, no quieting of tasks.  The only
 * action taken is to release rnp->lock and restore the interrupt state
 * saved in @flags, so the caller is expected to hold that lock on entry.
 */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
 862
 863 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 864
 865 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 866
/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.  Empty stub;
 * @rsp is ignored and nothing is printed.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}
 874
/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.  Empty stub;
 * @rnp is ignored and nothing is printed.
 */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
}
 882
/*
 * Because preemptible RCU does not exist, there is no need to suppress
 * its CPU stall warnings.  Empty stub.
 */
static void rcu_preempt_stall_reset(void)
{
}
 890
 891 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 892
/*
 * Because there is no preemptable RCU, there can be no readers blocked,
 * so there is no need to check for blocked tasks.  So check only for
 * bogus qsmask values: warn (once) if rnp->qsmask is nonzero.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
        WARN_ON_ONCE(rnp->qsmask);
}
 902
 903 #ifdef CONFIG_HOTPLUG_CPU
 904
/*
 * Because preemptable RCU does not exist, it never needs to migrate
 * tasks that were blocked within RCU read-side critical sections, and
 * such non-existent tasks cannot possibly have been blocking the current
 * grace period.  Always returns 0; all three arguments are ignored.
 */
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
                                     struct rcu_node *rnp,
                                     struct rcu_data *rdp)
{
        return 0;
}
 917
/*
 * Because preemptable RCU does not exist, it never needs CPU-offline
 * processing.  Empty stub; @cpu is ignored.
 */
static void rcu_preempt_offline_cpu(int cpu)
{
}
 925
 926 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 927
/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to check.  Empty stub; @cpu is ignored.
 */
static void rcu_preempt_check_callbacks(int cpu)
{
}
 935
/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to process.  Empty stub.
 */
static void rcu_preempt_process_callbacks(void)
{
}
 943
/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptable RCU does not exist, map to rcu-sched by
 * calling synchronize_sched_expedited() directly.
 */
void synchronize_rcu_expedited(void)
{
        synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 953
 954 #ifdef CONFIG_HOTPLUG_CPU
 955
 956 /*
 957  * Because preemptable RCU does not exist, there is never any need to
 958  * report on tasks preempted in RCU read-side critical sections during
 959  * expedited RCU grace periods.
 960  */
 961 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 962 {
 963         return;
 964 }
 965
 966 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 967
/*
 * Because preemptable RCU does not exist, it never has any work to do.
 * Always returns 0; @cpu is ignored.
 */
static int rcu_preempt_pending(int cpu)
{
        return 0;
}
 975
/*
 * Because preemptable RCU does not exist, it never needs any CPU.
 * Always returns 0; @cpu is ignored.
 */
static int rcu_preempt_needs_cpu(int cpu)
{
        return 0;
}
 983
/*
 * Because preemptable RCU does not exist, rcu_barrier() is just
 * another name for rcu_barrier_sched(), to which it forwards directly.
 */
void rcu_barrier(void)
{
        rcu_barrier_sched();
}
EXPORT_SYMBOL_GPL(rcu_barrier);
 993
/*
 * Because preemptable RCU does not exist, there is no per-CPU
 * data to initialize.  Empty stub; @cpu is ignored.
 */
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
}
1001
/*
 * Because there is no preemptable RCU, there are no callbacks to move.
 * Empty stub.
 */
static void rcu_preempt_send_cbs_to_online(void)
{
}
1008
/*
 * Because preemptable RCU does not exist, it need not be initialized.
 * Empty stub.
 */
static void __init __rcu_init_preempt(void)
{
}
1015
1016 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1017
1018 #ifndef CONFIG_SMP
1019
/*
 * Uniprocessor (!CONFIG_SMP) version of synchronize_sched_expedited():
 * all it does is call cond_resched().
 *
 * NOTE(review): presumably giving the scheduler a chance to run on the
 * only CPU is sufficient for an rcu-sched grace period here -- confirm.
 */
void synchronize_sched_expedited(void)
{
        cond_resched();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1025
1026 #else /* #ifndef CONFIG_SMP */
1027
/*
 * Ticket-style counters shared by all synchronize_sched_expedited()
 * callers.  sync_sched_expedited_started is atomically incremented on
 * each entry; sync_sched_expedited_done is advanced with atomic_cmpxchg()
 * to a caller's snapshot after a try_stop_cpus() pass that did not
 * return -EAGAIN.
 */
static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
1030
/*
 * Callback that synchronize_sched_expedited() hands to try_stop_cpus().
 * It executes a full memory barrier and nothing else.
 *
 * @data: unused (synchronize_sched_expedited() passes NULL).
 *
 * Always returns 0.
 */
static int synchronize_sched_expedited_cpu_stop(void *data)
{
        /*
         * There must be a full memory barrier on each affected CPU
         * between the time that try_stop_cpus() is called and the
         * time that it returns.
         *
         * In the current initial implementation of cpu_stop, the
         * above condition is already met when the control reaches
         * this point and the following smp_mb() is not strictly
         * necessary.  Do smp_mb() anyway for documentation and
         * robustness against future implementation changes.
         */
        smp_mb(); /* See above comment block. */
        return 0;
}
1047
/*
 * Wait for an rcu-sched grace period to elapse, but use "big hammer"
 * approach to force grace period to end quickly.  This consumes
 * significant time on all CPUs, and is thus not recommended for
 * any sort of common-case code.
 *
 * Note that it is illegal to call this function while holding any
 * lock that is acquired by a CPU-hotplug notifier.  Failing to
 * observe this restriction will result in deadlock.
 *
 * This implementation can be thought of as an application of ticket
 * locking to RCU, with sync_sched_expedited_started and
 * sync_sched_expedited_done taking on the roles of the halves
 * of the ticket-lock word.  Each task atomically increments
 * sync_sched_expedited_started upon entry, snapshotting the old value,
 * then attempts to stop all the CPUs.  If this succeeds, then each
 * CPU will have executed a context switch, resulting in an RCU-sched
 * grace period.  We are then done, so we use atomic_cmpxchg() to
 * update sync_sched_expedited_done to match our snapshot -- but
 * only if someone else has not already advanced past our snapshot.
 *
 * On the other hand, if try_stop_cpus() fails, we check the value
 * of sync_sched_expedited_done.  If it has advanced past our
 * initial snapshot, then someone else must have forced a grace period
 * some time after we took our snapshot.  In this case, our work is
 * done for us, and we can simply return.  Otherwise, we try again,
 * but keep our initial snapshot for purposes of checking for someone
 * doing our work for us.
 *
 * If we fail too many times in a row, we fall back to synchronize_sched().
 *
 * Every exit path leaves get_online_cpus()/put_online_cpus() balanced:
 * the two early returns inside the retry loop happen after the
 * put_online_cpus() at the top of the loop body, and the normal exit
 * calls put_online_cpus() at the very end.
 */
void synchronize_sched_expedited(void)
{
        int firstsnap, s, snap, trycount = 0;

        /* Note that atomic_inc_return() implies full memory barrier. */
        firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
        get_online_cpus();

        /*
         * Each pass through the following loop attempts to force a
         * context switch on each CPU.  The loop repeats only while
         * try_stop_cpus() reports -EAGAIN.
         */
        while (try_stop_cpus(cpu_online_mask,
                             synchronize_sched_expedited_cpu_stop,
                             NULL) == -EAGAIN) {
                /* Drop the hotplug reference while we back off. */
                put_online_cpus();

                /*
                 * No joy, try again later.  Or just synchronize_sched().
                 * The first ten failures delay for a time that grows with
                 * both the attempt number and the number of online CPUs;
                 * the eleventh gives up on the expedited path.
                 */
                if (trycount++ < 10)
                        udelay(trycount * num_online_cpus());
                else {
                        synchronize_sched();
                        return;
                }

                /*
                 * Check to see if someone else did our work for us.
                 * The comparison is made on unsigned values; presumably
                 * UINT_CMP_GE() is a wrap-tolerant ">=" -- confirm
                 * against its definition.
                 */
                s = atomic_read(&sync_sched_expedited_done);
                if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
                        smp_mb(); /* ensure test happens before caller kfree */
                        return;
                }

                /*
                 * Refetching sync_sched_expedited_started allows later
                 * callers to piggyback on our grace period.  We subtract
                 * 1 to get the same token that the last incrementer got.
                 * We retry after they started, so our grace period works
                 * for them, and they started after our first try, so their
                 * grace period works for us.
                 */
                get_online_cpus();
                snap = atomic_read(&sync_sched_expedited_started) - 1;
                smp_mb(); /* ensure read is before try_stop_cpus(). */
        }

        /*
         * Everyone up to our most recent fetch is covered by our grace
         * period.  Update the counter, but only if our work is still
         * relevant -- which it won't be if someone who started later
         * than we did beat us to the punch.  The cmpxchg is retried
         * whenever sync_sched_expedited_done changed underneath us.
         */
        do {
                s = atomic_read(&sync_sched_expedited_done);
                if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
                        smp_mb(); /* ensure test happens before caller kfree */
                        break;
                }
        } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);

        put_online_cpus();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1141
1142 #endif /* #else #ifndef CONFIG_SMP */
1143
1144 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1145
/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * This is the version built when CONFIG_RCU_FAST_NO_HZ is not set: just
 * check whether this CPU needs any flavor of RCU, by way of
 * rcu_needs_cpu_quick_check().  Do not chew up lots of CPU cycles with
 * preemption disabled in a most-likely vain attempt to cause RCU not to
 * need this CPU.
 */
int rcu_needs_cpu(int cpu)
{
        return rcu_needs_cpu_quick_check(cpu);
}
1160
/*
 * Check to see if we need to continue a callback-flush operation to
 * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
 * entry is not configured, so we never need to.  Empty stub.
 */
static void rcu_needs_cpu_flush(void)
{
}
1169
1170 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1171
/* Value to which rcu_needs_cpu() (re)initializes rcu_dyntick_drain. */
#define RCU_NEEDS_CPU_FLUSHES 5
/* Per-CPU countdown that sequences rcu_needs_cpu()'s callback-flush passes. */
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
/* Per-CPU jiffies stamp: rcu_needs_cpu() skips flushing while it equals jiffies. */
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1175
1176 /*
1177  * Check to see if any future RCU-related work will need to be done
1178  * by the current CPU, even if none need be done immediately, returning
1179  * 1 if so.  This function is part of the RCU implementation; it is -not-
1180  * an exported member of the RCU API.
1181  *
1182  * Because we are not supporting preemptible RCU, attempt to accelerate
1183  * any current grace periods so that RCU no longer needs this CPU, but
1184  * only if all other CPUs are already in dynticks-idle mode.  This will
1185  * allow the CPU cores to be powered down immediately, as opposed to after
1186  * waiting many milliseconds for grace periods to elapse.
1187  *
1188  * Because it is not legal to invoke rcu_process_callbacks() with irqs
1189  * disabled, we do one pass of force_quiescent_state(), then do a
1190  * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
1191  * The per-cpu rcu_dyntick_drain variable controls the sequencing.
1192  */
1193 int rcu_needs_cpu(int cpu)
1194 {
1195         int c = 0;
1196         int snap;
1197         int snap_nmi;
1198         int thatcpu;
1199
1200         /* Check for being in the holdoff period. */
1201         if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
1202                 return rcu_needs_cpu_quick_check(cpu);
1203
1204         /* Don't bother unless we are the last non-dyntick-idle CPU. */
1205         for_each_online_cpu(thatcpu) {
1206                 if (thatcpu == cpu)
1207                         continue;
1208                 snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
1209                 snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
1210                 smp_mb(); /* Order sampling of snap with end of grace period. */
1211                 if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
1212                         per_cpu(rcu_dyntick_drain, cpu) = 0;
1213                         per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
1214                         return rcu_needs_cpu_quick_check(cpu);
1215                 }
1216         }
1217
1218         /* Check and update the rcu_dyntick_drain sequencing. */
1219         if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1220                 /* First time through, initialize the counter. */
1221                 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
1222         } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1223                 /* We have hit the limit, so time to give up. */
1224                 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
1225                 return rcu_needs_cpu_quick_check(cpu);
1226         }
1227
1228         /* Do one step pushing remaining RCU callbacks through. */
1229         if (per_cpu(rcu_sched_data, cpu).nxtlist) {
1230                 rcu_sched_qs(cpu);
1231                 force_quiescent_state(&rcu_sched_state, 0);
1232                 c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
1233         }
1234         if (per_cpu(rcu_bh_data, cpu).nxtlist) {
1235                 rcu_bh_qs(cpu);
1236                 force_quiescent_state(&rcu_bh_state, 0);
1237                 c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
1238         }
1239
1240         /* If RCU callbacks are still pending, RCU still needs this CPU. */
1241         if (c)
1242                 raise_softirq(RCU_SOFTIRQ);
1243         return c;
1244 }
1245
1246 /*
1247  * Check to see if we need to continue a callback-flush operations to
1248  * allow the last CPU to enter dyntick-idle mode.
1249  */
1250 static void rcu_needs_cpu_flush(void)
1251 {
1252         int cpu = smp_processor_id();
1253         unsigned long flags;
1254
1255         if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
1256                 return;
1257         local_irq_save(flags);
1258         (void)rcu_needs_cpu(cpu);
1259         local_irq_restore(flags);
1260 }
1261
1262 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */