// SPDX-License-Identifier: GPL-2.0+
/*
 * RCU segmented callback lists, function definitions
 *
 * Copyright IBM Corporation, 2017
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/interrupt.h>
13 #include <linux/rcupdate.h>
15 #include "rcu_segcblist.h"
17 /* Initialize simple callback list. */
18 void rcu_cblist_init(struct rcu_cblist
*rclp
)
21 rclp
->tail
= &rclp
->head
;
26 * Enqueue an rcu_head structure onto the specified callback list.
28 void rcu_cblist_enqueue(struct rcu_cblist
*rclp
, struct rcu_head
*rhp
)
31 rclp
->tail
= &rhp
->next
;
32 WRITE_ONCE(rclp
->len
, rclp
->len
+ 1);
36 * Flush the second rcu_cblist structure onto the first one, obliterating
37 * any contents of the first. If rhp is non-NULL, enqueue it as the sole
38 * element of the second rcu_cblist structure, but ensuring that the second
39 * rcu_cblist structure, if initially non-empty, always appears non-empty
40 * throughout the process. If rdp is NULL, the second rcu_cblist structure
41 * is instead initialized to empty.
43 void rcu_cblist_flush_enqueue(struct rcu_cblist
*drclp
,
44 struct rcu_cblist
*srclp
,
47 drclp
->head
= srclp
->head
;
49 drclp
->tail
= srclp
->tail
;
51 drclp
->tail
= &drclp
->head
;
52 drclp
->len
= srclp
->len
;
54 rcu_cblist_init(srclp
);
58 srclp
->tail
= &rhp
->next
;
59 WRITE_ONCE(srclp
->len
, 1);
64 * Dequeue the oldest rcu_head structure from the specified callback
67 struct rcu_head
*rcu_cblist_dequeue(struct rcu_cblist
*rclp
)
75 rclp
->head
= rhp
->next
;
77 rclp
->tail
= &rclp
->head
;
81 /* Set the length of an rcu_segcblist structure. */
82 static void rcu_segcblist_set_len(struct rcu_segcblist
*rsclp
, long v
)
84 #ifdef CONFIG_RCU_NOCB_CPU
85 atomic_long_set(&rsclp
->len
, v
);
87 WRITE_ONCE(rsclp
->len
, v
);
92 * Increase the numeric length of an rcu_segcblist structure by the
93 * specified amount, which can be negative. This can cause the ->len
94 * field to disagree with the actual number of callbacks on the structure.
95 * This increase is fully ordered with respect to the callers accesses
96 * both before and after.
98 static void rcu_segcblist_add_len(struct rcu_segcblist
*rsclp
, long v
)
100 #ifdef CONFIG_RCU_NOCB_CPU
101 smp_mb__before_atomic(); /* Up to the caller! */
102 atomic_long_add(v
, &rsclp
->len
);
103 smp_mb__after_atomic(); /* Up to the caller! */
105 smp_mb(); /* Up to the caller! */
106 WRITE_ONCE(rsclp
->len
, rsclp
->len
+ v
);
107 smp_mb(); /* Up to the caller! */
/*
 * Increase the numeric length of an rcu_segcblist structure by one.
 * This can cause the ->len field to disagree with the actual number of
 * callbacks on the structure.  This increase is fully ordered with respect
 * to the caller's accesses both before and after.
 */
void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
{
	rcu_segcblist_add_len(rsclp, 1);
}
123 * Exchange the numeric length of the specified rcu_segcblist structure
124 * with the specified value. This can cause the ->len field to disagree
125 * with the actual number of callbacks on the structure. This exchange is
126 * fully ordered with respect to the callers accesses both before and after.
128 static long rcu_segcblist_xchg_len(struct rcu_segcblist
*rsclp
, long v
)
130 #ifdef CONFIG_RCU_NOCB_CPU
131 return atomic_long_xchg(&rsclp
->len
, v
);
133 long ret
= rsclp
->len
;
135 smp_mb(); /* Up to the caller! */
136 WRITE_ONCE(rsclp
->len
, v
);
137 smp_mb(); /* Up to the caller! */
143 * Initialize an rcu_segcblist structure.
145 void rcu_segcblist_init(struct rcu_segcblist
*rsclp
)
149 BUILD_BUG_ON(RCU_NEXT_TAIL
+ 1 != ARRAY_SIZE(rsclp
->gp_seq
));
150 BUILD_BUG_ON(ARRAY_SIZE(rsclp
->tails
) != ARRAY_SIZE(rsclp
->gp_seq
));
152 for (i
= 0; i
< RCU_CBLIST_NSEGS
; i
++)
153 rsclp
->tails
[i
] = &rsclp
->head
;
154 rcu_segcblist_set_len(rsclp
, 0);
159 * Disable the specified rcu_segcblist structure, so that callbacks can
160 * no longer be posted to it. This structure must be empty.
162 void rcu_segcblist_disable(struct rcu_segcblist
*rsclp
)
164 WARN_ON_ONCE(!rcu_segcblist_empty(rsclp
));
165 WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp
));
170 * Mark the specified rcu_segcblist structure as offloaded. This
171 * structure must be empty.
173 void rcu_segcblist_offload(struct rcu_segcblist
*rsclp
)
175 rsclp
->offloaded
= 1;
179 * Does the specified rcu_segcblist structure contain callbacks that
180 * are ready to be invoked?
182 bool rcu_segcblist_ready_cbs(struct rcu_segcblist
*rsclp
)
184 return rcu_segcblist_is_enabled(rsclp
) &&
185 &rsclp
->head
!= READ_ONCE(rsclp
->tails
[RCU_DONE_TAIL
]);
189 * Does the specified rcu_segcblist structure contain callbacks that
190 * are still pending, that is, not yet ready to be invoked?
192 bool rcu_segcblist_pend_cbs(struct rcu_segcblist
*rsclp
)
194 return rcu_segcblist_is_enabled(rsclp
) &&
195 !rcu_segcblist_restempty(rsclp
, RCU_DONE_TAIL
);
199 * Return a pointer to the first callback in the specified rcu_segcblist
200 * structure. This is useful for diagnostics.
202 struct rcu_head
*rcu_segcblist_first_cb(struct rcu_segcblist
*rsclp
)
204 if (rcu_segcblist_is_enabled(rsclp
))
210 * Return a pointer to the first pending callback in the specified
211 * rcu_segcblist structure. This is useful just after posting a given
212 * callback -- if that callback is the first pending callback, then
213 * you cannot rely on someone else having already started up the required
216 struct rcu_head
*rcu_segcblist_first_pend_cb(struct rcu_segcblist
*rsclp
)
218 if (rcu_segcblist_is_enabled(rsclp
))
219 return *rsclp
->tails
[RCU_DONE_TAIL
];
224 * Return false if there are no CBs awaiting grace periods, otherwise,
225 * return true and store the nearest waited-upon grace period into *lp.
227 bool rcu_segcblist_nextgp(struct rcu_segcblist
*rsclp
, unsigned long *lp
)
229 if (!rcu_segcblist_pend_cbs(rsclp
))
231 *lp
= rsclp
->gp_seq
[RCU_WAIT_TAIL
];
236 * Enqueue the specified callback onto the specified rcu_segcblist
237 * structure, updating accounting as needed. Note that the ->len
238 * field may be accessed locklessly, hence the WRITE_ONCE().
239 * The ->len field is used by rcu_barrier() and friends to determine
240 * if it must post a callback on this structure, and it is OK
241 * for rcu_barrier() to sometimes post callbacks needlessly, but
242 * absolutely not OK for it to ever miss posting a callback.
244 void rcu_segcblist_enqueue(struct rcu_segcblist
*rsclp
,
245 struct rcu_head
*rhp
)
247 rcu_segcblist_inc_len(rsclp
);
248 smp_mb(); /* Ensure counts are updated before callback is enqueued. */
250 WRITE_ONCE(*rsclp
->tails
[RCU_NEXT_TAIL
], rhp
);
251 WRITE_ONCE(rsclp
->tails
[RCU_NEXT_TAIL
], &rhp
->next
);
255 * Entrain the specified callback onto the specified rcu_segcblist at
256 * the end of the last non-empty segment. If the entire rcu_segcblist
257 * is empty, make no change, but return false.
259 * This is intended for use by rcu_barrier()-like primitives, -not-
260 * for normal grace-period use. IMPORTANT: The callback you enqueue
261 * will wait for all prior callbacks, NOT necessarily for a grace
262 * period. You have been warned.
264 bool rcu_segcblist_entrain(struct rcu_segcblist
*rsclp
,
265 struct rcu_head
*rhp
)
269 if (rcu_segcblist_n_cbs(rsclp
) == 0)
271 rcu_segcblist_inc_len(rsclp
);
272 smp_mb(); /* Ensure counts are updated before callback is entrained. */
274 for (i
= RCU_NEXT_TAIL
; i
> RCU_DONE_TAIL
; i
--)
275 if (rsclp
->tails
[i
] != rsclp
->tails
[i
- 1])
277 WRITE_ONCE(*rsclp
->tails
[i
], rhp
);
278 for (; i
<= RCU_NEXT_TAIL
; i
++)
279 WRITE_ONCE(rsclp
->tails
[i
], &rhp
->next
);
284 * Extract only the counts from the specified rcu_segcblist structure,
285 * and place them in the specified rcu_cblist structure. This function
286 * supports both callback orphaning and invocation, hence the separation
287 * of counts and callbacks. (Callbacks ready for invocation must be
288 * orphaned and adopted separately from pending callbacks, but counts
289 * apply to all callbacks. Locking must be used to make sure that
290 * both orphaned-callbacks lists are consistent.)
292 void rcu_segcblist_extract_count(struct rcu_segcblist
*rsclp
,
293 struct rcu_cblist
*rclp
)
295 rclp
->len
= rcu_segcblist_xchg_len(rsclp
, 0);
299 * Extract only those callbacks ready to be invoked from the specified
300 * rcu_segcblist structure and place them in the specified rcu_cblist
303 void rcu_segcblist_extract_done_cbs(struct rcu_segcblist
*rsclp
,
304 struct rcu_cblist
*rclp
)
308 if (!rcu_segcblist_ready_cbs(rsclp
))
309 return; /* Nothing to do. */
310 *rclp
->tail
= rsclp
->head
;
311 WRITE_ONCE(rsclp
->head
, *rsclp
->tails
[RCU_DONE_TAIL
]);
312 WRITE_ONCE(*rsclp
->tails
[RCU_DONE_TAIL
], NULL
);
313 rclp
->tail
= rsclp
->tails
[RCU_DONE_TAIL
];
314 for (i
= RCU_CBLIST_NSEGS
- 1; i
>= RCU_DONE_TAIL
; i
--)
315 if (rsclp
->tails
[i
] == rsclp
->tails
[RCU_DONE_TAIL
])
316 WRITE_ONCE(rsclp
->tails
[i
], &rsclp
->head
);
320 * Extract only those callbacks still pending (not yet ready to be
321 * invoked) from the specified rcu_segcblist structure and place them in
322 * the specified rcu_cblist structure. Note that this loses information
323 * about any callbacks that might have been partway done waiting for
324 * their grace period. Too bad! They will have to start over.
326 void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist
*rsclp
,
327 struct rcu_cblist
*rclp
)
331 if (!rcu_segcblist_pend_cbs(rsclp
))
332 return; /* Nothing to do. */
333 *rclp
->tail
= *rsclp
->tails
[RCU_DONE_TAIL
];
334 rclp
->tail
= rsclp
->tails
[RCU_NEXT_TAIL
];
335 WRITE_ONCE(*rsclp
->tails
[RCU_DONE_TAIL
], NULL
);
336 for (i
= RCU_DONE_TAIL
+ 1; i
< RCU_CBLIST_NSEGS
; i
++)
337 WRITE_ONCE(rsclp
->tails
[i
], rsclp
->tails
[RCU_DONE_TAIL
]);
341 * Insert counts from the specified rcu_cblist structure in the
342 * specified rcu_segcblist structure.
344 void rcu_segcblist_insert_count(struct rcu_segcblist
*rsclp
,
345 struct rcu_cblist
*rclp
)
347 rcu_segcblist_add_len(rsclp
, rclp
->len
);
352 * Move callbacks from the specified rcu_cblist to the beginning of the
353 * done-callbacks segment of the specified rcu_segcblist.
355 void rcu_segcblist_insert_done_cbs(struct rcu_segcblist
*rsclp
,
356 struct rcu_cblist
*rclp
)
361 return; /* No callbacks to move. */
362 *rclp
->tail
= rsclp
->head
;
363 WRITE_ONCE(rsclp
->head
, rclp
->head
);
364 for (i
= RCU_DONE_TAIL
; i
< RCU_CBLIST_NSEGS
; i
++)
365 if (&rsclp
->head
== rsclp
->tails
[i
])
366 WRITE_ONCE(rsclp
->tails
[i
], rclp
->tail
);
370 rclp
->tail
= &rclp
->head
;
374 * Move callbacks from the specified rcu_cblist to the end of the
375 * new-callbacks segment of the specified rcu_segcblist.
377 void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist
*rsclp
,
378 struct rcu_cblist
*rclp
)
381 return; /* Nothing to do. */
382 WRITE_ONCE(*rsclp
->tails
[RCU_NEXT_TAIL
], rclp
->head
);
383 WRITE_ONCE(rsclp
->tails
[RCU_NEXT_TAIL
], rclp
->tail
);
387 * Advance the callbacks in the specified rcu_segcblist structure based
388 * on the current value passed in for the grace-period counter.
390 void rcu_segcblist_advance(struct rcu_segcblist
*rsclp
, unsigned long seq
)
394 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp
));
395 if (rcu_segcblist_restempty(rsclp
, RCU_DONE_TAIL
))
399 * Find all callbacks whose ->gp_seq numbers indicate that they
400 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
402 for (i
= RCU_WAIT_TAIL
; i
< RCU_NEXT_TAIL
; i
++) {
403 if (ULONG_CMP_LT(seq
, rsclp
->gp_seq
[i
]))
405 WRITE_ONCE(rsclp
->tails
[RCU_DONE_TAIL
], rsclp
->tails
[i
]);
408 /* If no callbacks moved, nothing more need be done. */
409 if (i
== RCU_WAIT_TAIL
)
412 /* Clean up tail pointers that might have been misordered above. */
413 for (j
= RCU_WAIT_TAIL
; j
< i
; j
++)
414 WRITE_ONCE(rsclp
->tails
[j
], rsclp
->tails
[RCU_DONE_TAIL
]);
417 * Callbacks moved, so clean up the misordered ->tails[] pointers
418 * that now point into the middle of the list of ready-to-invoke
419 * callbacks. The overall effect is to copy down the later pointers
420 * into the gap that was created by the now-ready segments.
422 for (j
= RCU_WAIT_TAIL
; i
< RCU_NEXT_TAIL
; i
++, j
++) {
423 if (rsclp
->tails
[j
] == rsclp
->tails
[RCU_NEXT_TAIL
])
424 break; /* No more callbacks. */
425 WRITE_ONCE(rsclp
->tails
[j
], rsclp
->tails
[i
]);
426 rsclp
->gp_seq
[j
] = rsclp
->gp_seq
[i
];
431 * "Accelerate" callbacks based on more-accurate grace-period information.
432 * The reason for this is that RCU does not synchronize the beginnings and
433 * ends of grace periods, and that callbacks are posted locally. This in
434 * turn means that the callbacks must be labelled conservatively early
435 * on, as getting exact information would degrade both performance and
436 * scalability. When more accurate grace-period information becomes
437 * available, previously posted callbacks can be "accelerated", marking
438 * them to complete at the end of the earlier grace period.
440 * This function operates on an rcu_segcblist structure, and also the
441 * grace-period sequence number seq at which new callbacks would become
442 * ready to invoke. Returns true if there are callbacks that won't be
443 * ready to invoke until seq, false otherwise.
445 bool rcu_segcblist_accelerate(struct rcu_segcblist
*rsclp
, unsigned long seq
)
449 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp
));
450 if (rcu_segcblist_restempty(rsclp
, RCU_DONE_TAIL
))
454 * Find the segment preceding the oldest segment of callbacks
455 * whose ->gp_seq[] completion is at or after that passed in via
456 * "seq", skipping any empty segments. This oldest segment, along
457 * with any later segments, can be merged in with any newly arrived
458 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
459 * as their ->gp_seq[] grace-period completion sequence number.
461 for (i
= RCU_NEXT_READY_TAIL
; i
> RCU_DONE_TAIL
; i
--)
462 if (rsclp
->tails
[i
] != rsclp
->tails
[i
- 1] &&
463 ULONG_CMP_LT(rsclp
->gp_seq
[i
], seq
))
467 * If all the segments contain callbacks that correspond to
468 * earlier grace-period sequence numbers than "seq", leave.
469 * Assuming that the rcu_segcblist structure has enough
470 * segments in its arrays, this can only happen if some of
471 * the non-done segments contain callbacks that really are
472 * ready to invoke. This situation will get straightened
473 * out by the next call to rcu_segcblist_advance().
475 * Also advance to the oldest segment of callbacks whose
476 * ->gp_seq[] completion is at or after that passed in via "seq",
477 * skipping any empty segments.
479 * Note that segment "i" (and any lower-numbered segments
480 * containing older callbacks) will be unaffected, and their
481 * grace-period numbers remain unchanged. For example, if i ==
482 * WAIT_TAIL, then neither WAIT_TAIL nor DONE_TAIL will be touched.
483 * Instead, the CBs in NEXT_TAIL will be merged with those in
484 * NEXT_READY_TAIL and the grace-period number of NEXT_READY_TAIL
485 * would be updated. NEXT_TAIL would then be empty.
487 if (rcu_segcblist_restempty(rsclp
, i
) || ++i
>= RCU_NEXT_TAIL
)
491 * Merge all later callbacks, including newly arrived callbacks,
492 * into the segment located by the for-loop above. Assign "seq"
493 * as the ->gp_seq[] value in order to correctly handle the case
494 * where there were no pending callbacks in the rcu_segcblist
495 * structure other than in the RCU_NEXT_TAIL segment.
497 for (; i
< RCU_NEXT_TAIL
; i
++) {
498 WRITE_ONCE(rsclp
->tails
[i
], rsclp
->tails
[RCU_NEXT_TAIL
]);
499 rsclp
->gp_seq
[i
] = seq
;
505 * Merge the source rcu_segcblist structure into the destination
506 * rcu_segcblist structure, then initialize the source. Any pending
507 * callbacks from the source get to start over. It is best to
508 * advance and accelerate both the destination and the source
511 void rcu_segcblist_merge(struct rcu_segcblist
*dst_rsclp
,
512 struct rcu_segcblist
*src_rsclp
)
514 struct rcu_cblist donecbs
;
515 struct rcu_cblist pendcbs
;
517 rcu_cblist_init(&donecbs
);
518 rcu_cblist_init(&pendcbs
);
519 rcu_segcblist_extract_count(src_rsclp
, &donecbs
);
520 rcu_segcblist_extract_done_cbs(src_rsclp
, &donecbs
);
521 rcu_segcblist_extract_pend_cbs(src_rsclp
, &pendcbs
);
522 rcu_segcblist_insert_count(dst_rsclp
, &donecbs
);
523 rcu_segcblist_insert_done_cbs(dst_rsclp
, &donecbs
);
524 rcu_segcblist_insert_pend_cbs(dst_rsclp
, &pendcbs
);
525 rcu_segcblist_init(src_rsclp
);