// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "%s() " fmt "\n", __func__

#include <linux/generic-radix-tree.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/vmalloc.h>

#include "rcu_pending.h"
#include "darray.h"
#include "util.h"

#define static_array_for_each(_a, _i)			\
	for (typeof(&(_a)[0]) _i = _a;			\
	     _i < (_a) + ARRAY_SIZE(_a);		\
	     _i++)

enum rcu_pending_special {
	RCU_PENDING_KVFREE	= 1,
	RCU_PENDING_CALL_RCU	= 2,
};

#define RCU_PENDING_KVFREE_FN		((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE)
#define RCU_PENDING_CALL_RCU_FN		((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU)

static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp)
{
	return ssp
		? get_state_synchronize_srcu(ssp)
		: get_state_synchronize_rcu();
}

static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp)
{
	return ssp
		? start_poll_synchronize_srcu(ssp)
		: start_poll_synchronize_rcu();
}

static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie)
{
	return ssp
		? poll_state_synchronize_srcu(ssp, cookie)
		: poll_state_synchronize_rcu(cookie);
}

static inline void __rcu_barrier(struct srcu_struct *ssp)
{
	if (ssp)
		srcu_barrier(ssp);
	else
		rcu_barrier();
}

static inline void __call_rcu(struct srcu_struct *ssp, struct rcu_head *rhp,
			      rcu_callback_t func)
{
	if (ssp)
		call_srcu(ssp, rhp, func);
	else
		call_rcu(rhp, func);
}

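/*
 * Note on the wrappers above: they let the rest of this file treat plain RCU
 * and SRCU uniformly through the polled grace period API. A minimal sketch of
 * the pattern (illustrative only, not part of this file's API):
 *
 *	unsigned long cookie = __get_state_synchronize_rcu(ssp);
 *	...
 *	if (__poll_state_synchronize_rcu(ssp, cookie)) {
 *		// a grace period has elapsed since the cookie was taken;
 *		// objects enqueued before then are safe to process
 *	}
 */
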
struct rcu_pending_seq {
	/*
	 * We're using a radix tree like a vector - we're just pushing elements
	 * onto the end; we're using a radix tree instead of an actual vector to
	 * avoid reallocation overhead
	 */
	GENRADIX(struct rcu_head *)	objs;
	size_t				nr;
	struct rcu_head			**cursor;
	unsigned long			seq;
};

struct rcu_pending_list {
	struct rcu_head		*head;
	struct rcu_head		*tail;
	unsigned long		seq;
};

struct rcu_pending_pcpu {
	struct rcu_pending	*parent;
	spinlock_t		lock;
	int			cpu;

	/*
	 * We can't bound the number of unprocessed gp sequence numbers, and we
	 * can't efficiently merge radix trees for expired grace periods, so we
	 * need darray/vector:
	 */
	DARRAY_PREALLOCATED(struct rcu_pending_seq, 4) objs;

	/* Third entry is for expired objects: */
	struct rcu_pending_list	lists[NUM_ACTIVE_RCU_POLL_OLDSTATE + 1];

	struct rcu_head		cb;
	bool			cb_armed;
	struct work_struct	work;
};

static bool __rcu_pending_has_pending(struct rcu_pending_pcpu *p)
{
	if (p->objs.nr)
		return true;

	static_array_for_each(p->lists, i)
		if (i->head)
			return true;

	return false;
}

static void rcu_pending_list_merge(struct rcu_pending_list *l1,
				   struct rcu_pending_list *l2)
{
#ifdef __KERNEL__
	if (!l1->head)
		l1->head = l2->head;
	else
		l1->tail->next = l2->head;
#else
	if (!l1->head)
		l1->head = l2->head;
	else
		l1->tail->next.next = (void *) l2->head;
#endif

	l1->tail = l2->tail;
	l2->head = l2->tail = NULL;
}

static void rcu_pending_list_add(struct rcu_pending_list *l,
				 struct rcu_head *n)
{
#ifdef __KERNEL__
	if (!l->head)
		l->head = n;
	else
		l->tail->next = n;
	l->tail = n;
	n->next = NULL;
#else
	if (!l->head)
		l->head = n;
	else
		l->tail->next.next = (void *) n;
	l->tail = n;
	n->next.next = NULL;
#endif
}

static void merge_expired_lists(struct rcu_pending_pcpu *p)
{
	struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];

	for (struct rcu_pending_list *i = p->lists; i < expired; i++)
		if (i->head && __poll_state_synchronize_rcu(p->parent->srcu, i->seq))
			rcu_pending_list_merge(expired, i);
}

#ifndef __KERNEL__
/* shims for building outside the kernel: */
static inline void kfree_bulk(size_t nr, void **p)
{
	while (nr--)
		kfree(*p++);
}

#define local_irq_save(flags)		\
do {					\
	flags = 0;			\
} while (0)
#endif

static noinline void __process_finished_items(struct rcu_pending *pending,
					      struct rcu_pending_pcpu *p,
					      unsigned long flags)
{
	struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
	struct rcu_pending_seq objs = {};
	struct rcu_head *list = NULL;

	if (p->objs.nr &&
	    __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) {
		objs = p->objs.data[0];
		darray_remove_item(&p->objs, p->objs.data);
	}

	merge_expired_lists(p);

	list = expired->head;
	expired->head = expired->tail = NULL;

	spin_unlock_irqrestore(&p->lock, flags);

	switch ((ulong) pending->process) {
	case RCU_PENDING_KVFREE:
		for (size_t i = 0; i < objs.nr; ) {
			size_t nr_this_node = min(GENRADIX_NODE_SIZE / sizeof(void *), objs.nr - i);

			kfree_bulk(nr_this_node, (void **) genradix_ptr(&objs.objs, i));
			i += nr_this_node;
		}
		genradix_free(&objs.objs);

		while (list) {
			struct rcu_head *obj = list;
#ifdef __KERNEL__
			list = obj->next;
#else
			list = (void *) obj->next.next;
#endif

			/*
			 * low bit of pointer indicates whether rcu_head needs
			 * to be freed - kvfree_rcu_mightsleep()
			 */
			BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0);

			void *ptr = (void *)(((unsigned long) obj->func) & ~1UL);
			bool free_head = ((unsigned long) obj->func) & 1UL;

			kvfree(ptr);
			if (free_head)
				kfree(obj);
		}

		break;

	case RCU_PENDING_CALL_RCU:
		for (size_t i = 0; i < objs.nr; i++) {
			struct rcu_head *obj = *genradix_ptr(&objs.objs, i);
			obj->func(obj);
		}
		genradix_free(&objs.objs);

		while (list) {
			struct rcu_head *obj = list;
#ifdef __KERNEL__
			list = obj->next;
#else
			list = (void *) obj->next.next;
#endif
			obj->func(obj);
		}
		break;

	default:
		for (size_t i = 0; i < objs.nr; i++)
			pending->process(pending, *genradix_ptr(&objs.objs, i));
		genradix_free(&objs.objs);

		while (list) {
			struct rcu_head *obj = list;
#ifdef __KERNEL__
			list = obj->next;
#else
			list = (void *) obj->next.next;
#endif
			pending->process(pending, obj);
		}
		break;
	}
}

static bool process_finished_items(struct rcu_pending *pending,
				   struct rcu_pending_pcpu *p,
				   unsigned long flags)
{
	/*
	 * XXX: we should grab the gp seq once and avoid multiple function
	 * calls, this is called from __rcu_pending_enqueue() fastpath in
	 * may_sleep==true mode
	 */
	if ((p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) ||
	    (p->lists[0].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[0].seq)) ||
	    (p->lists[1].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[1].seq)) ||
	    p->lists[2].head) {
		__process_finished_items(pending, p, flags);
		return true;
	}

	return false;
}

static void rcu_pending_work(struct work_struct *work)
{
	struct rcu_pending_pcpu *p =
		container_of(work, struct rcu_pending_pcpu, work);
	struct rcu_pending *pending = p->parent;
	unsigned long flags;

	do {
		spin_lock_irqsave(&p->lock, flags);
	} while (process_finished_items(pending, p, flags));

	spin_unlock_irqrestore(&p->lock, flags);
}

static void rcu_pending_rcu_cb(struct rcu_head *rcu)
{
	struct rcu_pending_pcpu *p = container_of(rcu, struct rcu_pending_pcpu, cb);

	schedule_work_on(p->cpu, &p->work);

	unsigned long flags;
	spin_lock_irqsave(&p->lock, flags);
	if (__rcu_pending_has_pending(p)) {
		spin_unlock_irqrestore(&p->lock, flags);
		__call_rcu(p->parent->srcu, &p->cb, rcu_pending_rcu_cb);
	} else {
		p->cb_armed = false;
		spin_unlock_irqrestore(&p->lock, flags);
	}
}

static __always_inline struct rcu_pending_seq *
get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq)
{
	darray_for_each_reverse(p->objs, objs)
		if (objs->seq == seq)
			return objs;

	if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC))
		return NULL;

	return &darray_last(p->objs);
}

static noinline bool
rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq,
			 struct rcu_head *head, void *ptr,
			 unsigned long *flags)
{
	if (ptr) {
		if (!head) {
			/*
			 * kvfree_rcu_mightsleep(): we weren't passed an
			 * rcu_head, but we need one: use the low bit of the
			 * pointer to free to flag that the head needs to be
			 * freed as well:
			 */
			ptr = (void *)(((unsigned long) ptr)|1UL);
			head = kmalloc(sizeof(*head), __GFP_NOWARN);
			if (!head) {
				spin_unlock_irqrestore(&p->lock, *flags);
				head = kmalloc(sizeof(*head), GFP_KERNEL|__GFP_NOFAIL);
				/*
				 * dropped lock, did GFP_KERNEL allocation,
				 * check for gp expiration
				 */
				if (unlikely(__poll_state_synchronize_rcu(p->parent->srcu, seq))) {
					kvfree((void *)(((unsigned long) ptr) & ~1UL));
					kfree(head);
					spin_lock_irqsave(&p->lock, *flags);
					return false;
				}

				spin_lock_irqsave(&p->lock, *flags);
			}
		}

		head->func = ptr;
	}
again:
	for (struct rcu_pending_list *i = p->lists;
	     i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
		if (i->seq == seq) {
			rcu_pending_list_add(i, head);
			return false;
		}
	}

	for (struct rcu_pending_list *i = p->lists;
	     i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
		if (!i->head) {
			i->seq = seq;
			rcu_pending_list_add(i, head);
			return true;
		}
	}

	merge_expired_lists(p);
	goto again;
}

/**
 * __rcu_pending_enqueue: enqueue a pending RCU item, to be processed (via
 * pending->process) once grace period elapses.
 *
 * Attempt to enqueue items onto a radix tree; if memory allocation fails, fall
 * back to a linked list.
 *
 * - If @ptr is NULL, we're enqueuing an item for a generic @pending with a
 *   process callback
 *
 * - If @ptr and @head are both not NULL, we're kvfree_rcu()
 *
 * - If @ptr is not NULL and @head is, we're kvfree_rcu_mightsleep()
 *
 * - If @may_sleep is true, will do GFP_KERNEL memory allocations and process
 *   expired items.
 */
static __always_inline void
__rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
		      void *ptr, bool may_sleep)
{
	struct rcu_pending_pcpu *p;
	struct rcu_pending_seq *objs;
	struct genradix_node *new_node = NULL;
	unsigned long seq, flags;
	bool start_gp = false;

	BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));

	local_irq_save(flags);
	p = this_cpu_ptr(pending->p);
	spin_lock(&p->lock);
	seq = __get_state_synchronize_rcu(pending->srcu);
restart:
	if (may_sleep &&
	    unlikely(process_finished_items(pending, p, flags)))
		goto check_expired;

	/*
	 * In kvfree_rcu() mode, the radix tree is only for slab pointers so
	 * that we can do kfree_bulk() - vmalloc pointers always use the linked
	 * list:
	 */
	if (ptr && unlikely(is_vmalloc_addr(ptr)))
		goto list_add;

	objs = get_object_radix(p, seq);
	if (unlikely(!objs))
		goto list_add;

	if (unlikely(!objs->cursor)) {
		/*
		 * New radix tree nodes must be added under @p->lock because the
		 * tree root is in a darray that can be resized (typically,
		 * genradix supports concurrent unlocked allocation of new
		 * nodes) - hence preallocation and the retry loop:
		 */
		objs->cursor = genradix_ptr_alloc_preallocated_inlined(&objs->objs,
						objs->nr, &new_node, GFP_ATOMIC|__GFP_NOWARN);
		if (unlikely(!objs->cursor)) {
			if (may_sleep) {
				spin_unlock_irqrestore(&p->lock, flags);

				gfp_t gfp = GFP_KERNEL;
				if (!head)
					gfp |= __GFP_NOFAIL;

				new_node = genradix_alloc_node(gfp);
				if (!new_node)
					may_sleep = false;
				goto check_expired;
			}
list_add:
			start_gp = rcu_pending_enqueue_list(p, seq, head, ptr, &flags);
			goto start_gp;
		}
	}

	*objs->cursor++ = ptr ?: head;
	/* zero cursor if we hit the end of a radix tree node: */
	if (!(((ulong) objs->cursor) & (GENRADIX_NODE_SIZE - 1)))
		objs->cursor = NULL;
	start_gp = !objs->nr;
	objs->nr++;
start_gp:
	if (unlikely(start_gp)) {
		/*
		 * We only have one callback (ideally, we would have one for
		 * every outstanding grace period) - so if our callback is
		 * already in flight, we may still have to start a grace period
		 * (since we used get_state() above, not start_poll())
		 */
		if (!p->cb_armed) {
			p->cb_armed = true;
			__call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
		} else {
			__start_poll_synchronize_rcu(pending->srcu);
		}
	}

	spin_unlock_irqrestore(&p->lock, flags);
free_node:
	if (new_node)
		genradix_free_node(new_node);
	return;
check_expired:
	/* lock was dropped - check whether the grace period already expired: */
	if (unlikely(__poll_state_synchronize_rcu(pending->srcu, seq))) {
		switch ((ulong) pending->process) {
		case RCU_PENDING_KVFREE:
			kvfree(ptr);
			break;
		case RCU_PENDING_CALL_RCU:
			head->func(head);
			break;
		default:
			pending->process(pending, head);
			break;
		}
		goto free_node;
	}

	local_irq_save(flags);
	p = this_cpu_ptr(pending->p);
	spin_lock(&p->lock);
	goto restart;
}

void rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *obj)
{
	__rcu_pending_enqueue(pending, obj, NULL, true);
}

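/*
 * Calling-convention sketch for the modes documented above
 * __rcu_pending_enqueue() (illustrative only; "obj" is a hypothetical caller
 * object with an embedded rcu_head, and the kvfree modes require
 * pending->process == RCU_PENDING_KVFREE_FN):
 *
 *	// generic mode: pending->process(pending, &obj->rcu) runs after a GP
 *	rcu_pending_enqueue(&pending, &obj->rcu);
 *
 *	// kvfree_rcu()-style: rcu_head embedded in the allocation being freed
 *	__rcu_pending_enqueue(&pending, &obj->rcu, obj, true);
 *
 *	// kvfree_rcu_mightsleep()-style: no rcu_head; one is allocated here
 *	__rcu_pending_enqueue(&pending, NULL, obj, true);
 */
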
static struct rcu_head *rcu_pending_pcpu_dequeue(struct rcu_pending_pcpu *p)
{
	struct rcu_head *ret = NULL;

	spin_lock_irq(&p->lock);
	darray_for_each(p->objs, objs)
		if (objs->nr) {
			ret = *genradix_ptr(&objs->objs, --objs->nr);
			objs->cursor = NULL;
			if (!objs->nr)
				genradix_free(&objs->objs);
			goto out;
		}

	static_array_for_each(p->lists, i)
		if (i->head) {
			ret = i->head;
#ifdef __KERNEL__
			i->head = ret->next;
#else
			i->head = (void *) ret->next.next;
#endif
			if (!i->head)
				i->tail = NULL;
			goto out;
		}
out:
	spin_unlock_irq(&p->lock);

	return ret;
}

struct rcu_head *rcu_pending_dequeue(struct rcu_pending *pending)
{
	return rcu_pending_pcpu_dequeue(raw_cpu_ptr(pending->p));
}

struct rcu_head *rcu_pending_dequeue_from_all(struct rcu_pending *pending)
{
	struct rcu_head *ret = rcu_pending_dequeue(pending);
	if (ret)
		return ret;

	int cpu;
	for_each_possible_cpu(cpu) {
		ret = rcu_pending_pcpu_dequeue(per_cpu_ptr(pending->p, cpu));
		if (ret)
			break;
	}
	return ret;
}

static bool rcu_pending_has_pending_or_armed(struct rcu_pending *pending)
{
	int cpu;
	for_each_possible_cpu(cpu) {
		struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
		spin_lock_irq(&p->lock);
		if (__rcu_pending_has_pending(p) || p->cb_armed) {
			spin_unlock_irq(&p->lock);
			return true;
		}
		spin_unlock_irq(&p->lock);
	}

	return false;
}

void rcu_pending_exit(struct rcu_pending *pending)
{
	int cpu;

	if (!pending->p)
		return;

	while (rcu_pending_has_pending_or_armed(pending)) {
		__rcu_barrier(pending->srcu);

		for_each_possible_cpu(cpu) {
			struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
			flush_work(&p->work);
		}
	}

	for_each_possible_cpu(cpu) {
		struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
		flush_work(&p->work);
	}

	for_each_possible_cpu(cpu) {
		struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);

		static_array_for_each(p->lists, i)
			WARN_ON(i->head);
		darray_exit(&p->objs);
	}
	free_percpu(pending->p);
}

/**
 * rcu_pending_init: - initialize a rcu_pending
 *
 * @pending:	Object to init
 * @srcu:	May optionally be used with an srcu_struct; if NULL, uses normal
 *		RCU flavor
 * @process:	Callback function invoked on objects once their RCU barriers
 *		have completed; if NULL, kvfree() is used.
 */
int rcu_pending_init(struct rcu_pending *pending,
		     struct srcu_struct *srcu,
		     rcu_pending_process_fn process)
{
	pending->p = alloc_percpu(struct rcu_pending_pcpu);
	if (!pending->p)
		return -ENOMEM;

	int cpu;
	for_each_possible_cpu(cpu) {
		struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
		p->parent	= pending;
		p->cpu		= cpu;
		spin_lock_init(&p->lock);
		darray_init(&p->objs);
		INIT_WORK(&p->work, rcu_pending_work);
	}

	pending->srcu	= srcu;
	pending->process = process;

	return 0;
}