Linux 6.13-rc7
[linux.git] / fs / bcachefs / rcu_pending.c
blob40a20192eee8915af49bce0873caf9af7ea5e830
1 // SPDX-License-Identifier: GPL-2.0
2 #define pr_fmt(fmt) "%s() " fmt "\n", __func__
4 #include <linux/generic-radix-tree.h>
5 #include <linux/mm.h>
6 #include <linux/percpu.h>
7 #include <linux/slab.h>
8 #include <linux/srcu.h>
9 #include <linux/vmalloc.h>
11 #include "rcu_pending.h"
12 #include "darray.h"
13 #include "util.h"
15 #define static_array_for_each(_a, _i) \
16 for (typeof(&(_a)[0]) _i = _a; \
17 _i < (_a) + ARRAY_SIZE(_a); \
18 _i++)
20 enum rcu_pending_special {
21 RCU_PENDING_KVFREE = 1,
22 RCU_PENDING_CALL_RCU = 2,
25 #define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE)
26 #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU)
28 static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp)
30 return ssp
31 ? get_state_synchronize_srcu(ssp)
32 : get_state_synchronize_rcu();
35 static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp)
37 return ssp
38 ? start_poll_synchronize_srcu(ssp)
39 : start_poll_synchronize_rcu();
42 static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie)
44 return ssp
45 ? poll_state_synchronize_srcu(ssp, cookie)
46 : poll_state_synchronize_rcu(cookie);
49 static inline void __rcu_barrier(struct srcu_struct *ssp)
51 return ssp
52 ? srcu_barrier(ssp)
53 : rcu_barrier();
56 static inline void __call_rcu(struct srcu_struct *ssp, struct rcu_head *rhp,
57 rcu_callback_t func)
59 if (ssp)
60 call_srcu(ssp, rhp, func);
61 else
62 call_rcu(rhp, func);
65 struct rcu_pending_seq {
67 * We're using a radix tree like a vector - we're just pushing elements
68 * onto the end; we're using a radix tree instead of an actual vector to
69 * avoid reallocation overhead
71 GENRADIX(struct rcu_head *) objs;
72 size_t nr;
73 struct rcu_head **cursor;
74 unsigned long seq;
77 struct rcu_pending_list {
78 struct rcu_head *head;
79 struct rcu_head *tail;
80 unsigned long seq;
83 struct rcu_pending_pcpu {
84 struct rcu_pending *parent;
85 spinlock_t lock;
86 int cpu;
89 * We can't bound the number of unprocessed gp sequence numbers, and we
90 * can't efficiently merge radix trees for expired grace periods, so we
91 * need darray/vector:
93 DARRAY_PREALLOCATED(struct rcu_pending_seq, 4) objs;
95 /* Third entry is for expired objects: */
96 struct rcu_pending_list lists[NUM_ACTIVE_RCU_POLL_OLDSTATE + 1];
98 struct rcu_head cb;
99 bool cb_armed;
100 struct work_struct work;
103 static bool __rcu_pending_has_pending(struct rcu_pending_pcpu *p)
105 if (p->objs.nr)
106 return true;
108 static_array_for_each(p->lists, i)
109 if (i->head)
110 return true;
112 return false;
115 static void rcu_pending_list_merge(struct rcu_pending_list *l1,
116 struct rcu_pending_list *l2)
118 #ifdef __KERNEL__
119 if (!l1->head)
120 l1->head = l2->head;
121 else
122 l1->tail->next = l2->head;
123 #else
124 if (!l1->head)
125 l1->head = l2->head;
126 else
127 l1->tail->next.next = (void *) l2->head;
128 #endif
130 l1->tail = l2->tail;
131 l2->head = l2->tail = NULL;
134 static void rcu_pending_list_add(struct rcu_pending_list *l,
135 struct rcu_head *n)
137 #ifdef __KERNEL__
138 if (!l->head)
139 l->head = n;
140 else
141 l->tail->next = n;
142 l->tail = n;
143 n->next = NULL;
144 #else
145 if (!l->head)
146 l->head = n;
147 else
148 l->tail->next.next = (void *) n;
149 l->tail = n;
150 n->next.next = NULL;
151 #endif
154 static void merge_expired_lists(struct rcu_pending_pcpu *p)
156 struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
158 for (struct rcu_pending_list *i = p->lists; i < expired; i++)
159 if (i->head && __poll_state_synchronize_rcu(p->parent->srcu, i->seq))
160 rcu_pending_list_merge(expired, i);
163 #ifndef __KERNEL__
164 static inline void kfree_bulk(size_t nr, void ** p)
166 while (nr--)
167 kfree(*p);
170 #define local_irq_save(flags) \
171 do { \
172 flags = 0; \
173 } while (0)
174 #endif
176 static noinline void __process_finished_items(struct rcu_pending *pending,
177 struct rcu_pending_pcpu *p,
178 unsigned long flags)
180 struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
181 struct rcu_pending_seq objs = {};
182 struct rcu_head *list = NULL;
184 if (p->objs.nr &&
185 __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) {
186 objs = p->objs.data[0];
187 darray_remove_item(&p->objs, p->objs.data);
190 merge_expired_lists(p);
192 list = expired->head;
193 expired->head = expired->tail = NULL;
195 spin_unlock_irqrestore(&p->lock, flags);
197 switch ((ulong) pending->process) {
198 case RCU_PENDING_KVFREE:
199 for (size_t i = 0; i < objs.nr; ) {
200 size_t nr_this_node = min(GENRADIX_NODE_SIZE / sizeof(void *), objs.nr - i);
202 kfree_bulk(nr_this_node, (void **) genradix_ptr(&objs.objs, i));
203 i += nr_this_node;
205 genradix_free(&objs.objs);
207 while (list) {
208 struct rcu_head *obj = list;
209 #ifdef __KERNEL__
210 list = obj->next;
211 #else
212 list = (void *) obj->next.next;
213 #endif
216 * low bit of pointer indicates whether rcu_head needs
217 * to be freed - kvfree_rcu_mightsleep()
219 BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0);
221 void *ptr = (void *)(((unsigned long) obj->func) & ~1UL);
222 bool free_head = ((unsigned long) obj->func) & 1UL;
224 kvfree(ptr);
225 if (free_head)
226 kfree(obj);
229 break;
231 case RCU_PENDING_CALL_RCU:
232 for (size_t i = 0; i < objs.nr; i++) {
233 struct rcu_head *obj = *genradix_ptr(&objs.objs, i);
234 obj->func(obj);
236 genradix_free(&objs.objs);
238 while (list) {
239 struct rcu_head *obj = list;
240 #ifdef __KERNEL__
241 list = obj->next;
242 #else
243 list = (void *) obj->next.next;
244 #endif
245 obj->func(obj);
247 break;
249 default:
250 for (size_t i = 0; i < objs.nr; i++)
251 pending->process(pending, *genradix_ptr(&objs.objs, i));
252 genradix_free(&objs.objs);
254 while (list) {
255 struct rcu_head *obj = list;
256 #ifdef __KERNEL__
257 list = obj->next;
258 #else
259 list = (void *) obj->next.next;
260 #endif
261 pending->process(pending, obj);
263 break;
267 static bool process_finished_items(struct rcu_pending *pending,
268 struct rcu_pending_pcpu *p,
269 unsigned long flags)
272 * XXX: we should grab the gp seq once and avoid multiple function
273 * calls, this is called from __rcu_pending_enqueue() fastpath in
274 * may_sleep==true mode
276 if ((p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) ||
277 (p->lists[0].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[0].seq)) ||
278 (p->lists[1].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[1].seq)) ||
279 p->lists[2].head) {
280 __process_finished_items(pending, p, flags);
281 return true;
284 return false;
287 static void rcu_pending_work(struct work_struct *work)
289 struct rcu_pending_pcpu *p =
290 container_of(work, struct rcu_pending_pcpu, work);
291 struct rcu_pending *pending = p->parent;
292 unsigned long flags;
294 do {
295 spin_lock_irqsave(&p->lock, flags);
296 } while (process_finished_items(pending, p, flags));
298 spin_unlock_irqrestore(&p->lock, flags);
301 static void rcu_pending_rcu_cb(struct rcu_head *rcu)
303 struct rcu_pending_pcpu *p = container_of(rcu, struct rcu_pending_pcpu, cb);
305 schedule_work_on(p->cpu, &p->work);
307 unsigned long flags;
308 spin_lock_irqsave(&p->lock, flags);
309 if (__rcu_pending_has_pending(p)) {
310 spin_unlock_irqrestore(&p->lock, flags);
311 __call_rcu(p->parent->srcu, &p->cb, rcu_pending_rcu_cb);
312 } else {
313 p->cb_armed = false;
314 spin_unlock_irqrestore(&p->lock, flags);
318 static __always_inline struct rcu_pending_seq *
319 get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq)
321 darray_for_each_reverse(p->objs, objs)
322 if (objs->seq == seq)
323 return objs;
325 if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC))
326 return NULL;
328 return &darray_last(p->objs);
331 static noinline bool
332 rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq,
333 struct rcu_head *head, void *ptr,
334 unsigned long *flags)
336 if (ptr) {
337 if (!head) {
339 * kvfree_rcu_mightsleep(): we weren't passed an
340 * rcu_head, but we need one: use the low bit of the
341 * ponter to free to flag that the head needs to be
342 * freed as well:
344 ptr = (void *)(((unsigned long) ptr)|1UL);
345 head = kmalloc(sizeof(*head), __GFP_NOWARN);
346 if (!head) {
347 spin_unlock_irqrestore(&p->lock, *flags);
348 head = kmalloc(sizeof(*head), GFP_KERNEL|__GFP_NOFAIL);
350 * dropped lock, did GFP_KERNEL allocation,
351 * check for gp expiration
353 if (unlikely(__poll_state_synchronize_rcu(p->parent->srcu, seq))) {
354 kvfree(--ptr);
355 kfree(head);
356 spin_lock_irqsave(&p->lock, *flags);
357 return false;
362 head->func = ptr;
364 again:
365 for (struct rcu_pending_list *i = p->lists;
366 i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
367 if (i->seq == seq) {
368 rcu_pending_list_add(i, head);
369 return false;
373 for (struct rcu_pending_list *i = p->lists;
374 i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
375 if (!i->head) {
376 i->seq = seq;
377 rcu_pending_list_add(i, head);
378 return true;
382 merge_expired_lists(p);
383 goto again;
387 * __rcu_pending_enqueue: enqueue a pending RCU item, to be processed (via
388 * pending->pracess) once grace period elapses.
390 * Attempt to enqueue items onto a radix tree; if memory allocation fails, fall
391 * back to a linked list.
393 * - If @ptr is NULL, we're enqueuing an item for a generic @pending with a
394 * process callback
396 * - If @ptr and @head are both not NULL, we're kvfree_rcu()
398 * - If @ptr is not NULL and @head is, we're kvfree_rcu_mightsleep()
400 * - If @may_sleep is true, will do GFP_KERNEL memory allocations and process
401 * expired items.
403 static __always_inline void
404 __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
405 void *ptr, bool may_sleep)
408 struct rcu_pending_pcpu *p;
409 struct rcu_pending_seq *objs;
410 struct genradix_node *new_node = NULL;
411 unsigned long seq, flags;
412 bool start_gp = false;
414 BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));
416 local_irq_save(flags);
417 p = this_cpu_ptr(pending->p);
418 spin_lock(&p->lock);
419 seq = __get_state_synchronize_rcu(pending->srcu);
420 restart:
421 if (may_sleep &&
422 unlikely(process_finished_items(pending, p, flags)))
423 goto check_expired;
426 * In kvfree_rcu() mode, the radix tree is only for slab pointers so
427 * that we can do kfree_bulk() - vmalloc pointers always use the linked
428 * list:
430 if (ptr && unlikely(is_vmalloc_addr(ptr)))
431 goto list_add;
433 objs = get_object_radix(p, seq);
434 if (unlikely(!objs))
435 goto list_add;
437 if (unlikely(!objs->cursor)) {
439 * New radix tree nodes must be added under @p->lock because the
440 * tree root is in a darray that can be resized (typically,
441 * genradix supports concurrent unlocked allocation of new
442 * nodes) - hence preallocation and the retry loop:
444 objs->cursor = genradix_ptr_alloc_preallocated_inlined(&objs->objs,
445 objs->nr, &new_node, GFP_ATOMIC|__GFP_NOWARN);
446 if (unlikely(!objs->cursor)) {
447 if (may_sleep) {
448 spin_unlock_irqrestore(&p->lock, flags);
450 gfp_t gfp = GFP_KERNEL;
451 if (!head)
452 gfp |= __GFP_NOFAIL;
454 new_node = genradix_alloc_node(gfp);
455 if (!new_node)
456 may_sleep = false;
457 goto check_expired;
459 list_add:
460 start_gp = rcu_pending_enqueue_list(p, seq, head, ptr, &flags);
461 goto start_gp;
465 *objs->cursor++ = ptr ?: head;
466 /* zero cursor if we hit the end of a radix tree node: */
467 if (!(((ulong) objs->cursor) & (GENRADIX_NODE_SIZE - 1)))
468 objs->cursor = NULL;
469 start_gp = !objs->nr;
470 objs->nr++;
471 start_gp:
472 if (unlikely(start_gp)) {
474 * We only have one callback (ideally, we would have one for
475 * every outstanding graceperiod) - so if our callback is
476 * already in flight, we may still have to start a grace period
477 * (since we used get_state() above, not start_poll())
479 if (!p->cb_armed) {
480 p->cb_armed = true;
481 __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
482 } else {
483 __start_poll_synchronize_rcu(pending->srcu);
486 spin_unlock_irqrestore(&p->lock, flags);
487 free_node:
488 if (new_node)
489 genradix_free_node(new_node);
490 return;
491 check_expired:
492 if (unlikely(__poll_state_synchronize_rcu(pending->srcu, seq))) {
493 switch ((ulong) pending->process) {
494 case RCU_PENDING_KVFREE:
495 kvfree(ptr);
496 break;
497 case RCU_PENDING_CALL_RCU:
498 head->func(head);
499 break;
500 default:
501 pending->process(pending, head);
502 break;
504 goto free_node;
507 local_irq_save(flags);
508 p = this_cpu_ptr(pending->p);
509 spin_lock(&p->lock);
510 goto restart;
513 void rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *obj)
515 __rcu_pending_enqueue(pending, obj, NULL, true);
518 static struct rcu_head *rcu_pending_pcpu_dequeue(struct rcu_pending_pcpu *p)
520 struct rcu_head *ret = NULL;
522 spin_lock_irq(&p->lock);
523 darray_for_each(p->objs, objs)
524 if (objs->nr) {
525 ret = *genradix_ptr(&objs->objs, --objs->nr);
526 objs->cursor = NULL;
527 if (!objs->nr)
528 genradix_free(&objs->objs);
529 goto out;
532 static_array_for_each(p->lists, i)
533 if (i->head) {
534 ret = i->head;
535 #ifdef __KERNEL__
536 i->head = ret->next;
537 #else
538 i->head = (void *) ret->next.next;
539 #endif
540 if (!i->head)
541 i->tail = NULL;
542 goto out;
544 out:
545 spin_unlock_irq(&p->lock);
547 return ret;
550 struct rcu_head *rcu_pending_dequeue(struct rcu_pending *pending)
552 return rcu_pending_pcpu_dequeue(raw_cpu_ptr(pending->p));
555 struct rcu_head *rcu_pending_dequeue_from_all(struct rcu_pending *pending)
557 struct rcu_head *ret = rcu_pending_dequeue(pending);
559 if (ret)
560 return ret;
562 int cpu;
563 for_each_possible_cpu(cpu) {
564 ret = rcu_pending_pcpu_dequeue(per_cpu_ptr(pending->p, cpu));
565 if (ret)
566 break;
568 return ret;
571 static bool rcu_pending_has_pending_or_armed(struct rcu_pending *pending)
573 int cpu;
574 for_each_possible_cpu(cpu) {
575 struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
576 spin_lock_irq(&p->lock);
577 if (__rcu_pending_has_pending(p) || p->cb_armed) {
578 spin_unlock_irq(&p->lock);
579 return true;
581 spin_unlock_irq(&p->lock);
584 return false;
587 void rcu_pending_exit(struct rcu_pending *pending)
589 int cpu;
591 if (!pending->p)
592 return;
594 while (rcu_pending_has_pending_or_armed(pending)) {
595 __rcu_barrier(pending->srcu);
597 for_each_possible_cpu(cpu) {
598 struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
599 flush_work(&p->work);
603 for_each_possible_cpu(cpu) {
604 struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
605 flush_work(&p->work);
608 for_each_possible_cpu(cpu) {
609 struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
611 static_array_for_each(p->lists, i)
612 WARN_ON(i->head);
613 WARN_ON(p->objs.nr);
614 darray_exit(&p->objs);
616 free_percpu(pending->p);
620 * rcu_pending_init: - initialize a rcu_pending
622 * @pending: Object to init
623 * @srcu: May optionally be used with an srcu_struct; if NULL, uses normal
624 * RCU flavor
625 * @process: Callback function invoked on objects once their RCU barriers
626 * have completed; if NULL, kvfree() is used.
628 int rcu_pending_init(struct rcu_pending *pending,
629 struct srcu_struct *srcu,
630 rcu_pending_process_fn process)
632 pending->p = alloc_percpu(struct rcu_pending_pcpu);
633 if (!pending->p)
634 return -ENOMEM;
636 int cpu;
637 for_each_possible_cpu(cpu) {
638 struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
639 p->parent = pending;
640 p->cpu = cpu;
641 spin_lock_init(&p->lock);
642 darray_init(&p->objs);
643 INIT_WORK(&p->work, rcu_pending_work);
646 pending->srcu = srcu;
647 pending->process = process;
649 return 0;