/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
7 #include <linux/cpumask.h>
8 #include <linux/spinlock.h>
9 #include <linux/percpu.h>
11 #include "bpf_lru_list.h"
/* Number of free nodes the common LRU tries to hand to a CPU-local
 * free list in one refill, and the scan budget used with it.
 */
#define LOCAL_FREE_TARGET		(128)
#define LOCAL_NR_SCANS			LOCAL_FREE_TARGET

/* Shrink target and scan budget used by the per-CPU LRU flavour. */
#define PERCPU_FREE_TARGET		(16)
#define PERCPU_NR_SCANS			PERCPU_FREE_TARGET

/* Helpers to get the local list index */
#define LOCAL_LIST_IDX(t)	((t) - BPF_LOCAL_LIST_T_OFFSET)
#define LOCAL_FREE_LIST_IDX	LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
#define LOCAL_PENDING_LIST_IDX	LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
/* True when list type @t belongs to a CPU-local list. */
#define IS_LOCAL_LIST_TYPE(t)	((t) >= BPF_LOCAL_LIST_T_OFFSET)
25 static int get_next_cpu(int cpu
)
27 cpu
= cpumask_next(cpu
, cpu_possible_mask
);
28 if (cpu
>= nr_cpu_ids
)
29 cpu
= cpumask_first(cpu_possible_mask
);
33 /* Local list helpers */
34 static struct list_head
*local_free_list(struct bpf_lru_locallist
*loc_l
)
36 return &loc_l
->lists
[LOCAL_FREE_LIST_IDX
];
39 static struct list_head
*local_pending_list(struct bpf_lru_locallist
*loc_l
)
41 return &loc_l
->lists
[LOCAL_PENDING_LIST_IDX
];
44 /* bpf_lru_node helpers */
45 static bool bpf_lru_node_is_ref(const struct bpf_lru_node
*node
)
50 static void bpf_lru_list_count_inc(struct bpf_lru_list
*l
,
51 enum bpf_lru_list_type type
)
53 if (type
< NR_BPF_LRU_LIST_COUNT
)
57 static void bpf_lru_list_count_dec(struct bpf_lru_list
*l
,
58 enum bpf_lru_list_type type
)
60 if (type
< NR_BPF_LRU_LIST_COUNT
)
64 static void __bpf_lru_node_move_to_free(struct bpf_lru_list
*l
,
65 struct bpf_lru_node
*node
,
66 struct list_head
*free_list
,
67 enum bpf_lru_list_type tgt_free_type
)
69 if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node
->type
)))
72 /* If the removing node is the next_inactive_rotation candidate,
73 * move the next_inactive_rotation pointer also.
75 if (&node
->list
== l
->next_inactive_rotation
)
76 l
->next_inactive_rotation
= l
->next_inactive_rotation
->prev
;
78 bpf_lru_list_count_dec(l
, node
->type
);
80 node
->type
= tgt_free_type
;
81 list_move(&node
->list
, free_list
);
84 /* Move nodes from local list to the LRU list */
85 static void __bpf_lru_node_move_in(struct bpf_lru_list
*l
,
86 struct bpf_lru_node
*node
,
87 enum bpf_lru_list_type tgt_type
)
89 if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node
->type
)) ||
90 WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type
)))
93 bpf_lru_list_count_inc(l
, tgt_type
);
94 node
->type
= tgt_type
;
96 list_move(&node
->list
, &l
->lists
[tgt_type
]);
99 /* Move nodes between or within active and inactive list (like
100 * active to inactive, inactive to active or tail of active back to
101 * the head of active).
103 static void __bpf_lru_node_move(struct bpf_lru_list
*l
,
104 struct bpf_lru_node
*node
,
105 enum bpf_lru_list_type tgt_type
)
107 if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node
->type
)) ||
108 WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type
)))
111 if (node
->type
!= tgt_type
) {
112 bpf_lru_list_count_dec(l
, node
->type
);
113 bpf_lru_list_count_inc(l
, tgt_type
);
114 node
->type
= tgt_type
;
118 /* If the moving node is the next_inactive_rotation candidate,
119 * move the next_inactive_rotation pointer also.
121 if (&node
->list
== l
->next_inactive_rotation
)
122 l
->next_inactive_rotation
= l
->next_inactive_rotation
->prev
;
124 list_move(&node
->list
, &l
->lists
[tgt_type
]);
127 static bool bpf_lru_list_inactive_low(const struct bpf_lru_list
*l
)
129 return l
->counts
[BPF_LRU_LIST_T_INACTIVE
] <
130 l
->counts
[BPF_LRU_LIST_T_ACTIVE
];
133 /* Rotate the active list:
135 * 2. If the node has the ref bit set, it will be rotated
136 * back to the head of active list with the ref bit cleared.
137 * Give this node one more chance to survive in the active list.
138 * 3. If the ref bit is not set, move it to the head of the
140 * 4. It will at most scan nr_scans nodes
142 static void __bpf_lru_list_rotate_active(struct bpf_lru
*lru
,
143 struct bpf_lru_list
*l
)
145 struct list_head
*active
= &l
->lists
[BPF_LRU_LIST_T_ACTIVE
];
146 struct bpf_lru_node
*node
, *tmp_node
, *first_node
;
149 first_node
= list_first_entry(active
, struct bpf_lru_node
, list
);
150 list_for_each_entry_safe_reverse(node
, tmp_node
, active
, list
) {
151 if (bpf_lru_node_is_ref(node
))
152 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_ACTIVE
);
154 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_INACTIVE
);
156 if (++i
== lru
->nr_scans
|| node
== first_node
)
161 /* Rotate the inactive list. It starts from the next_inactive_rotation
162 * 1. If the node has ref bit set, it will be moved to the head
163 * of active list with the ref bit cleared.
164 * 2. If the node does not have ref bit set, it will leave it
165 * at its current location (i.e. do nothing) so that it can
166 * be considered during the next inactive_shrink.
167 * 3. It will at most scan nr_scans nodes
169 static void __bpf_lru_list_rotate_inactive(struct bpf_lru
*lru
,
170 struct bpf_lru_list
*l
)
172 struct list_head
*inactive
= &l
->lists
[BPF_LRU_LIST_T_INACTIVE
];
173 struct list_head
*cur
, *last
, *next
= inactive
;
174 struct bpf_lru_node
*node
;
177 if (list_empty(inactive
))
180 last
= l
->next_inactive_rotation
->next
;
181 if (last
== inactive
)
184 cur
= l
->next_inactive_rotation
;
185 while (i
< lru
->nr_scans
) {
186 if (cur
== inactive
) {
191 node
= list_entry(cur
, struct bpf_lru_node
, list
);
193 if (bpf_lru_node_is_ref(node
))
194 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_ACTIVE
);
201 l
->next_inactive_rotation
= next
;
204 /* Shrink the inactive list. It starts from the tail of the
205 * inactive list and only move the nodes without the ref bit
206 * set to the designated free list.
209 __bpf_lru_list_shrink_inactive(struct bpf_lru
*lru
,
210 struct bpf_lru_list
*l
,
211 unsigned int tgt_nshrink
,
212 struct list_head
*free_list
,
213 enum bpf_lru_list_type tgt_free_type
)
215 struct list_head
*inactive
= &l
->lists
[BPF_LRU_LIST_T_INACTIVE
];
216 struct bpf_lru_node
*node
, *tmp_node
, *first_node
;
217 unsigned int nshrinked
= 0;
220 first_node
= list_first_entry(inactive
, struct bpf_lru_node
, list
);
221 list_for_each_entry_safe_reverse(node
, tmp_node
, inactive
, list
) {
222 if (bpf_lru_node_is_ref(node
)) {
223 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_ACTIVE
);
224 } else if (lru
->del_from_htab(lru
->del_arg
, node
)) {
225 __bpf_lru_node_move_to_free(l
, node
, free_list
,
227 if (++nshrinked
== tgt_nshrink
)
231 if (++i
== lru
->nr_scans
)
/* 1. Rotate the active list (if needed), i.e. only when the
 *    inactive list holds fewer nodes than the active list.
 * 2. Always rotate the inactive list
 */
static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l)
{
	if (bpf_lru_list_inactive_low(l))
		__bpf_lru_list_rotate_active(lru, l);

	__bpf_lru_list_rotate_inactive(lru, l);
}
249 /* Calls __bpf_lru_list_shrink_inactive() to shrink some
250 * ref-bit-cleared nodes and move them to the designated
253 * If it cannot get a free node after calling
254 * __bpf_lru_list_shrink_inactive(). It will just remove
255 * one node from either inactive or active list without
256 * honoring the ref-bit. It prefers inactive list to active
257 * list in this situation.
259 static unsigned int __bpf_lru_list_shrink(struct bpf_lru
*lru
,
260 struct bpf_lru_list
*l
,
261 unsigned int tgt_nshrink
,
262 struct list_head
*free_list
,
263 enum bpf_lru_list_type tgt_free_type
)
266 struct bpf_lru_node
*node
, *tmp_node
;
267 struct list_head
*force_shrink_list
;
268 unsigned int nshrinked
;
270 nshrinked
= __bpf_lru_list_shrink_inactive(lru
, l
, tgt_nshrink
,
271 free_list
, tgt_free_type
);
275 /* Do a force shrink by ignoring the reference bit */
276 if (!list_empty(&l
->lists
[BPF_LRU_LIST_T_INACTIVE
]))
277 force_shrink_list
= &l
->lists
[BPF_LRU_LIST_T_INACTIVE
];
279 force_shrink_list
= &l
->lists
[BPF_LRU_LIST_T_ACTIVE
];
281 list_for_each_entry_safe_reverse(node
, tmp_node
, force_shrink_list
,
283 if (lru
->del_from_htab(lru
->del_arg
, node
)) {
284 __bpf_lru_node_move_to_free(l
, node
, free_list
,
293 /* Flush the nodes from the local pending list to the LRU list */
294 static void __local_list_flush(struct bpf_lru_list
*l
,
295 struct bpf_lru_locallist
*loc_l
)
297 struct bpf_lru_node
*node
, *tmp_node
;
299 list_for_each_entry_safe_reverse(node
, tmp_node
,
300 local_pending_list(loc_l
), list
) {
301 if (bpf_lru_node_is_ref(node
))
302 __bpf_lru_node_move_in(l
, node
, BPF_LRU_LIST_T_ACTIVE
);
304 __bpf_lru_node_move_in(l
, node
,
305 BPF_LRU_LIST_T_INACTIVE
);
309 static void bpf_lru_list_push_free(struct bpf_lru_list
*l
,
310 struct bpf_lru_node
*node
)
314 if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node
->type
)))
317 raw_spin_lock_irqsave(&l
->lock
, flags
);
318 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_FREE
);
319 raw_spin_unlock_irqrestore(&l
->lock
, flags
);
322 static void bpf_lru_list_pop_free_to_local(struct bpf_lru
*lru
,
323 struct bpf_lru_locallist
*loc_l
)
325 struct bpf_lru_list
*l
= &lru
->common_lru
.lru_list
;
326 struct bpf_lru_node
*node
, *tmp_node
;
327 unsigned int nfree
= 0;
329 raw_spin_lock(&l
->lock
);
331 __local_list_flush(l
, loc_l
);
333 __bpf_lru_list_rotate(lru
, l
);
335 list_for_each_entry_safe(node
, tmp_node
, &l
->lists
[BPF_LRU_LIST_T_FREE
],
337 __bpf_lru_node_move_to_free(l
, node
, local_free_list(loc_l
),
338 BPF_LRU_LOCAL_LIST_T_FREE
);
339 if (++nfree
== LOCAL_FREE_TARGET
)
343 if (nfree
< LOCAL_FREE_TARGET
)
344 __bpf_lru_list_shrink(lru
, l
, LOCAL_FREE_TARGET
- nfree
,
345 local_free_list(loc_l
),
346 BPF_LRU_LOCAL_LIST_T_FREE
);
348 raw_spin_unlock(&l
->lock
);
351 static void __local_list_add_pending(struct bpf_lru
*lru
,
352 struct bpf_lru_locallist
*loc_l
,
354 struct bpf_lru_node
*node
,
357 *(u32
*)((void *)node
+ lru
->hash_offset
) = hash
;
359 node
->type
= BPF_LRU_LOCAL_LIST_T_PENDING
;
361 list_add(&node
->list
, local_pending_list(loc_l
));
364 struct bpf_lru_node
*__local_list_pop_free(struct bpf_lru_locallist
*loc_l
)
366 struct bpf_lru_node
*node
;
368 node
= list_first_entry_or_null(local_free_list(loc_l
),
372 list_del(&node
->list
);
377 struct bpf_lru_node
*__local_list_pop_pending(struct bpf_lru
*lru
,
378 struct bpf_lru_locallist
*loc_l
)
380 struct bpf_lru_node
*node
;
384 /* Get from the tail (i.e. older element) of the pending list. */
385 list_for_each_entry_reverse(node
, local_pending_list(loc_l
),
387 if ((!bpf_lru_node_is_ref(node
) || force
) &&
388 lru
->del_from_htab(lru
->del_arg
, node
)) {
389 list_del(&node
->list
);
402 static struct bpf_lru_node
*bpf_percpu_lru_pop_free(struct bpf_lru
*lru
,
405 struct list_head
*free_list
;
406 struct bpf_lru_node
*node
= NULL
;
407 struct bpf_lru_list
*l
;
409 int cpu
= raw_smp_processor_id();
411 l
= per_cpu_ptr(lru
->percpu_lru
, cpu
);
413 raw_spin_lock_irqsave(&l
->lock
, flags
);
415 __bpf_lru_list_rotate(lru
, l
);
417 free_list
= &l
->lists
[BPF_LRU_LIST_T_FREE
];
418 if (list_empty(free_list
))
419 __bpf_lru_list_shrink(lru
, l
, PERCPU_FREE_TARGET
, free_list
,
420 BPF_LRU_LIST_T_FREE
);
422 if (!list_empty(free_list
)) {
423 node
= list_first_entry(free_list
, struct bpf_lru_node
, list
);
424 *(u32
*)((void *)node
+ lru
->hash_offset
) = hash
;
426 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_INACTIVE
);
429 raw_spin_unlock_irqrestore(&l
->lock
, flags
);
434 static struct bpf_lru_node
*bpf_common_lru_pop_free(struct bpf_lru
*lru
,
437 struct bpf_lru_locallist
*loc_l
, *steal_loc_l
;
438 struct bpf_common_lru
*clru
= &lru
->common_lru
;
439 struct bpf_lru_node
*node
;
440 int steal
, first_steal
;
442 int cpu
= raw_smp_processor_id();
444 loc_l
= per_cpu_ptr(clru
->local_list
, cpu
);
446 raw_spin_lock_irqsave(&loc_l
->lock
, flags
);
448 node
= __local_list_pop_free(loc_l
);
450 bpf_lru_list_pop_free_to_local(lru
, loc_l
);
451 node
= __local_list_pop_free(loc_l
);
455 __local_list_add_pending(lru
, loc_l
, cpu
, node
, hash
);
457 raw_spin_unlock_irqrestore(&loc_l
->lock
, flags
);
462 /* No free nodes found from the local free list and
463 * the global LRU list.
465 * Steal from the local free/pending list of the
466 * current CPU and remote CPU in RR. It starts
467 * with the loc_l->next_steal CPU.
470 first_steal
= loc_l
->next_steal
;
473 steal_loc_l
= per_cpu_ptr(clru
->local_list
, steal
);
475 raw_spin_lock_irqsave(&steal_loc_l
->lock
, flags
);
477 node
= __local_list_pop_free(steal_loc_l
);
479 node
= __local_list_pop_pending(lru
, steal_loc_l
);
481 raw_spin_unlock_irqrestore(&steal_loc_l
->lock
, flags
);
483 steal
= get_next_cpu(steal
);
484 } while (!node
&& steal
!= first_steal
);
486 loc_l
->next_steal
= steal
;
489 raw_spin_lock_irqsave(&loc_l
->lock
, flags
);
490 __local_list_add_pending(lru
, loc_l
, cpu
, node
, hash
);
491 raw_spin_unlock_irqrestore(&loc_l
->lock
, flags
);
497 struct bpf_lru_node
*bpf_lru_pop_free(struct bpf_lru
*lru
, u32 hash
)
500 return bpf_percpu_lru_pop_free(lru
, hash
);
502 return bpf_common_lru_pop_free(lru
, hash
);
505 static void bpf_common_lru_push_free(struct bpf_lru
*lru
,
506 struct bpf_lru_node
*node
)
510 if (WARN_ON_ONCE(node
->type
== BPF_LRU_LIST_T_FREE
) ||
511 WARN_ON_ONCE(node
->type
== BPF_LRU_LOCAL_LIST_T_FREE
))
514 if (node
->type
== BPF_LRU_LOCAL_LIST_T_PENDING
) {
515 struct bpf_lru_locallist
*loc_l
;
517 loc_l
= per_cpu_ptr(lru
->common_lru
.local_list
, node
->cpu
);
519 raw_spin_lock_irqsave(&loc_l
->lock
, flags
);
521 if (unlikely(node
->type
!= BPF_LRU_LOCAL_LIST_T_PENDING
)) {
522 raw_spin_unlock_irqrestore(&loc_l
->lock
, flags
);
526 node
->type
= BPF_LRU_LOCAL_LIST_T_FREE
;
528 list_move(&node
->list
, local_free_list(loc_l
));
530 raw_spin_unlock_irqrestore(&loc_l
->lock
, flags
);
535 bpf_lru_list_push_free(&lru
->common_lru
.lru_list
, node
);
538 static void bpf_percpu_lru_push_free(struct bpf_lru
*lru
,
539 struct bpf_lru_node
*node
)
541 struct bpf_lru_list
*l
;
544 l
= per_cpu_ptr(lru
->percpu_lru
, node
->cpu
);
546 raw_spin_lock_irqsave(&l
->lock
, flags
);
548 __bpf_lru_node_move(l
, node
, BPF_LRU_LIST_T_FREE
);
550 raw_spin_unlock_irqrestore(&l
->lock
, flags
);
553 void bpf_lru_push_free(struct bpf_lru
*lru
, struct bpf_lru_node
*node
)
556 bpf_percpu_lru_push_free(lru
, node
);
558 bpf_common_lru_push_free(lru
, node
);
561 void bpf_common_lru_populate(struct bpf_lru
*lru
, void *buf
, u32 node_offset
,
562 u32 elem_size
, u32 nr_elems
)
564 struct bpf_lru_list
*l
= &lru
->common_lru
.lru_list
;
567 for (i
= 0; i
< nr_elems
; i
++) {
568 struct bpf_lru_node
*node
;
570 node
= (struct bpf_lru_node
*)(buf
+ node_offset
);
571 node
->type
= BPF_LRU_LIST_T_FREE
;
573 list_add(&node
->list
, &l
->lists
[BPF_LRU_LIST_T_FREE
]);
578 void bpf_percpu_lru_populate(struct bpf_lru
*lru
, void *buf
, u32 node_offset
,
579 u32 elem_size
, u32 nr_elems
)
583 struct bpf_lru_list
*l
;
585 pcpu_entries
= nr_elems
/ num_possible_cpus();
589 for_each_possible_cpu(cpu
) {
590 struct bpf_lru_node
*node
;
592 l
= per_cpu_ptr(lru
->percpu_lru
, cpu
);
594 node
= (struct bpf_lru_node
*)(buf
+ node_offset
);
596 node
->type
= BPF_LRU_LIST_T_FREE
;
598 list_add(&node
->list
, &l
->lists
[BPF_LRU_LIST_T_FREE
]);
603 if (i
% pcpu_entries
)
608 void bpf_lru_populate(struct bpf_lru
*lru
, void *buf
, u32 node_offset
,
609 u32 elem_size
, u32 nr_elems
)
612 bpf_percpu_lru_populate(lru
, buf
, node_offset
, elem_size
,
615 bpf_common_lru_populate(lru
, buf
, node_offset
, elem_size
,
619 static void bpf_lru_locallist_init(struct bpf_lru_locallist
*loc_l
, int cpu
)
623 for (i
= 0; i
< NR_BPF_LRU_LOCAL_LIST_T
; i
++)
624 INIT_LIST_HEAD(&loc_l
->lists
[i
]);
626 loc_l
->next_steal
= cpu
;
628 raw_spin_lock_init(&loc_l
->lock
);
631 static void bpf_lru_list_init(struct bpf_lru_list
*l
)
635 for (i
= 0; i
< NR_BPF_LRU_LIST_T
; i
++)
636 INIT_LIST_HEAD(&l
->lists
[i
]);
638 for (i
= 0; i
< NR_BPF_LRU_LIST_COUNT
; i
++)
641 l
->next_inactive_rotation
= &l
->lists
[BPF_LRU_LIST_T_INACTIVE
];
643 raw_spin_lock_init(&l
->lock
);
646 int bpf_lru_init(struct bpf_lru
*lru
, bool percpu
, u32 hash_offset
,
647 del_from_htab_func del_from_htab
, void *del_arg
)
652 lru
->percpu_lru
= alloc_percpu(struct bpf_lru_list
);
653 if (!lru
->percpu_lru
)
656 for_each_possible_cpu(cpu
) {
657 struct bpf_lru_list
*l
;
659 l
= per_cpu_ptr(lru
->percpu_lru
, cpu
);
660 bpf_lru_list_init(l
);
662 lru
->nr_scans
= PERCPU_NR_SCANS
;
664 struct bpf_common_lru
*clru
= &lru
->common_lru
;
666 clru
->local_list
= alloc_percpu(struct bpf_lru_locallist
);
667 if (!clru
->local_list
)
670 for_each_possible_cpu(cpu
) {
671 struct bpf_lru_locallist
*loc_l
;
673 loc_l
= per_cpu_ptr(clru
->local_list
, cpu
);
674 bpf_lru_locallist_init(loc_l
, cpu
);
677 bpf_lru_list_init(&clru
->lru_list
);
678 lru
->nr_scans
= LOCAL_NR_SCANS
;
681 lru
->percpu
= percpu
;
682 lru
->del_from_htab
= del_from_htab
;
683 lru
->del_arg
= del_arg
;
684 lru
->hash_offset
= hash_offset
;
689 void bpf_lru_destroy(struct bpf_lru
*lru
)
692 free_percpu(lru
->percpu_lru
);
694 free_percpu(lru
->common_lru
.local_list
);