// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch
struct bucket {
	struct hlist_nulls_head head;
	raw_spinlock_t lock;
};

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	atomic_t count;	/* number of elements in this hashtable */
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
};
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
			};
		};
	};
	union {
		struct rcu_head rcu;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[0] __aligned(8);
};
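
/* Element memory layout (a reader aid, derived from the helpers below): the
 * key starts at 'key' and is rounded up to 8 bytes.  For per-cpu maps a
 * pointer to the per-cpu value area is stored right after the rounded key
 * (see htab_elem_set_ptr()/htab_elem_get_ptr()); otherwise the value itself
 * follows the rounded key.
 */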
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)(l->key + roundup(map->key_size, 8));
}
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
}
static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
	}

free_elems:
	bpf_map_area_free(htab->elems);
}
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, htab->map.key_size);
		return l;
	}

	return NULL;
}
static int prealloc_init(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int err = -ENOMEM, i;

	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
		num_entries += num_possible_cpus();

	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
					 htab->map.numa_node);
	if (!htab->elems)
		return -ENOMEM;

	if (!htab_is_percpu(htab))
		goto skip_percpu_elems;

	for (i = 0; i < num_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
	}

skip_percpu_elems:
	if (htab_is_lru(htab))
		err = bpf_lru_init(&htab->lru,
				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
				   offsetof(struct htab_elem, hash) -
				   offsetof(struct htab_elem, lru_node),
				   htab_lru_map_delete_node,
				   htab);
	else
		err = pcpu_freelist_init(&htab->freelist);

	if (err)
		goto free_elems;

	if (htab_is_lru(htab))
		bpf_lru_populate(&htab->lru, htab->elems,
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, num_entries);
	else
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, num_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}
static void prealloc_destroy(struct bpf_htab *htab)
{
	htab_free_elems(htab);

	if (htab_is_lru(htab))
		bpf_lru_destroy(&htab->lru);
	else
		pcpu_freelist_destroy(&htab->freelist);
}
static int alloc_extra_elems(struct bpf_htab *htab)
{
	struct htab_elem *__percpu *pptr, *l_new;
	struct pcpu_freelist_node *l;
	int cpu;

	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
				  GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
		l_new = container_of(l, struct htab_elem, fnode);
		*per_cpu_ptr(pptr, cpu) = l_new;
	}
	htab->extra_elems = pptr;
	return 0;
}
/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
		     offsetof(struct htab_elem, hash_node.pprev));
	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (lru && !capable(CAP_SYS_ADMIN))
		/* The LRU implementation is much more complicated than other
		 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
		 */
		return -EPERM;

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS, and discourage production use. */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if (attr->key_size > MAX_BPF_STACK)
		/* eBPF programs initialize keys on stack, so they cannot be
		 * larger than max stack size
		 */
		return -E2BIG;

	if (attr->value_size >= KMALLOC_MAX_SIZE -
	    MAX_BPF_STACK - sizeof(struct htab_elem))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements via bpf syscall. This check also makes
		 * sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;

	return 0;
}
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	struct bpf_htab *htab;
	int err, i;
	u64 cost;

	htab = kzalloc(sizeof(*htab), GFP_USER);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&htab->map, attr);

	if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * since we are at it, make each lru list has the same
		 * number of elements.
		 */
		htab->map.max_entries = roundup(attr->max_entries,
						num_possible_cpus());
		if (htab->map.max_entries < attr->max_entries)
			htab->map.max_entries = rounddown(attr->max_entries,
							  num_possible_cpus());
	}

	/* hash table size must be power of 2 */
	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	err = -E2BIG;
	/* prevent zero size kmalloc and check for u32 overflow */
	if (htab->n_buckets == 0 ||
	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
	       (u64) htab->elem_size * htab->map.max_entries;

	if (percpu)
		cost += (u64) round_up(htab->map.value_size, 8) *
			num_possible_cpus() * htab->map.max_entries;
	else
		cost += (u64) htab->elem_size * num_possible_cpus();

	/* if map size is larger than memlock limit, reject it */
	err = bpf_map_charge_init(&htab->map.memory, cost);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket),
					   htab->map.numa_node);
	if (!htab->buckets)
		goto free_charge;

	if (htab->map.map_flags & BPF_F_ZERO_SEED)
		htab->hashrnd = 0;
	else
		htab->hashrnd = get_random_int();

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_spin_lock_init(&htab->buckets[i].lock);
	}

	if (prealloc) {
		err = prealloc_init(htab);
		if (err)
			goto free_buckets;

		if (!percpu && !lru) {
			/* lru itself can remove the least used element, so
			 * there is no need for an extra elem during map_update.
			 */
			err = alloc_extra_elems(htab);
			if (err)
				goto free_prealloc;
		}
	}

	return &htab->map;

free_prealloc:
	prealloc_destroy(htab);
free_buckets:
	bpf_map_area_free(htab->buckets);
free_charge:
	bpf_map_charge_finish(&htab->map.memory);
free_htab:
	kfree(htab);
	return ERR_PTR(err);
}
static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	return jhash(key, key_len, hashrnd);
}
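
/* Note: hashrnd is a random seed picked at map creation time unless the map
 * was created with BPF_F_ZERO_SEED (see htab_map_alloc()), which makes the
 * bucket layout deterministic, e.g. for benchmarking.
 */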
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}
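
/* n_buckets is always a power of two (see htab_map_alloc()), so masking with
 * (n_buckets - 1) is equivalent to hash % n_buckets.
 */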
static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}
/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}
/* can be called without bucket lock. it will repeat the loop in
 * the unlikely event when elements moved from one bucket into another
 * while the linked list is being walked
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}
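
/* The nulls value of each bucket list is initialized to the bucket index
 * (see INIT_HLIST_NULLS_HEAD() in htab_map_alloc()).  If the walk above ends
 * on a nulls value that does not match this bucket's index, the element was
 * moved to another bucket while we were walking without the lock, so the
 * lookup is retried.
 */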
/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	/* Must be called with rcu_read_lock. */
	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}
static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}
/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_map_lookup_elem
 *   map->ops->map_lookup_elem
 *     htab_map_lookup_elem
 *       __htab_map_lookup_elem
 * do:
 *   __htab_map_lookup_elem
 */
static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}
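
/* The sequence emitted above is, roughly:
 *   call __htab_map_lookup_elem
 *   if R0 == 0 goto done
 *   R0 += offsetof(struct htab_elem, key) + round_up(map->key_size, 8)
 * done:
 * i.e. on a hit R0 is adjusted to point at the value, matching what
 * htab_map_lookup_elem() returns.
 */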
static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
							void *key, const bool mark)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		if (mark)
			bpf_lru_node_set_ref(&l->lru_node);
		return l->key + round_up(map->key_size, 8);
	}

	return NULL;
}
static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, true);
}

static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, false);
}
static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int ref_reg = BPF_REG_1;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
			      offsetof(struct htab_elem, lru_node) +
			      offsetof(struct bpf_lru_node, ref));
	*insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
	*insn++ = BPF_ST_MEM(BPF_B, ret,
			     offsetof(struct htab_elem, lru_node) +
			     offsetof(struct bpf_lru_node, ref),
			     1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}
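
/* Same idea as htab_map_gen_lookup(), plus inlined LRU accounting: the
 * generated code loads lru_node.ref and sets it only when it was still
 * clear, mirroring bpf_lru_node_set_ref(), before adjusting R0 to point at
 * the value.
 */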
/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
	struct bpf_htab *htab = (struct bpf_htab *)arg;
	struct htab_elem *l = NULL, *tgt_l;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	struct bucket *b;

	tgt_l = container_of(node, struct htab_elem, lru_node);
	b = __select_bucket(htab, tgt_l->hash);
	head = &b->head;

	raw_spin_lock_irqsave(&b->lock, flags);

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l == tgt_l) {
			hlist_nulls_del_rcu(&l->hash_node);
			break;
		}

	raw_spin_unlock_irqrestore(&b->lock, flags);

	return l == tgt_l;
}
/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
					struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
						struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));

	kfree(l);
}
static void htab_elem_free_rcu(struct rcu_head *head)
{
	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
	struct bpf_htab *htab = l->htab;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering while
	 * we're calling kfree, otherwise deadlock is possible if kprobes
	 * are placed somewhere inside of slub
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	htab_elem_free(htab, l);
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
}
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	struct bpf_map *map = &htab->map;

	if (map->ops->map_fd_put_ptr) {
		void *ptr = fd_htab_map_get_ptr(map, l);

		map->ops->map_fd_put_ptr(ptr);
	}

	if (htab_is_prealloc(htab)) {
		__pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		atomic_dec(&htab->count);
		l->htab = htab;
		call_rcu(&l->rcu, htab_elem_free_rcu);
	}
}
static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	if (!onallcpus) {
		/* copy true value_size bytes */
		memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
	} else {
		u32 size = round_up(htab->map.value_size, 8);
		int off = 0, cpu;

		for_each_possible_cpu(cpu) {
			bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
					value + off, size);
			off += size;
		}
	}
}
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
	       BITS_PER_LONG == 64;
}
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 struct htab_elem *old_elem)
{
	u32 size = htab->map.value_size;
	bool prealloc = htab_is_prealloc(htab);
	struct htab_elem *l_new, **pl_new;
	void __percpu *pptr;

	if (prealloc) {
		if (old_elem) {
			/* if we're updating the existing element,
			 * use per-cpu extra elems to avoid freelist_pop/push
			 */
			pl_new = this_cpu_ptr(htab->extra_elems);
			l_new = *pl_new;
			*pl_new = old_elem;
		} else {
			struct pcpu_freelist_node *l;

			l = __pcpu_freelist_pop(&htab->freelist);
			if (!l)
				return ERR_PTR(-E2BIG);
			l_new = container_of(l, struct htab_elem, fnode);
		}
	} else {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
			if (!old_elem) {
				/* when map is full and update() is replacing
				 * old element, it's ok to allocate, since
				 * old element will be freed immediately.
				 * Otherwise return an error
				 */
				l_new = ERR_PTR(-E2BIG);
				goto dec_count;
			}
		l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
				     htab->map.numa_node);
		if (!l_new) {
			l_new = ERR_PTR(-ENOMEM);
			goto dec_count;
		}
		check_and_init_map_lock(&htab->map,
					l_new->key + round_up(key_size, 8));
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		size = round_up(size, 8);
		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			pptr = __alloc_percpu_gfp(size, 8,
						  GFP_ATOMIC | __GFP_NOWARN);
			if (!pptr) {
				kfree(l_new);
				l_new = ERR_PTR(-ENOMEM);
				goto dec_count;
			}
		}

		pcpu_copy_value(htab, pptr, value, onallcpus);

		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else if (fd_htab_map_needs_adjust(htab)) {
		size = round_up(size, 8);
		memcpy(l_new->key + round_up(key_size, 8), value, size);
	} else {
		copy_map_value(&htab->map,
			       l_new->key + round_up(key_size, 8),
			       value);
	}

	l_new->hash = hash;
	return l_new;
dec_count:
	atomic_dec(&htab->count);
	return l_new;
}
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}
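
/* Flag semantics enforced here: BPF_NOEXIST only allows creating a new
 * element, BPF_EXIST only allows updating an existing one, and BPF_ANY
 * accepts both.  BPF_F_LOCK is masked off first because it may be combined
 * with any of them.
 */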
/* Called from syscall or from eBPF program */
static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				u64 map_flags)
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);

	if (unlikely(map_flags & BPF_F_LOCK)) {
		if (unlikely(!map_value_has_spin_lock(map)))
		/* find an element without taking the bucket lock */
		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
		ret = check_flags(htab, l_old, map_flags);
			/* grab the element lock and update value in place */
			copy_map_value_locked(map,
					      l_old->key + round_up(key_size, 8),
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);

	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
		copy_map_value_locked(map,
				      l_old->key + round_up(key_size, 8),

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
		hlist_nulls_del_rcu(&l_old->hash_node);
		if (!htab_is_prealloc(htab))
			free_htab_elem(htab, l_old);

	raw_spin_unlock_irqrestore(&b->lock, flags);
static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
				    u64 map_flags)
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old = NULL;
	struct hlist_nulls_head *head;

	if (unlikely(map_flags > BPF_EXIST))

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
	l_new = prealloc_lru_pop(htab, key, hash);

	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
		bpf_lru_node_set_ref(&l_new->lru_node);
		hlist_nulls_del_rcu(&l_old->hash_node);

	raw_spin_unlock_irqrestore(&b->lock, flags);

		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
		bpf_lru_push_free(&htab->lru, &l_old->lru_node);
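
/* Note the LRU bookkeeping above: the new node is taken from the LRU free
 * list before the bucket lock is held, and whichever element ends up unused
 * (the new one on failure, the replaced old one on success) is pushed back
 * with bpf_lru_push_free() after the lock is dropped.
 */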
static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags,
					 bool onallcpus)
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;

	if (unlikely(map_flags > BPF_EXIST))

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);

		/* per-cpu hash map can update value in-place */
		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, true, onallcpus, NULL);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);

	raw_spin_unlock_irqrestore(&b->lock, flags);
static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					     void *value, u64 map_flags,
					     bool onallcpus)
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;

	if (unlikely(map_flags > BPF_EXIST))

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because LRU's elem alloc may need
	 * to remove older elem from htab and this removal
	 * operation will need a bucket lock.
	 */
	if (map_flags != BPF_EXIST) {
		l_new = prealloc_lru_pop(htab, key, hash);

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);

		bpf_lru_node_set_ref(&l_old->lru_node);

		/* per-cpu hash map can update value in-place */
		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);

	raw_spin_unlock_irqrestore(&b->lock, flags);
		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags)
{
	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}

static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					   void *value, u64 map_flags)
{
	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
						 false);
}
/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret = -ENOENT;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	raw_spin_lock_irqsave(&b->lock, flags);

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l) {
		hlist_nulls_del_rcu(&l->hash_node);
		free_htab_elem(htab, l);
		ret = 0;
	}

	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret = -ENOENT;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	raw_spin_lock_irqsave(&b->lock, flags);

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l) {
		hlist_nulls_del_rcu(&l->hash_node);
		ret = 0;
	}

	raw_spin_unlock_irqrestore(&b->lock, flags);
	if (l)
		bpf_lru_push_free(&htab->lru, &l->lru_node);
	return ret;
}
static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			hlist_nulls_del_rcu(&l->hash_node);
			htab_elem_free(htab, l);
		}
	}
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	/* some of free_htab_elem() callbacks for elements of this map may
	 * not have executed. Wait for them.
	 */
	rcu_barrier();
	if (!htab_is_prealloc(htab))
		delete_all_elements(htab);
	else
		prealloc_destroy(htab);

	free_percpu(htab->extra_elems);
	bpf_map_area_free(htab->buckets);
	kfree(htab);
}
static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
				   struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = htab_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": ");
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}
static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr,
				   bool do_delete, bool is_lru_map,
				   bool is_percpu)
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	u32 batch, max_count, size, bucket_size;
	u64 elem_map_flags, map_flags;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	struct htab_elem *l;

	elem_map_flags = attr->batch.elem_flags;
	if ((elem_map_flags & ~BPF_F_LOCK) ||
	    ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))

	map_flags = attr->batch.flags;

	max_count = attr->batch.count;

	if (put_user(0, &uattr->batch.count))

	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))

	if (batch >= htab->n_buckets)

	key_size = htab->map.key_size;
	roundup_key_size = round_up(htab->map.key_size, 8);
	value_size = htab->map.value_size;
	size = round_up(value_size, 8);
		value_size = size * num_possible_cpus();

	/* while experimenting with hash tables with sizes ranging from 10 to
	 * 1000, it was observed that a bucket can have up to 5 entries.
	 */

	/* We cannot do copy_from_user or copy_to_user inside
	 * the rcu_read_lock. Allocate enough space here.
	 */
	keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
	values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
	if (!keys || !values) {

	this_cpu_inc(bpf_prog_active);

	b = &htab->buckets[batch];

	raw_spin_lock_irqsave(&b->lock, flags);

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)

	if (bucket_cnt > (max_count - total)) {
		raw_spin_unlock_irqrestore(&b->lock, flags);
		this_cpu_dec(bpf_prog_active);

	if (bucket_cnt > bucket_size) {
		bucket_size = bucket_cnt;
		raw_spin_unlock_irqrestore(&b->lock, flags);
		this_cpu_dec(bpf_prog_active);

	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
		memcpy(dst_key, l->key, key_size);

			void __percpu *pptr;

			pptr = htab_elem_get_ptr(l, map->key_size);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(dst_val + off,
						per_cpu_ptr(pptr, cpu), size);

			value = l->key + roundup_key_size;
			if (elem_map_flags & BPF_F_LOCK)
				copy_map_value_locked(map, dst_val, value,
			else
				copy_map_value(map, dst_val, value);
			check_and_init_map_lock(map, dst_val);

			hlist_nulls_del_rcu(&l->hash_node);
				bpf_lru_push_free(&htab->lru, &l->lru_node);
				free_htab_elem(htab, l);

		dst_key += key_size;
		dst_val += value_size;
	}

	raw_spin_unlock_irqrestore(&b->lock, flags);
	/* If we are not copying data, we can go to next bucket and avoid
	 * unlocking the rcu.
	 */
	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {

	this_cpu_dec(bpf_prog_active);

	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
	    key_size * bucket_cnt) ||
	    copy_to_user(uvalues + total * value_size, values,
	    value_size * bucket_cnt))) {

	total += bucket_cnt;

	if (batch >= htab->n_buckets) {

	/* copy # of entries and next batch */
	ubatch = u64_to_user_ptr(attr->batch.out_batch);
	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
	    put_user(total, &uattr->batch.count))
static int
htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, true);
}

static int
htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					const union bpf_attr *attr,
					union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, true);
}

static int
htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
		      union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, false);
}

static int
htab_map_lookup_and_delete_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, false);
}

static int
htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, true);
}

static int
htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					    const union bpf_attr *attr,
					    union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, true);
}

static int
htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, false);
}

static int
htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
				     const union bpf_attr *attr,
				     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, false);
}
const struct bpf_map_ops htab_map_ops = {
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_gen_lookup = htab_map_gen_lookup,
	.map_seq_show_elem = htab_map_seq_show_elem,
	BATCH_OPS(htab),
};

const struct bpf_map_ops htab_lru_map_ops = {
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_lru_map_lookup_elem,
	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
	.map_update_elem = htab_lru_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_gen_lookup = htab_lru_map_gen_lookup,
	.map_seq_show_elem = htab_map_seq_show_elem,
	BATCH_OPS(htab_lru),
};
/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));

	return NULL;
}
static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		bpf_lru_node_set_ref(&l->lru_node);
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	}

	return NULL;
}
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int ret = -ENOENT;
	int cpu, off = 0;
	u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	l = __htab_map_lookup_elem(map, key);
	if (!l)
		goto out;
	/* We do not mark LRU map element here in order to not mess up
	 * eviction heuristics when user space does a map walk.
	 */
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	int ret;

	rcu_read_lock();
	if (htab_is_lru(htab))
		ret = __htab_lru_percpu_map_update_elem(map, key, value,
							map_flags, true);
	else
		ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
						    true);
	rcu_read_unlock();

	return ret;
}
static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
					  struct seq_file *m)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	l = __htab_map_lookup_elem(map, key);
	if (!l) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": {\n");
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}
const struct bpf_map_ops htab_percpu_map_ops = {
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
	BATCH_OPS(htab_percpu),
};

const struct bpf_map_ops htab_lru_percpu_map_ops = {
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
	.map_update_elem = htab_lru_percpu_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
	BATCH_OPS(htab_lru_percpu),
};
static int fd_htab_map_alloc_check(union bpf_attr *attr)
{
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return htab_map_alloc_check(attr);
}
static void fd_htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_node *n;
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			void *ptr = fd_htab_map_get_ptr(map, l);

			map->ops->map_fd_put_ptr(ptr);
		}
	}

	htab_map_free(map);
}
/* only called from syscall */
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	ptr = htab_map_lookup_elem(map, key);
	if (ptr)
		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}
/* only called from syscall */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
				void *key, void *value, u64 map_flags)
{
	void *ptr;
	int ret;
	u32 ufd = *(u32 *)value;

	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	ret = htab_map_update_elem(map, key, &ptr, map_flags);
	if (ret)
		map->ops->map_fd_put_ptr(ptr);

	return ret;
}
static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = htab_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}
static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map  = htab_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}
static u32 htab_of_map_gen_lookup(struct bpf_map *map,
				  struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);

	return insn - insn_buf;
}
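
/* Compared to htab_map_gen_lookup(), one extra BPF_LDX_MEM is emitted: the
 * value stored in a hash-of-maps element is a pointer to the inner map, so
 * the generated code dereferences it and returns the inner map itself, like
 * htab_of_map_lookup_elem() does with READ_ONCE().
 */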
static void htab_of_map_free(struct bpf_map *map)
{
	bpf_map_meta_free(map->inner_map_meta);
	fd_htab_map_free(map);
}
const struct bpf_map_ops htab_of_maps_map_ops = {
	.map_alloc_check = fd_htab_map_alloc_check,
	.map_alloc = htab_of_map_alloc,
	.map_free = htab_of_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_of_map_lookup_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = htab_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
};