//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>
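/* Run-time per-cpu pointers to the active storage areas; the cgroup-bpf
 * attach code populates these right before a program runs so that the
 * bpf_get_local_storage() helper can find its buffer.
 */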
DEFINE_PER_CPU(struct bpf_cgroup_storage *,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
#ifdef CONFIG_CGROUP_BPF
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog_aux *aux;
	struct rb_root root;
	struct list_head list;
};
static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}
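/* Storage elements are keyed by (cgroup_inode_id, attach_type); compare two
 * keys in that order and return -1, 0 or 1.
 */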
static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	else
		return 0;
}
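/* Find the storage element matching @key in the map's rbtree.  Takes and
 * releases map->lock unless the caller indicates it already holds it.
 */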
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}
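/* Link a storage element into the rbtree.  Returns -EEXIST if an element
 * with the same key is already present.
 */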
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}
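/* Syscall-side lookup: return a pointer to the shared buffer's data, or
 * NULL if nothing is linked for this key.
 */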
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}
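/* Syscall-side update of shared storage.  With BPF_F_LOCK the new value is
 * copied under the map value's spin lock; otherwise a replacement buffer is
 * installed with xchg() and the old one is freed after an RCU grace period.
 */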
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
		return -EINVAL;

	if (unlikely(flags & BPF_NOEXIST))
		return -EINVAL;

	if (unlikely((flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	if (flags & BPF_F_LOCK) {
		copy_map_value_locked(map, storage->buf->data, value, false);
		return 0;
	}

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);
	check_and_init_map_lock(map, new->data);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}
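/* Copy every possible CPU's slot of a per-cpu element into the
 * user-supplied value buffer, one rounded-up chunk per CPU.
 */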
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
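/* Write the user-supplied value into every possible CPU's slot of a
 * per-cpu element.
 */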
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
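/* BPF_MAP_GET_NEXT_KEY: report the key of the element that follows @_key on
 * the map's list, or of the first element when no key is given.
 */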
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}
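/* Map creation only validates attributes and sets up bookkeeping; the
 * storage itself is allocated later, when a program using the map is
 * attached to a cgroup.
 */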
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;
	struct bpf_map_memory mem;
	int ret;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
	if (ret < 0)
		return ERR_PTR(ret);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_charge_move(&map->map.memory, &mem);

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}
static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}
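/* Elements cannot be deleted through the map syscall interface; storage
 * goes away when the corresponding cgroup attachment does.
 */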
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}
static int cgroup_storage_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	struct btf_member *m;
	u32 offset, size;

	/* Key is expected to be of struct bpf_cgroup_storage_key type,
	 * which is:
	 * struct bpf_cgroup_storage_key {
	 *	__u64	cgroup_inode_id;
	 *	__u32	attach_type;
	 * };
	 */

	/*
	 * Key_type must be a structure with two fields.
	 */
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
	    BTF_INFO_VLEN(key_type->info) != 2)
		return -EINVAL;

	/*
	 * The first field must be a 64 bit integer at 0 offset.
	 */
	m = (struct btf_member *)(key_type + 1);
	size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
	if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
		return -EINVAL;

	/*
	 * The second field must be a 32 bit integer at 64 bit offset.
	 */
	m++;
	offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
	size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
	if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
		return -EINVAL;

	return 0;
}
static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
					 struct seq_file *m)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map_to_storage(map), key, false);
	if (!storage) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		seq_puts(m, ": ");
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  &READ_ONCE(storage->buf)->data[0], m);
		seq_puts(m, "\n");
	} else {
		seq_puts(m, ": {\n");
		for_each_possible_cpu(cpu) {
			seq_printf(m, "\tcpu%d: ", cpu);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  per_cpu_ptr(storage->percpu_buf, cpu),
					  m);
			seq_puts(m, "\n");
		}
		seq_puts(m, "}\n");
	}
	rcu_read_unlock();
}
const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = cgroup_storage_check_btf,
	.map_seq_show_elem = cgroup_storage_seq_show_elem,
};
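/* A cgroup storage map can only be used by a single program: record the
 * owner on first use and refuse any other prog_aux afterwards.
 */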
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->aux && map->aux != aux)
		goto unlock;
	if (aux->cgroup_storage[stype] &&
	    aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->aux = aux;
	aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}
void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->aux == aux) {
		WARN_ON(aux->cgroup_storage[stype] != _map);
		map->aux = NULL;
		aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}
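/* Return the byte size of one storage element and, via @pages, the number
 * of pages to charge against the memlock limit, for the shared and per-cpu
 * flavours respectively.
 */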
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}
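/* Allocate the storage element backing @prog's map of type @stype.  Called
 * on attach; returns NULL when the program does not use such a map.
 */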
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
		check_and_init_map_lock(map, storage->buf->data);
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}
static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}
static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}
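/* Fill in the element's key and make it visible to lookups by inserting it
 * into its map's rbtree and list.
 */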
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup_id(cgroup);

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif	/* CONFIG_CGROUP_BPF */