//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*,
               bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#include "../cgroup/cgroup-internal.h"

#define LOCAL_STORAGE_CREATE_FLAG_MASK \
        (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_cgroup_storage_map {
        struct bpf_map map;

        spinlock_t lock;
        struct rb_root root;
        struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
        return container_of(map, struct bpf_cgroup_storage_map, map);
}

static bool attach_type_isolated(const struct bpf_map *map)
{
        return map->key_size == sizeof(struct bpf_cgroup_storage_key);
}

static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map,
                                      const void *_key1, const void *_key2)
{
        if (attach_type_isolated(&map->map)) {
                const struct bpf_cgroup_storage_key *key1 = _key1;
                const struct bpf_cgroup_storage_key *key2 = _key2;

                if (key1->cgroup_inode_id < key2->cgroup_inode_id)
                        return -1;
                else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
                        return 1;
                else if (key1->attach_type < key2->attach_type)
                        return -1;
                else if (key1->attach_type > key2->attach_type)
                        return 1;
        } else {
                const __u64 *cgroup_inode_id1 = _key1;
                const __u64 *cgroup_inode_id2 = _key2;

                if (*cgroup_inode_id1 < *cgroup_inode_id2)
                        return -1;
                else if (*cgroup_inode_id1 > *cgroup_inode_id2)
                        return 1;
        }

        return 0;
}
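
/* Illustrative note: keys sort primarily by cgroup_inode_id and, for the
 * attach-type-isolated key format, by attach_type as a tie-breaker.  For
 * example, {id = 7, type = 1} sorts after {id = 7, type = 0} but before
 * {id = 8, type = 0}; equal keys compare as 0.
 */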

struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
                      void *key, bool locked)
{
        struct rb_root *root = &map->root;
        struct rb_node *node;

        if (!locked)
                spin_lock_bh(&map->lock);

        node = root->rb_node;
        while (node) {
                struct bpf_cgroup_storage *storage;

                storage = container_of(node, struct bpf_cgroup_storage, node);

                switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) {
                case -1:
                        node = node->rb_left;
                        break;
                case 1:
                        node = node->rb_right;
                        break;
                default:
                        if (!locked)
                                spin_unlock_bh(&map->lock);
                        return storage;
                }
        }

        if (!locked)
                spin_unlock_bh(&map->lock);

        return NULL;
}

static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
                                 struct bpf_cgroup_storage *storage)
{
        struct rb_root *root = &map->root;
        struct rb_node **new = &(root->rb_node), *parent = NULL;

        while (*new) {
                struct bpf_cgroup_storage *this;

                this = container_of(*new, struct bpf_cgroup_storage, node);

                parent = *new;
                switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) {
                case -1:
                        new = &((*new)->rb_left);
                        break;
                case 1:
                        new = &((*new)->rb_right);
                        break;
                default:
                        return -EEXIST;
                }
        }

        rb_link_node(&storage->node, parent, new);
        rb_insert_color(&storage->node, root);

        return 0;
}

static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct bpf_cgroup_storage *storage;

        storage = cgroup_storage_lookup(map, key, false);
        if (!storage)
                return NULL;

        return &READ_ONCE(storage->buf)->data[0];
}

static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
                                      void *value, u64 flags)
{
        struct bpf_cgroup_storage *storage;
        struct bpf_storage_buffer *new;

        if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST)))
                return -EINVAL;

        if (unlikely((flags & BPF_F_LOCK) &&
                     !map_value_has_spin_lock(map)))
                return -EINVAL;

        storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
                                        key, false);
        if (!storage)
                return -ENOENT;

        if (flags & BPF_F_LOCK) {
                copy_map_value_locked(map, storage->buf->data, value, false);
                return 0;
        }

        new = bpf_map_kmalloc_node(map, sizeof(struct bpf_storage_buffer) +
                                   map->value_size,
                                   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
                                   map->numa_node);
        if (!new)
                return -ENOMEM;

        memcpy(&new->data[0], value, map->value_size);
        check_and_init_map_lock(map, new->data);

        new = xchg(&storage->buf, new);
        kfree_rcu(new, rcu);

        return 0;
}
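
/* Note on the two update paths above: with BPF_F_LOCK the value is copied
 * in place under the map value's embedded bpf_spin_lock, while the default
 * path publishes a freshly allocated buffer via xchg() and frees the old
 * one after an RCU grace period, so concurrent readers observe either the
 * old or the new buffer, never a half-swapped one.
 */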

int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
                                   void *value)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct bpf_cgroup_storage *storage;
        int cpu, off = 0;
        u32 size;

        rcu_read_lock();
        storage = cgroup_storage_lookup(map, key, false);
        if (!storage) {
                rcu_read_unlock();
                return -ENOENT;
        }

        /* per_cpu areas are zero-filled and bpf programs can only
         * access 'value_size' of them, so copying rounded areas
         * will not leak any kernel data
         */
        size = round_up(_map->value_size, 8);
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(value + off,
                                per_cpu_ptr(storage->percpu_buf, cpu), size);
                off += size;
        }
        rcu_read_unlock();
        return 0;
}
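
/* Worked example: for a per-cpu map with value_size = 12, the copy above
 * moves round_up(12, 8) = 16 bytes per possible CPU, so user space must
 * supply a buffer of 16 * num_possible_cpus() bytes; the 4 extra bytes
 * per CPU come from the zero-filled per-cpu allocation.
 */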

int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
                                     void *value, u64 map_flags)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct bpf_cgroup_storage *storage;
        int cpu, off = 0;
        u32 size;

        if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
                return -EINVAL;

        rcu_read_lock();
        storage = cgroup_storage_lookup(map, key, false);
        if (!storage) {
                rcu_read_unlock();
                return -ENOENT;
        }

        /* the user space will provide round_up(value_size, 8) bytes that
         * will be copied into per-cpu area. bpf programs can only access
         * value_size of it. During lookup the same extra bytes will be
         * returned or zeros which were zero-filled by percpu_alloc,
         * so no kernel data leaks possible
         */
        size = round_up(_map->value_size, 8);
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
                                value + off, size);
                off += size;
        }
        rcu_read_unlock();
        return 0;
}

static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
                                       void *_next_key)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct bpf_cgroup_storage *storage;

        spin_lock_bh(&map->lock);

        if (list_empty(&map->list))
                goto enoent;

        if (key) {
                storage = cgroup_storage_lookup(map, key, true);
                if (!storage)
                        goto enoent;

                storage = list_next_entry(storage, list_map);
                if (!storage)
                        goto enoent;
        } else {
                storage = list_first_entry(&map->list,
                                         struct bpf_cgroup_storage, list_map);
        }

        spin_unlock_bh(&map->lock);

        if (attach_type_isolated(&map->map)) {
                struct bpf_cgroup_storage_key *next = _next_key;
                *next = storage->key;
        } else {
                __u64 *next = _next_key;
                *next = storage->key.cgroup_inode_id;
        }
        return 0;

enoent:
        spin_unlock_bh(&map->lock);
        return -ENOENT;
}
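
/* Iteration semantics: a NULL key yields the first storage on the map's
 * list, an existing key yields the element linked after it, and a key
 * that is no longer present ends the walk with -ENOENT.
 */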

static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
        int numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_cgroup_storage_map *map;

        if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
            attr->key_size != sizeof(__u64))
                return ERR_PTR(-EINVAL);

        if (attr->value_size == 0)
                return ERR_PTR(-EINVAL);

        if (attr->value_size > PAGE_SIZE)
                return ERR_PTR(-E2BIG);

        if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
            !bpf_map_flags_access_ok(attr->map_flags))
                return ERR_PTR(-EINVAL);

        if (attr->max_entries)
                /* max_entries is not used and enforced to be 0 */
                return ERR_PTR(-EINVAL);

        map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
                           __GFP_ZERO | GFP_USER | __GFP_ACCOUNT, numa_node);
        if (!map)
                return ERR_PTR(-ENOMEM);

        /* copy mandatory map attributes */
        bpf_map_init_from_attr(&map->map, attr);

        spin_lock_init(&map->lock);
        map->root = RB_ROOT;
        INIT_LIST_HEAD(&map->list);

        return &map->map;
}
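
/* Illustrative user-space sketch (assumes libbpf's bpf_map_create(), not
 * part of this file): such a map could be created with
 *
 *	bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL,
 *		       sizeof(struct bpf_cgroup_storage_key),
 *		       value_size, 0, NULL);
 *
 * key_size may alternatively be sizeof(__u64); value_size must be non-zero
 * and at most PAGE_SIZE, and max_entries must be 0, matching the checks
 * above.
 */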

static void cgroup_storage_map_free(struct bpf_map *_map)
{
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        struct list_head *storages = &map->list;
        struct bpf_cgroup_storage *storage, *stmp;

        mutex_lock(&cgroup_mutex);

        list_for_each_entry_safe(storage, stmp, storages, list_map) {
                bpf_cgroup_storage_unlink(storage);
                bpf_cgroup_storage_free(storage);
        }

        mutex_unlock(&cgroup_mutex);

        WARN_ON(!RB_EMPTY_ROOT(&map->root));
        WARN_ON(!list_empty(&map->list));

        kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
        return -EINVAL;
}

static int cgroup_storage_check_btf(const struct bpf_map *map,
                                    const struct btf *btf,
                                    const struct btf_type *key_type,
                                    const struct btf_type *value_type)
{
        if (attach_type_isolated(map)) {
                struct btf_member *m;
                u32 offset, size;

                /* Key is expected to be of struct bpf_cgroup_storage_key type,
                 * which is:
                 * struct bpf_cgroup_storage_key {
                 *	__u64	cgroup_inode_id;
                 *	__u32	attach_type;
                 * };
                 */

                /*
                 * Key_type must be a structure with two fields.
                 */
                if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
                    BTF_INFO_VLEN(key_type->info) != 2)
                        return -EINVAL;

                /*
                 * The first field must be a 64 bit integer at 0 offset.
                 */
                m = (struct btf_member *)(key_type + 1);
                size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
                if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
                        return -EINVAL;

                /*
                 * The second field must be a 32 bit integer at 64 bit offset.
                 */
                m++;
                offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
                size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
                if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
                        return -EINVAL;
        } else {
                u32 int_data;

                /*
                 * Key is expected to be u64, which stores the cgroup_inode_id
                 */

                if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
                        return -EINVAL;

                int_data = *(u32 *)(key_type + 1);
                if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
                        return -EINVAL;
        }

        return 0;
}
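
/* In BTF terms the two accepted key shapes are, for example:
 *
 *	struct bpf_cgroup_storage_key key;	// attach-type-isolated maps
 *	__u64 key;				// shared (non-isolated) maps
 *
 * anything else is rejected with -EINVAL by the checks above.
 */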

static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
                                         struct seq_file *m)
{
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
        struct bpf_cgroup_storage *storage;
        int cpu;

        rcu_read_lock();
        storage = cgroup_storage_lookup(map_to_storage(map), key, false);
        if (!storage) {
                rcu_read_unlock();
                return;
        }

        btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
        stype = cgroup_storage_type(map);
        if (stype == BPF_CGROUP_STORAGE_SHARED) {
                seq_puts(m, ": ");
                btf_type_seq_show(map->btf, map->btf_value_type_id,
                                  &READ_ONCE(storage->buf)->data[0], m);
                seq_puts(m, "\n");
        } else {
                seq_puts(m, ": {\n");
                for_each_possible_cpu(cpu) {
                        seq_printf(m, "\tcpu%d: ", cpu);
                        btf_type_seq_show(map->btf, map->btf_value_type_id,
                                          per_cpu_ptr(storage->percpu_buf, cpu),
                                          m);
                        seq_puts(m, "\n");
                }
                seq_puts(m, "}\n");
        }
        rcu_read_unlock();
}

static int cgroup_storage_map_btf_id;
const struct bpf_map_ops cgroup_storage_map_ops = {
        .map_alloc = cgroup_storage_map_alloc,
        .map_free = cgroup_storage_map_free,
        .map_get_next_key = cgroup_storage_get_next_key,
        .map_lookup_elem = cgroup_storage_lookup_elem,
        .map_update_elem = cgroup_storage_update_elem,
        .map_delete_elem = cgroup_storage_delete_elem,
        .map_check_btf = cgroup_storage_check_btf,
        .map_seq_show_elem = cgroup_storage_seq_show_elem,
        .map_btf_name = "bpf_cgroup_storage_map",
        .map_btf_id = &cgroup_storage_map_btf_id,
};

int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);

        if (aux->cgroup_storage[stype] &&
            aux->cgroup_storage[stype] != _map)
                return -EBUSY;

        aux->cgroup_storage[stype] = _map;
        return 0;
}

static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
        size_t size;

        if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
                size = sizeof(struct bpf_storage_buffer) + map->value_size;
                *pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
                                  PAGE_SIZE) >> PAGE_SHIFT;
        } else {
                size = map->value_size;
                *pages = round_up(round_up(size, 8) * num_possible_cpus(),
                                  PAGE_SIZE) >> PAGE_SHIFT;
        }

        return size;
}
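
/* Worked example: a shared map with value_size = 100 needs
 * sizeof(struct bpf_storage_buffer) + 100 bytes in a single buffer, while
 * a per-cpu map with the same value_size reserves round_up(100, 8) = 104
 * bytes per possible CPU; *pages converts those totals (plus the
 * bpf_cgroup_storage header in the shared case) to whole pages.
 */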

struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
                                        enum bpf_cgroup_storage_type stype)
{
        const gfp_t gfp = __GFP_ZERO | GFP_USER;
        struct bpf_cgroup_storage *storage;
        struct bpf_map *map;
        size_t size;
        u32 pages;

        map = prog->aux->cgroup_storage[stype];
        if (!map)
                return NULL;

        size = bpf_cgroup_storage_calculate_size(map, &pages);

        storage = bpf_map_kmalloc_node(map, sizeof(struct bpf_cgroup_storage),
                                       gfp, map->numa_node);
        if (!storage)
                goto enomem;

        if (stype == BPF_CGROUP_STORAGE_SHARED) {
                storage->buf = bpf_map_kmalloc_node(map, size, gfp,
                                                    map->numa_node);
                if (!storage->buf)
                        goto enomem;
                check_and_init_map_lock(map, storage->buf->data);
        } else {
                storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
                if (!storage->percpu_buf)
                        goto enomem;
        }

        storage->map = (struct bpf_cgroup_storage_map *)map;

        return storage;

enomem:
        kfree(storage);
        return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
        struct bpf_cgroup_storage *storage =
                container_of(rcu, struct bpf_cgroup_storage, rcu);

        kfree(storage->buf);
        kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
        struct bpf_cgroup_storage *storage =
                container_of(rcu, struct bpf_cgroup_storage, rcu);

        free_percpu(storage->percpu_buf);
        kfree(storage);
}

void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
        enum bpf_cgroup_storage_type stype;
        struct bpf_map *map;

        if (!storage)
                return;

        map = &storage->map->map;
        stype = cgroup_storage_type(map);
        if (stype == BPF_CGROUP_STORAGE_SHARED)
                call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
        else
                call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
                             struct cgroup *cgroup,
                             enum bpf_attach_type type)
{
        struct bpf_cgroup_storage_map *map;

        if (!storage)
                return;

        storage->key.attach_type = type;
        storage->key.cgroup_inode_id = cgroup_id(cgroup);

        map = storage->map;

        spin_lock_bh(&map->lock);
        WARN_ON(cgroup_storage_insert(map, storage));
        list_add(&storage->list_map, &map->list);
        list_add(&storage->list_cg, &cgroup->bpf.storages);
        spin_unlock_bh(&map->lock);
}
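
/* Lifecycle note: a storage element only becomes reachable through the
 * map's rbtree and lists once bpf_cgroup_storage_link() has filled in its
 * key and inserted it under map->lock; bpf_cgroup_storage_unlink() below
 * undoes both steps before the element is eventually freed.
 */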

void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
        struct bpf_cgroup_storage_map *map;
        struct rb_root *root;

        if (!storage)
                return;

        map = storage->map;

        spin_lock_bh(&map->lock);
        root = &map->root;
        rb_erase(&storage->node, root);

        list_del(&storage->list_map);
        list_del(&storage->list_cg);
        spin_unlock_bh(&map->lock);
}

#endif