//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>

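/*
 * Per-cpu pointers to the cgroup storage currently in effect for the BPF
 * program running on this CPU, one slot per storage type (shared and
 * per-cpu). The bpf_get_local_storage() helper reads from this array at
 * run time; the functions below manage the objects it points to.
 */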
DEFINE_PER_CPU(struct bpf_cgroup_storage*,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	else
		return 0;
}

static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
		return -EINVAL;

	if (unlikely(flags & BPF_NOEXIST))
		return -EINVAL;

	if (unlikely((flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	if (flags & BPF_F_LOCK) {
		copy_map_value_locked(map, storage->buf->data, value, false);
		return 0;
	}

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);
	check_and_init_map_lock(map, new->data);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

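/*
 * Note on the update path above: when BPF_F_LOCK is not used, the new value
 * is published by atomically swapping storage->buf with xchg() and the old
 * buffer is only freed after an RCU grace period (kfree_rcu()), so readers
 * that dereference storage->buf locklessly see either the old or the new
 * buffer, never a partially written one.
 */
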
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

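/*
 * Example of the rounding above: with value_size == 12, each CPU's slot is
 * copied as round_up(12, 8) == 16 bytes, so user space is expected to
 * supply a buffer of 16 * num_possible_cpus() bytes when reading a per-cpu
 * cgroup storage element.
 */
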
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static int cgroup_storage_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	struct btf_member *m;
	u32 offset, size;

	/* Key is expected to be of struct bpf_cgroup_storage_key type,
	 * which is:
	 * struct bpf_cgroup_storage_key {
	 *	__u64	cgroup_inode_id;
	 *	__u32	attach_type;
	 * };
	 */

	/*
	 * Key_type must be a structure with two fields.
	 */
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
	    BTF_INFO_VLEN(key_type->info) != 2)
		return -EINVAL;

	/*
	 * The first field must be a 64 bit integer at 0 offset.
	 */
	m = (struct btf_member *)(key_type + 1);
	size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id);
	if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
		return -EINVAL;

	/*
	 * The second field must be a 32 bit integer at 64 bit offset.
	 */
	m++;
	offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
	size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type);
	if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
		return -EINVAL;

	return 0;
}

static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
					 struct seq_file *m)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map_to_storage(map), key, false);
	if (!storage) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		seq_puts(m, ": ");
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  &READ_ONCE(storage->buf)->data[0], m);
		seq_puts(m, "\n");
	} else {
		seq_puts(m, ": {\n");
		for_each_possible_cpu(cpu) {
			seq_printf(m, "\tcpu%d: ", cpu);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  per_cpu_ptr(storage->percpu_buf, cpu),
					  m);
			seq_puts(m, "\n");
		}
		seq_puts(m, "}\n");
	}
	rcu_read_unlock();
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = cgroup_storage_check_btf,
	.map_seq_show_elem = cgroup_storage_seq_show_elem,
};

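/*
 * For illustration: a matching user-space map definition would use
 * BPF_MAP_TYPE_CGROUP_STORAGE (or BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) with
 * key_size == sizeof(struct bpf_cgroup_storage_key), a value_size between
 * 1 and PAGE_SIZE, max_entries == 0 and only the flags permitted by
 * LOCAL_STORAGE_CREATE_FLAG_MASK, mirroring the checks in
 * cgroup_storage_map_alloc().
 */
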
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

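/*
 * Example: a shared storage map with value_size == 64 returns
 * sizeof(struct bpf_storage_buffer) + 64 and charges
 * round_up(sizeof(struct bpf_cgroup_storage) + that size, PAGE_SIZE) >>
 * PAGE_SHIFT pages; a per-cpu map with value_size == 12 returns 12 and
 * charges round_up(16 * num_possible_cpus(), PAGE_SIZE) >> PAGE_SHIFT pages.
 */
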
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
		check_and_init_map_lock(map, storage->buf->data);
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

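/*
 * A linked storage is reachable both through the map's rbtree (keyed by
 * cgroup inode id and attach type, used by cgroup_storage_lookup()) and
 * through map->list (used by cgroup_storage_get_next_key());
 * bpf_cgroup_storage_unlink() below drops it from both under map->lock.
 */
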
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif