//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

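/* Find the storage for a given (cgroup_inode_id, attach_type) key in the
 * map's rbtree. If @locked is false, the map lock is taken around the walk.
 */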
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

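/* Insert @storage into the map's rbtree, keyed by (cgroup_inode_id,
 * attach_type). Returns -EEXIST if an entry with the same key already
 * exists. The caller must hold map->lock.
 */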
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

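/* bpf_map_ops callback: return a pointer to the shared storage buffer's
 * data for the given key, or NULL if no storage exists for it.
 */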
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

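/* bpf_map_ops callback: replace the shared storage buffer for the given
 * key with a freshly allocated copy of @value; the old buffer is freed
 * after an RCU grace period.
 */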
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size, __GFP_ZERO | GFP_USER,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

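/* Copy the per-cpu storage values for the given key into @value, one
 * round_up(value_size, 8) chunk per possible CPU.
 */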
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

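/* Update the per-cpu storage values for the given key from @value, which
 * is laid out as one round_up(value_size, 8) chunk per possible CPU.
 */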
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

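/* bpf_map_ops callback: iterate keys in the map's list order. With a NULL
 * key the first entry is returned, otherwise the entry linked after the
 * one matching @_key; -ENOENT when the list is empty or the key is gone.
 */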
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

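/* Allocate and initialize the map structure itself; the individual
 * storages are created separately via bpf_cgroup_storage_alloc().
 */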
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};

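/* Bind a cgroup storage map to a single bpf program. A map can only be
 * used by one program, and a program may use at most one map per storage
 * type; -EBUSY is returned otherwise.
 */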
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

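/* Return the size of one storage buffer and, via @pages, the number of
 * pages to charge against the memlock limit for it.
 */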
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

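/* Allocate one storage of the requested type for @prog, charging the
 * memory against the map's memlock limit. Returns NULL if the program
 * does not use a map of this type, or an ERR_PTR on failure.
 */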
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

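/* Uncharge the memlocked memory and free the storage after an RCU grace
 * period, using the callback matching the storage type.
 */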
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

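/* Fill in the storage key for @cgroup and @type and make the storage
 * visible in its map's rbtree and list.
 */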
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

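/* Remove the storage from its map's rbtree and list; the storage itself
 * is freed separately via bpf_cgroup_storage_free().
 */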
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif