// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include "percpu_freelist.h"
/* Creation flags that queue_stack_map_alloc_check() accepts in map_flags. */
#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
17 struct bpf_queue_stack
{
21 u32 size
; /* max_entries + 1 */
23 char elements
[0] __aligned(8);
26 static struct bpf_queue_stack
*bpf_queue_stack(struct bpf_map
*map
)
28 return container_of(map
, struct bpf_queue_stack
, map
);
31 static bool queue_stack_map_is_empty(struct bpf_queue_stack
*qs
)
33 return qs
->head
== qs
->tail
;
36 static bool queue_stack_map_is_full(struct bpf_queue_stack
*qs
)
38 u32 head
= qs
->head
+ 1;
40 if (unlikely(head
>= qs
->size
))
43 return head
== qs
->tail
;
46 /* Called from syscall */
47 static int queue_stack_map_alloc_check(union bpf_attr
*attr
)
49 if (!capable(CAP_SYS_ADMIN
))
52 /* check sanity of attributes */
53 if (attr
->max_entries
== 0 || attr
->key_size
!= 0 ||
54 attr
->value_size
== 0 ||
55 attr
->map_flags
& ~QUEUE_STACK_CREATE_FLAG_MASK
)
58 if (attr
->value_size
> KMALLOC_MAX_SIZE
)
59 /* if value_size is bigger, the user space won't be able to
60 * access the elements.
67 static struct bpf_map
*queue_stack_map_alloc(union bpf_attr
*attr
)
69 int ret
, numa_node
= bpf_map_attr_numa_node(attr
);
70 struct bpf_queue_stack
*qs
;
71 u64 size
, queue_size
, cost
;
73 size
= (u64
) attr
->max_entries
+ 1;
74 cost
= queue_size
= sizeof(*qs
) + size
* attr
->value_size
;
75 if (cost
>= U32_MAX
- PAGE_SIZE
)
76 return ERR_PTR(-E2BIG
);
78 cost
= round_up(cost
, PAGE_SIZE
) >> PAGE_SHIFT
;
80 ret
= bpf_map_precharge_memlock(cost
);
84 qs
= bpf_map_area_alloc(queue_size
, numa_node
);
86 return ERR_PTR(-ENOMEM
);
88 memset(qs
, 0, sizeof(*qs
));
90 bpf_map_init_from_attr(&qs
->map
, attr
);
95 raw_spin_lock_init(&qs
->lock
);
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	/* NOTE(review): the wait call itself was lost; synchronize_rcu() assumed. */
	synchronize_rcu();

	bpf_map_area_free(qs);
}
115 static int __queue_map_get(struct bpf_map
*map
, void *value
, bool delete)
117 struct bpf_queue_stack
*qs
= bpf_queue_stack(map
);
122 raw_spin_lock_irqsave(&qs
->lock
, flags
);
124 if (queue_stack_map_is_empty(qs
)) {
125 memset(value
, 0, qs
->map
.value_size
);
130 ptr
= &qs
->elements
[qs
->tail
* qs
->map
.value_size
];
131 memcpy(value
, ptr
, qs
->map
.value_size
);
134 if (unlikely(++qs
->tail
>= qs
->size
))
139 raw_spin_unlock_irqrestore(&qs
->lock
, flags
);
144 static int __stack_map_get(struct bpf_map
*map
, void *value
, bool delete)
146 struct bpf_queue_stack
*qs
= bpf_queue_stack(map
);
152 raw_spin_lock_irqsave(&qs
->lock
, flags
);
154 if (queue_stack_map_is_empty(qs
)) {
155 memset(value
, 0, qs
->map
.value_size
);
160 index
= qs
->head
- 1;
161 if (unlikely(index
>= qs
->size
))
162 index
= qs
->size
- 1;
164 ptr
= &qs
->elements
[index
* qs
->map
.value_size
];
165 memcpy(value
, ptr
, qs
->map
.value_size
);
171 raw_spin_unlock_irqrestore(&qs
->lock
, flags
);
/* Called from syscall or from eBPF program */
/* Read the element at the tail of the queue without removing it. */
static int queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}
/* Called from syscall or from eBPF program */
/* Read the most recently pushed element of the stack without removing it. */
static int stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}
/* Called from syscall or from eBPF program */
/* Read the element at the tail of the queue and remove it. */
static int queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}
/* Called from syscall or from eBPF program */
/* Read the most recently pushed element of the stack and remove it. */
static int stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}
199 /* Called from syscall or from eBPF program */
200 static int queue_stack_map_push_elem(struct bpf_map
*map
, void *value
,
203 struct bpf_queue_stack
*qs
= bpf_queue_stack(map
);
204 unsigned long irq_flags
;
208 /* BPF_EXIST is used to force making room for a new element in case the
211 bool replace
= (flags
& BPF_EXIST
);
213 /* Check supported flags for queue and stack maps */
214 if (flags
& BPF_NOEXIST
|| flags
> BPF_EXIST
)
217 raw_spin_lock_irqsave(&qs
->lock
, irq_flags
);
219 if (queue_stack_map_is_full(qs
)) {
224 /* advance tail pointer to overwrite oldest element */
225 if (unlikely(++qs
->tail
>= qs
->size
))
229 dst
= &qs
->elements
[qs
->head
* qs
->map
.value_size
];
230 memcpy(dst
, value
, qs
->map
.value_size
);
232 if (unlikely(++qs
->head
>= qs
->size
))
236 raw_spin_unlock_irqrestore(&qs
->lock
, irq_flags
);
/* Called from syscall or from eBPF program */
/*
 * Keyed lookup entry for the ops tables. These maps are created with
 * key_size == 0 (see queue_stack_map_alloc_check()), so there is no key to
 * look up.
 *
 * NOTE(review): the body was lost entirely in the damaged text; returning
 * NULL is an assumption to be confirmed.
 */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}
246 /* Called from syscall or from eBPF program */
247 static int queue_stack_map_update_elem(struct bpf_map
*map
, void *key
,
248 void *value
, u64 flags
)
253 /* Called from syscall or from eBPF program */
254 static int queue_stack_map_delete_elem(struct bpf_map
*map
, void *key
)
259 /* Called from syscall */
260 static int queue_stack_map_get_next_key(struct bpf_map
*map
, void *key
,
266 const struct bpf_map_ops queue_map_ops
= {
267 .map_alloc_check
= queue_stack_map_alloc_check
,
268 .map_alloc
= queue_stack_map_alloc
,
269 .map_free
= queue_stack_map_free
,
270 .map_lookup_elem
= queue_stack_map_lookup_elem
,
271 .map_update_elem
= queue_stack_map_update_elem
,
272 .map_delete_elem
= queue_stack_map_delete_elem
,
273 .map_push_elem
= queue_stack_map_push_elem
,
274 .map_pop_elem
= queue_map_pop_elem
,
275 .map_peek_elem
= queue_map_peek_elem
,
276 .map_get_next_key
= queue_stack_map_get_next_key
,
279 const struct bpf_map_ops stack_map_ops
= {
280 .map_alloc_check
= queue_stack_map_alloc_check
,
281 .map_alloc
= queue_stack_map_alloc
,
282 .map_free
= queue_stack_map_free
,
283 .map_lookup_elem
= queue_stack_map_lookup_elem
,
284 .map_update_elem
= queue_stack_map_update_elem
,
285 .map_delete_elem
= queue_stack_map_delete_elem
,
286 .map_push_elem
= queue_stack_map_push_elem
,
287 .map_pop_elem
= stack_map_pop_elem
,
288 .map_peek_elem
= stack_map_peek_elem
,
289 .map_get_next_key
= queue_stack_map_get_next_key
,