/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
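
/* Layout note (added commentary, not part of the original file): every map in
 * this file is backed by struct bpf_array from <linux/bpf.h>, which embeds the
 * generic struct bpf_map, the rounded-up elem_size, and a flexible storage
 * area interpreted per map type -- value[] for the plain array, ptrs[] for the
 * fd-based arrays (prog / perf event), and pptrs[] for the per-cpu array.
 * The sketch below reflects the headers of this era and is a memory-layout
 * aid, not an authoritative copy:
 *
 *	struct bpf_array {
 *		struct bpf_map map;
 *		u32 elem_size;
 *		union {
 *			char value[0] __aligned(8);
 *			void *ptrs[0] __aligned(8);
 *			void __percpu *pptrs[0] __aligned(8);
 *		};
 *	};
 */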

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		free_percpu(array->pptrs[i]);
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
	}

	return 0;
}

/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	struct bpf_array *array;
	u64 array_size;
	u32 elem_size;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 || attr->map_flags)
		return ERR_PTR(-EINVAL);

	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return ERR_PTR(-E2BIG);

	elem_size = round_up(attr->value_size, 8);

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) attr->max_entries * sizeof(void *);
	else
		array_size += (u64) attr->max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	if (array_size >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);

	/* allocate all map elements and zero-initialize them */
	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
	if (!array) {
		array = vzalloc(array_size);
		if (!array)
			return ERR_PTR(-ENOMEM);
	}

	/* copy mandatory map attributes */
	array->map.map_type = attr->map_type;
	array->map.key_size = attr->key_size;
	array->map.value_size = attr->value_size;
	array->map.max_entries = attr->max_entries;
	array->elem_size = elem_size;

	if (!percpu)
		goto out;

	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();

	if (array_size >= U32_MAX - PAGE_SIZE ||
	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
		kvfree(array);
		return ERR_PTR(-ENOMEM);
	}
out:
	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;

	return &array->map;
}
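
/* Usage sketch (added commentary, not part of the original file): from
 * userspace, an array map is created via the bpf(2) syscall with
 * BPF_MAP_CREATE.  The attribute checks above correspond to a request like
 * the one below -- key_size must be 4 (a u32 index), value_size must be
 * non-zero, and map_flags must be 0.  Illustrative only; error handling is
 * omitted and the raw syscall(2) wrapper is an assumption of the sketch:
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(long),
 *		.max_entries = 256,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * Passing BPF_MAP_TYPE_PERCPU_ARRAY instead selects the per-cpu variant
 * handled by the 'percpu' branches above.
 */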

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * index;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
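
/* Usage sketch (added commentary, not part of the original file): a
 * BPF_MAP_LOOKUP_ELEM syscall on a per-cpu array ends up here, and the value
 * buffer supplied by userspace must hold one slot of round_up(value_size, 8)
 * bytes per possible CPU, in possible-CPU order -- exactly the 'size' and
 * 'off' stepping used in the loop above.  For example, with value_size == 8
 * and 4 possible CPUs the caller passes a 32-byte buffer and reads one
 * __u64 per CPU out of it.
 */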

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
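
/* Iteration note (added commentary, not part of the original file):
 * BPF_MAP_GET_NEXT_KEY walks the indices 0 .. max_entries - 1 and returns
 * -ENOENT after the last one; an out-of-range start key restarts at index 0.
 * A minimal userspace loop, assuming a bpf_map_get_next_key() wrapper around
 * the syscall:
 *
 *	__u32 key = -1, next;
 *	while (bpf_map_get_next_key(map_fd, &key, &next) == 0) {
 *		... use 'next' ...
 *		key = next;
 *	}
 */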

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index]),
		       value, map->value_size);
	else
		memcpy(array->value + array->elem_size * index,
		       value, map->value_size);
	return 0;
}
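
/* Flags note (added commentary, not part of the original file): since every
 * slot of an array map exists from creation time, BPF_NOEXIST can never
 * succeed and only BPF_ANY or BPF_EXIST make sense here, e.g. from userspace
 * (illustrative, assuming a bpf_map_update_elem() wrapper around the syscall):
 *
 *	__u32 key = 3;
 *	long val = 42;
 *	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
 */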

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding programs to complete
	 * and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	kvfree(array);
}

static const struct bpf_map_ops array_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

static struct bpf_map_type_list array_type __read_mostly = {
	.ops = &array_ops,
	.type = BPF_MAP_TYPE_ARRAY,
};

static const struct bpf_map_ops percpu_array_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

static struct bpf_map_type_list percpu_array_type __read_mostly = {
	.ops = &percpu_array_ops,
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
};

static int __init register_array_map(void)
{
	bpf_register_map_type(&array_type);
	bpf_register_map_type(&percpu_array_type);
	return 0;
}
late_initcall(register_array_map);

static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return ERR_PTR(-EINVAL);
	return array_map_alloc(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);
	kvfree(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
static int fd_array_map_update_elem(struct bpf_map *map, void *key,
				    void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}
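
/* Usage sketch (added commentary, not part of the original file): for a
 * BPF_MAP_TYPE_PROG_ARRAY the 'value' written here is a program fd, and the
 * stored bpf_prog is later invoked from another program via the
 * bpf_tail_call() helper.  Illustrative only; helper and map names follow
 * the conventions of samples/bpf in this era:
 *
 *	(userspace)  __u32 key = 1;
 *	             bpf_map_update_elem(jmp_table_fd, &key, &prog_fd, BPF_ANY);
 *
 *	(bpf prog)   bpf_tail_call(ctx, &jmp_table, 1);
 */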

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	struct bpf_prog *prog = ptr;

	bpf_prog_put_rcu(prog);
}

/* decrement refcnt of all bpf_progs that are stored in this map */
void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static const struct bpf_map_ops prog_array_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
};

static struct bpf_map_type_list prog_array_type __read_mostly = {
	.ops = &prog_array_ops,
	.type = BPF_MAP_TYPE_PROG_ARRAY,
};

static int __init register_prog_array_map(void)
{
	bpf_register_map_type(&prog_array_type);
	return 0;
}
late_initcall(register_prog_array_map);

static void perf_event_array_map_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
{
	struct perf_event *event;
	const struct perf_event_attr *attr;
	struct file *file;

	file = perf_event_get(fd);
	if (IS_ERR(file))
		return file;

	event = file->private_data;

	attr = perf_event_attrs(event);
	if (IS_ERR(attr))
		goto err;

	if (attr->inherit)
		goto err;

	if (attr->type == PERF_TYPE_RAW)
		return file;

	if (attr->type == PERF_TYPE_HARDWARE)
		return file;

	if (attr->type == PERF_TYPE_SOFTWARE &&
	    attr->config == PERF_COUNT_SW_BPF_OUTPUT)
		return file;

err:
	fput(file);
	return ERR_PTR(-EINVAL);
}
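
/* Usage sketch (added commentary, not part of the original file): the file
 * pointers stored here back the bpf_perf_event_read() and
 * bpf_perf_event_output() helpers, which look the event up by array index
 * from inside a program, e.g. (illustrative, using this era's samples/bpf
 * helper declarations):
 *
 *	bpf_perf_event_output(ctx, &perf_events, bpf_get_smp_processor_id(),
 *			      &data, sizeof(data));
 *
 * Only raw, hardware, and software/BPF_OUTPUT events pass the checks above;
 * anything else is rejected with -EINVAL.
 */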

static void perf_event_fd_array_put_ptr(void *ptr)
{
	fput((struct file *)ptr);
}

static const struct bpf_map_ops perf_event_array_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = perf_event_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
};

static struct bpf_map_type_list perf_event_array_type __read_mostly = {
	.ops = &perf_event_array_ops,
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
};

static int __init register_perf_event_array_map(void)
{
	bpf_register_map_type(&perf_event_array_type);
	return 0;
}
late_initcall(register_perf_event_array_map);