// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_perf_event.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
#include <linux/bpf_lsm.h>
#include <linux/fprobe.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>

#include <net/bpf_sk_storage.h>

#include <uapi/linux/bpf.h>
#include <uapi/linux/btf.h>

#include "trace_probe.h"

#define CREATE_TRACE_POINTS
#include "bpf_trace.h"

#define bpf_event_rcu_dereference(p) \
	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))

#define MAX_UPROBE_MULTI_CNT (1U << 20)
#define MAX_KPROBE_MULTI_CNT (1U << 20)
#ifdef CONFIG_MODULES
struct bpf_trace_module {
	struct module *module;
	struct list_head list;
};

static LIST_HEAD(bpf_trace_modules);
static DEFINE_MUTEX(bpf_module_mutex);
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	struct bpf_raw_event_map *btp, *ret = NULL;
	struct bpf_trace_module *btm;
	unsigned int i;

	mutex_lock(&bpf_module_mutex);
	list_for_each_entry(btm, &bpf_trace_modules, list) {
		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
			btp = &btm->module->bpf_raw_events[i];
			if (!strcmp(btp->tp->name, name)) {
				if (try_module_get(btm->module))
					ret = btp;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&bpf_module_mutex);

	return ret;
}
#else
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
#endif /* CONFIG_MODULES */
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id);
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);

static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * the kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		rcu_read_lock();
		bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
		rcu_read_unlock();
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we did a bpf_prog_array_valid() there to check
	 * whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If the bpf_prog_array_valid() fetched prog_array was
	 * non-NULL, we go into trace_call_bpf() and do the actual
	 * proper rcu_dereference() under RCU lock.
	 * If it turns out that prog_array is NULL then, we bail out.
	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
	 * was NULL, we skip the prog_array with the risk of missing
	 * out on events when it was updated in between this and the
	 * rcu_dereference(), which is an accepted risk.
	 */
	rcu_read_lock();
	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
				 ctx, bpf_prog_run);
	rcu_read_unlock();

 out:
	__this_cpu_dec(bpf_prog_active);

	return ret;
}
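/*
 * Illustrative caller sketch (not part of this file): a kprobe perf handler
 * is expected to consume the return value roughly like the hypothetical
 * snippet below, modeled on kprobe_perf_func() in kernel/trace/trace_kprobe.c.
 * Returning 0 from the BPF program filters the event out; non-zero lets the
 * kprobe event be stored in the ring buffer.
 *
 *	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
 *		return;	// event filtered out by BPF, skip perf output
 */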
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
#endif
static __always_inline int
bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
	int ret;

	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_proto = {
	.func		= bpf_probe_read_user,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
static __always_inline int
bpf_probe_read_user_str_common(void *dst, u32 size,
			       const void __user *unsafe_ptr)
{
	int ret;

	/*
	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
	 * terminator into `dst`.
	 *
	 * strncpy_from_user() does long-sized strides in the fast path. If the
	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
	 * then there could be junk after the NUL in `dst`. If user takes `dst`
	 * and keys a hash map with it, then semantically identical strings can
	 * occupy multiple entries in the map.
	 */
	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_str_proto = {
	.func		= bpf_probe_read_user_str,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
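/*
 * Illustrative BPF-program-side sketch (not part of this file) showing why the
 * NUL-padding behaviour above matters: a program that keys a hash map with a
 * string read via bpf_probe_read_user_str() relies on the bytes after the NUL
 * being zeroed, otherwise identical strings could end up as distinct keys.
 * Map and variable names below are hypothetical.
 *
 *	char comm[16] = {};
 *
 *	if (bpf_probe_read_user_str(comm, sizeof(comm), user_ptr) > 0)
 *		bpf_map_update_elem(&counts_by_name, comm, &one, BPF_ANY);
 */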
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_proto = {
	.func		= bpf_probe_read_kernel,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
static __always_inline int
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_kernel_nofault() call will likely not fill the
	 * entire buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
	.func		= bpf_probe_read_kernel_str,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_proto = {
	.func		= bpf_probe_read_compat,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_str_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
	.func		= bpf_probe_read_compat_str,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 *
	 * nmi_uaccess_okay() ensures the probe is not run in an interim
	 * state, when the task or mm are switched. This is specifically
	 * required to prevent the use of temporary mm.
	 */
	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;

	return copy_to_user_nofault(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	if (!capable(CAP_SYS_ADMIN))
		return NULL;

	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}
#define MAX_TRACE_PRINTK_VARARGS	3
#define BPF_TRACE_PRINTK_SIZE		1024

BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
	struct bpf_bprintf_data data = {
		.get_bin_args	= true,
		.get_buf	= true,
	};
	int ret;

	ret = bpf_bprintf_prepare(fmt, fmt_size, args,
				  MAX_TRACE_PRINTK_VARARGS, &data);
	if (ret < 0)
		return ret;

	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);

	trace_bpf_trace_printk(data.buf);

	bpf_bprintf_cleanup(&data);

	return ret;
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
};

static void __set_printk_clr_event(void)
{
	/*
	 * This program might be calling bpf_trace_printk,
	 * so enable the associated bpf_trace/bpf_trace_printk event.
	 * Repeat this each time as it is possible a user has
	 * disabled bpf_trace_printk events. By loading a program that
	 * calls bpf_trace_printk(), however, the user has expressed
	 * the intent to see such events.
	 */
	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
		pr_warn_ratelimited("could not enable bpf_trace_printk events");
}

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	__set_printk_clr_event();
	return &bpf_trace_printk_proto;
}
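/*
 * Illustrative BPF-program-side usage (not part of this file): libbpf's
 * bpf_printk() macro expands to this helper (or bpf_trace_vprintk() for many
 * arguments). Output lands in the bpf_trace/bpf_trace_printk trace event
 * enabled above and is typically read from tracefs trace_pipe. Assumes
 * libbpf's bpf_helpers.h:
 *
 *	bpf_printk("pid %d opened fd %d", pid, fd);
 */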
BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
	   u32, data_len)
{
	struct bpf_bprintf_data data = {
		.get_bin_args	= true,
		.get_buf	= true,
	};
	int ret, num_args;

	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !args))
		return -EINVAL;
	num_args = data_len / 8;

	ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
	if (ret < 0)
		return ret;

	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);

	trace_bpf_trace_printk(data.buf);

	bpf_bprintf_cleanup(&data);

	return ret;
}

static const struct bpf_func_proto bpf_trace_vprintk_proto = {
	.func		= bpf_trace_vprintk,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
};

const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
	__set_printk_clr_event();
	return &bpf_trace_vprintk_proto;
}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
	   const void *, args, u32, data_len)
{
	struct bpf_bprintf_data data = {
		.get_bin_args	= true,
	};
	int err, num_args;

	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !args))
		return -EINVAL;
	num_args = data_len / 8;

	err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
	if (err < 0)
		return err;

	seq_bprintf(m, fmt, data.bin_args);

	bpf_bprintf_cleanup(&data);

	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}

BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)

static const struct bpf_func_proto bpf_seq_printf_proto = {
	.func		= bpf_seq_printf,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
{
	return seq_write(m, data, len) ? -EOVERFLOW : 0;
}

static const struct bpf_func_proto bpf_seq_write_proto = {
	.func		= bpf_seq_write,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *btf;
	s32 btf_id;
	int ret;

	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	if (ret)
		return ret;

	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
}

static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
	.func		= bpf_seq_printf_btf,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}
BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (err)
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
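/*
 * Illustrative BPF-program-side sketch (not part of this file): reading a
 * counter that user space placed into a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 * The map name is hypothetical; BPF_F_CURRENT_CPU selects the counter of the
 * CPU the program is running on.
 *
 *	struct bpf_perf_event_value v = {};
 *
 *	if (!bpf_perf_event_read_value(&cycles_map, BPF_F_CURRENT_CPU,
 *				       &v, sizeof(v)))
 *		// v.counter, v.enabled and v.running are now valid
 */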
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_sample_data *sd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	return perf_event_output(event, sd, regs);
}
/*
 * Support executing tracepoints in normal, irq, and nmi context that each call
 * bpf_perf_event_output
 */
struct bpf_trace_sample_data {
	struct perf_sample_data sds[3];
};

static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct bpf_trace_sample_data *sds;
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};
	struct perf_sample_data *sd;
	int nest_level, err;

	preempt_disable();
	sds = this_cpu_ptr(&bpf_trace_sds);
	nest_level = this_cpu_inc_return(bpf_trace_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
		err = -EBUSY;
		goto out;
	}

	sd = &sds->sds[nest_level - 1];

	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
		err = -EINVAL;
		goto out;
	}

	perf_sample_data_init(sd, 0, 0);
	perf_sample_save_raw_data(sd, &raw);

	err = __bpf_perf_event_output(regs, map, flags, sd);
out:
	this_cpu_dec(bpf_trace_nest_level);
	preempt_enable();
	return err;
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
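/*
 * Illustrative BPF-program-side sketch (not part of this file): streaming an
 * event through a BPF_MAP_TYPE_PERF_EVENT_ARRAY map with this helper. The map
 * and struct names are hypothetical.
 *
 *	struct event e = { .pid = bpf_get_current_pid_tgid() >> 32 };
 *
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
 */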
static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
struct bpf_nested_pt_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			.next	= ctx_size ? &frag : NULL,
			.size	= meta_size,
			.data	= meta,
		},
	};
	struct perf_sample_data *sd;
	struct pt_regs *regs;
	int nest_level;
	u64 ret;

	preempt_disable();
	nest_level = this_cpu_inc_return(bpf_event_output_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
		ret = -EBUSY;
		goto out;
	}
	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);
	perf_sample_save_raw_data(sd, &raw);

	ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
	this_cpu_dec(bpf_event_output_nest_level);
	preempt_enable();
	return ret;
}
BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}

const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_task_btf)
{
	return (unsigned long) current;
}

const struct bpf_func_proto bpf_get_current_task_btf_proto = {
	.func		= bpf_get_current_task_btf,
	.ret_type	= RET_PTR_TO_BTF_ID_TRUSTED,
	.ret_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
{
	return (unsigned long) task_pt_regs(task);
}

BTF_ID_LIST(bpf_task_pt_regs_ids)
BTF_ID(struct, pt_regs)

const struct bpf_func_proto bpf_task_pt_regs_proto = {
	.func		= bpf_task_pt_regs,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.ret_type	= RET_PTR_TO_BTF_ID,
	.ret_btf_id	= &bpf_task_pt_regs_ids[0],
};
struct send_signal_irq_work {
	struct irq_work irq_work;
	struct task_struct *task;
	u32 sig;
	enum pid_type type;
};

static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);

static void do_bpf_send_signal(struct irq_work *entry)
{
	struct send_signal_irq_work *work;

	work = container_of(entry, struct send_signal_irq_work, irq_work);
	group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
	put_task_struct(work->task);
}

static int bpf_send_signal_common(u32 sig, enum pid_type type)
{
	struct send_signal_irq_work *work = NULL;

	/* Similar to bpf_probe_write_user, task needs to be
	 * in a sound condition and kernel memory access be
	 * permitted in order to send signal to the current
	 * task.
	 */
	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;
	/* Task should not be pid=1 to avoid kernel panic. */
	if (unlikely(is_global_init(current)))
		return -EPERM;

	if (irqs_disabled()) {
		/* Do an early check on signal validity. Otherwise,
		 * the error is lost in deferred irq_work.
		 */
		if (unlikely(!valid_signal(sig)))
			return -EINVAL;

		work = this_cpu_ptr(&send_signal_work);
		if (irq_work_is_busy(&work->irq_work))
			return -EBUSY;

		/* Add the current task, which is the target of sending signal,
		 * to the irq_work. The current task may change when queued
		 * irq works get executed.
		 */
		work->task = get_task_struct(current);
		work->sig = sig;
		work->type = type;
		irq_work_queue(&work->irq_work);
		return 0;
	}

	return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
}

BPF_CALL_1(bpf_send_signal, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_TGID);
}

static const struct bpf_func_proto bpf_send_signal_proto = {
	.func		= bpf_send_signal,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_PID);
}

static const struct bpf_func_proto bpf_send_signal_thread_proto = {
	.func		= bpf_send_signal_thread,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
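/*
 * Illustrative BPF-program-side sketch (not part of this file): sending
 * SIGUSR1 to the current task from a tracing program. The numeric value is
 * used because kernel signal macros are not normally visible to BPF programs;
 * 10 is SIGUSR1 on x86-64 (an assumption for this example).
 *
 *	bpf_send_signal(10);		// whole thread group, like kill()
 *	bpf_send_signal_thread(10);	// only the current thread, like tgkill()
 */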
BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
{
	struct path copy;
	long len;
	char *p;

	if (!sz)
		return 0;

	/*
	 * The path pointer is verified as trusted and safe to use,
	 * but let's double check it's valid anyway to workaround
	 * potentially broken verifier.
	 */
	len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
	if (len < 0)
		return len;

	p = d_path(&copy, buf, sz);
	if (IS_ERR(p)) {
		len = PTR_ERR(p);
	} else {
		len = buf + sz - p;
		memmove(buf, p, len);
	}

	return len;
}

BTF_SET_START(btf_allowlist_d_path)
#ifdef CONFIG_SECURITY
BTF_ID(func, security_file_permission)
BTF_ID(func, security_inode_getattr)
BTF_ID(func, security_file_open)
#endif
#ifdef CONFIG_SECURITY_PATH
BTF_ID(func, security_path_truncate)
#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
BTF_ID(func, vfs_getattr)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)

static bool bpf_d_path_allowed(const struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_TRACING &&
	    prog->expected_attach_type == BPF_TRACE_ITER)
		return true;

	if (prog->type == BPF_PROG_TYPE_LSM)
		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);

	return btf_id_set_contains(&btf_allowlist_d_path,
				   prog->aux->attach_btf_id);
}

BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)

static const struct bpf_func_proto bpf_d_path_proto = {
	.func		= bpf_d_path,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.allowed	= bpf_d_path_allowed,
};
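/*
 * Illustrative BPF-program-side sketch (not part of this file): resolving a
 * file path from an allowed attach point (e.g. a BPF LSM hook or an iterator,
 * per bpf_d_path_allowed() above). Variable names are hypothetical.
 *
 *	char path[256];
 *	long n;
 *
 *	n = bpf_d_path(&file->f_path, path, sizeof(path));
 *	if (n > 0)
 *		bpf_printk("opened %s", path);
 */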
#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
			 BTF_F_PTR_RAW | BTF_F_ZERO)

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id)
{
	const struct btf_type *t;

	if (unlikely(flags & ~(BTF_F_ALL)))
		return -EINVAL;

	if (btf_ptr_size != sizeof(struct btf_ptr))
		return -EINVAL;

	*btf = bpf_get_btf_vmlinux();

	if (IS_ERR_OR_NULL(*btf))
		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

	if (ptr->type_id > 0)
		*btf_id = ptr->type_id;
	else
		return -EINVAL;

	if (*btf_id > 0)
		t = btf_type_by_id(*btf, *btf_id);
	if (*btf_id <= 0 || !t)
		return -ENOENT;

	return 0;
}
BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *btf;
	s32 btf_id;
	int ret;

	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	if (ret)
		return ret;

	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
				      flags);
}

const struct bpf_func_proto bpf_snprintf_btf_proto = {
	.func		= bpf_snprintf_btf,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-2];
}

static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
	.func		= bpf_get_func_ip_tracing,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

#ifdef CONFIG_X86_KERNEL_IBT
static unsigned long get_entry_ip(unsigned long fentry_ip)
{
	u32 instr;

	/* We want to be extra safe in case entry ip is on the page edge,
	 * but otherwise we need to avoid get_kernel_nofault()'s overhead.
	 */
	if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
		if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
			return fentry_ip;
	} else {
		instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
	}
	if (is_endbr(instr))
		fentry_ip -= ENDBR_INSN_SIZE;
	return fentry_ip;
}
#else
#define get_entry_ip(fentry_ip) fentry_ip
#endif
BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
	struct bpf_trace_run_ctx *run_ctx __maybe_unused;
	struct kprobe *kp;

#ifdef CONFIG_UPROBES
	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	if (run_ctx->is_uprobe)
		return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
#endif

	kp = kprobe_running();

	if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
		return 0;

	return get_entry_ip((uintptr_t)kp->addr);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
	.func		= bpf_get_func_ip_kprobe,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
	.func		= bpf_get_func_ip_kprobe_multi,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
	.func		= bpf_get_attach_cookie_kprobe_multi,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
{
	return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
	.func		= bpf_get_func_ip_uprobe_multi,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
{
	return bpf_uprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
	.func		= bpf_get_attach_cookie_uprobe_multi,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
	.func		= bpf_get_attach_cookie_trace,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
{
	return ctx->event->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
	.func		= bpf_get_attach_cookie_pe,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
{
	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
	return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
	.func		= bpf_get_attach_cookie_tracing,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	u32 entry_cnt = size / br_entry_size;

	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);

	if (unlikely(flags))
		return -EINVAL;

	if (!entry_cnt)
		return -ENOENT;

	return entry_cnt * br_entry_size;
}

static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
	.func		= bpf_get_branch_snapshot,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1];

	if ((u64) n >= nr_args)
		return -EINVAL;
	*value = ((u64 *)ctx)[n];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_arg_proto = {
	.func		= get_func_arg,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
	.arg3_size	= sizeof(u64),
};

BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
{
	/* This helper call is inlined by verifier. */
	u64 nr_args = ((u64 *)ctx)[-1];

	*value = ((u64 *)ctx)[nr_args];
	return 0;
}

static const struct bpf_func_proto bpf_get_func_ret_proto = {
	.func		= get_func_ret,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
	.arg2_size	= sizeof(u64),
};

BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-1];
}

static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
	.func		= get_func_arg_cnt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
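/*
 * Illustrative BPF-program-side sketch (not part of this file): an fentry
 * program inspecting its traced function's arguments through the helpers
 * above (all three are inlined by the verifier). Names are hypothetical.
 *
 *	u64 nr_args = bpf_get_func_arg_cnt(ctx);
 *	u64 arg0;
 *
 *	if (!bpf_get_func_arg(ctx, 0, &arg0))
 *		bpf_printk("%llu args, arg0=%llx", nr_args, arg0);
 */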
#ifdef CONFIG_KEYS
__bpf_kfunc_start_defs();

/**
 * bpf_lookup_user_key - lookup a key by its serial
 * @serial: key handle serial number
 * @flags: lookup-specific flags
 *
 * Search a key with a given *serial* and the provided *flags*.
 * If found, increment the reference count of the key by one, and
 * return it in the bpf_key structure.
 *
 * The bpf_key structure must be passed to bpf_key_put() when done
 * with it, so that the key reference count is decremented and the
 * bpf_key structure is freed.
 *
 * Permission checks are deferred to the time the key is used by
 * one of the available key-specific kfuncs.
 *
 * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
 * special keyring (e.g. session keyring), if it doesn't yet exist.
 * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
 * for the key construction, and to retrieve uninstantiated keys (keys
 * without data attached to them).
 *
 * Return: a bpf_key pointer with a valid key pointer if the key is found, a
 *         NULL pointer otherwise.
 */
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
{
	key_ref_t key_ref;
	struct bpf_key *bkey;

	if (flags & ~KEY_LOOKUP_ALL)
		return NULL;

	/*
	 * Permission check is deferred until the key is used, as the
	 * intent of the caller is unknown here.
	 */
	key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
	if (IS_ERR(key_ref))
		return NULL;

	bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
	if (!bkey) {
		key_put(key_ref_to_ptr(key_ref));
		return NULL;
	}

	bkey->key = key_ref_to_ptr(key_ref);
	bkey->has_ref = true;

	return bkey;
}
/**
 * bpf_lookup_system_key - lookup a key by a system-defined ID
 * @id: key ID
 *
 * Obtain a bpf_key structure with a key pointer set to the passed key ID.
 * The key pointer is marked as invalid, to prevent bpf_key_put() from
 * attempting to decrement the key reference count on that pointer. The key
 * pointer set in such way is currently understood only by
 * verify_pkcs7_signature().
 *
 * Set *id* to one of the values defined in include/linux/verification.h:
 * 0 for the primary keyring (immutable keyring of system keys);
 * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
 * (where keys can be added only if they are vouched for by existing keys
 * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
 * keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kernel image and, possibly, the initramfs signature).
 *
 * Return: a bpf_key pointer with an invalid key pointer set from the
 *         pre-determined ID on success, a NULL pointer otherwise
 */
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
	struct bpf_key *bkey;

	if (system_keyring_id_check(id) < 0)
		return NULL;

	bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
	if (!bkey)
		return NULL;

	bkey->key = (struct key *)(unsigned long)id;
	bkey->has_ref = false;

	return bkey;
}
/**
 * bpf_key_put - decrement key reference count if key is valid and free bpf_key
 * @bkey: bpf_key structure
 *
 * Decrement the reference count of the key inside *bkey*, if the pointer
 * is valid, and free *bkey*.
 */
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
	if (bkey->has_ref)
		key_put(bkey->key);

	kfree(bkey);
}
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
/**
 * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
 * @data_p: data to verify
 * @sig_p: signature of the data
 * @trusted_keyring: keyring with keys trusted for signature verification
 *
 * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
 * with keys in a keyring referenced by *trusted_keyring*.
 *
 * Return: 0 on success, a negative value on error.
 */
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
			       struct bpf_dynptr *sig_p,
			       struct bpf_key *trusted_keyring)
{
	struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
	struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
	const void *data, *sig;
	u32 data_len, sig_len;
	int ret;

	if (trusted_keyring->has_ref) {
		/*
		 * Do the permission check deferred in bpf_lookup_user_key().
		 * See bpf_lookup_user_key() for more details.
		 *
		 * A call to key_task_permission() here would be redundant, as
		 * it is already done by keyring_search() called by
		 * find_asymmetric_key().
		 */
		ret = key_validate(trusted_keyring->key);
		if (ret < 0)
			return ret;
	}

	data_len = __bpf_dynptr_size(data_ptr);
	data = __bpf_dynptr_data(data_ptr, data_len);
	sig_len = __bpf_dynptr_size(sig_ptr);
	sig = __bpf_dynptr_data(sig_ptr, sig_len);

	return verify_pkcs7_signature(data, data_len, sig, sig_len,
				      trusted_keyring->key,
				      VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
				      NULL);
}
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */

__bpf_kfunc_end_defs();
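/*
 * Illustrative sleepable-BPF-side sketch (not part of this file): the intended
 * flow for the kfuncs above is lookup -> verify -> put. The dynptr variables
 * are assumed to have been initialized by the caller and the serial value is
 * hypothetical.
 *
 *	struct bpf_key *tk = bpf_lookup_user_key(serial, 0);
 *
 *	if (tk) {
 *		err = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, tk);
 *		bpf_key_put(tk);
 *	}
 */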
BTF_KFUNCS_START(key_sig_kfunc_set)
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
BTF_KFUNCS_END(key_sig_kfunc_set)

static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &key_sig_kfunc_set,
};

static int __init bpf_key_sig_kfuncs_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_key_sig_kfunc_set);
}

late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_map_lookup_percpu_elem:
		return &bpf_map_lookup_percpu_elem_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_task_btf:
		return &bpf_get_current_task_btf_proto;
	case BPF_FUNC_task_pt_regs:
		return &bpf_task_pt_regs_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_probe_write_user:
		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
		       NULL : bpf_get_probe_write_proto();
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	case BPF_FUNC_probe_read:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_proto;
	case BPF_FUNC_probe_read_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_cgrp_storage_get:
		return &bpf_cgrp_storage_get_proto;
	case BPF_FUNC_cgrp_storage_delete:
		return &bpf_cgrp_storage_delete_proto;
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
#endif
	case BPF_FUNC_send_signal:
		return &bpf_send_signal_proto;
	case BPF_FUNC_send_signal_thread:
		return &bpf_send_signal_thread_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_get_task_stack:
		return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
				       : &bpf_get_task_stack_proto;
	case BPF_FUNC_copy_from_user:
		return &bpf_copy_from_user_proto;
	case BPF_FUNC_copy_from_user_task:
		return &bpf_copy_from_user_task_proto;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	case BPF_FUNC_task_storage_get:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_get_recur_proto;
		return &bpf_task_storage_get_proto;
	case BPF_FUNC_task_storage_delete:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_delete_recur_proto;
		return &bpf_task_storage_delete_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	case BPF_FUNC_get_func_ip:
		return &bpf_get_func_ip_proto_tracing;
	case BPF_FUNC_get_branch_snapshot:
		return &bpf_get_branch_snapshot_proto;
	case BPF_FUNC_find_vma:
		return &bpf_find_vma_proto;
	case BPF_FUNC_trace_vprintk:
		return bpf_get_trace_vprintk_proto();
	default:
		return bpf_base_func_proto(func_id, prog);
	}
}
static bool is_kprobe_multi(const struct bpf_prog *prog)
{
	return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ||
	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
}

static inline bool is_kprobe_session(const struct bpf_prog *prog)
{
	return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
}
static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_get_stack:
		return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	case BPF_FUNC_get_func_ip:
		if (is_kprobe_multi(prog))
			return &bpf_get_func_ip_proto_kprobe_multi;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
			return &bpf_get_func_ip_proto_uprobe_multi;
		return &bpf_get_func_ip_proto_kprobe;
	case BPF_FUNC_get_attach_cookie:
		if (is_kprobe_multi(prog))
			return &bpf_get_attach_cookie_proto_kmulti;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
			return &bpf_get_attach_cookie_proto_umulti;
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}
/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto		= kprobe_prog_func_proto,
	.is_valid_access	= kprobe_prog_is_valid_access,
};

const struct bpf_prog_ops kprobe_prog_ops = {
};
, void *, tp_buff
, struct bpf_map
*, map
,
1621 u64
, flags
, void *, data
, u64
, size
)
1623 struct pt_regs
*regs
= *(struct pt_regs
**)tp_buff
;
1626 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
1627 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
1628 * from there and call the same bpf_perf_event_output() helper inline.
1630 return ____bpf_perf_event_output(regs
, map
, flags
, data
, size
);
1633 static const struct bpf_func_proto bpf_perf_event_output_proto_tp
= {
1634 .func
= bpf_perf_event_output_tp
,
1636 .ret_type
= RET_INTEGER
,
1637 .arg1_type
= ARG_PTR_TO_CTX
,
1638 .arg2_type
= ARG_CONST_MAP_PTR
,
1639 .arg3_type
= ARG_ANYTHING
,
1640 .arg4_type
= ARG_PTR_TO_MEM
| MEM_RDONLY
,
1641 .arg5_type
= ARG_CONST_SIZE_OR_ZERO
,
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_trace;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= tp_prog_is_valid_access,
};

const struct bpf_prog_ops tracepoint_prog_ops = {
};
BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (err)
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
	.func		= bpf_perf_prog_read_value,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};
BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	struct perf_branch_stack *br_stack = ctx->data->br_stack;
	u32 to_copy;

	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
		return -EINVAL;

	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
		return -ENOENT;

	if (unlikely(!br_stack))
		return -ENOENT;

	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
		return br_stack->nr * br_entry_size;

	if (!buf || (size % br_entry_size != 0))
		return -EINVAL;

	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
	memcpy(buf, br_stack->entries, to_copy);

	return to_copy;
}

static const struct bpf_func_proto bpf_read_branch_records_proto = {
	.func		= bpf_read_branch_records,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_pe;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_pe;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto;
	case BPF_FUNC_read_branch_records:
		return &bpf_read_branch_records_proto;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_pe;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}
/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context.
 */
struct bpf_raw_tp_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
static struct pt_regs *get_bpf_raw_tp_regs(void)
{
	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
		this_cpu_dec(bpf_raw_tp_nest_level);
		return ERR_PTR(-EBUSY);
	}

	return &tp_regs->regs[nest_level - 1];
}

static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			      flags, 0, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			    (unsigned long) size, flags, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
	.func		= bpf_get_stack_raw_tp,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_raw_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_raw_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_raw_tp;
	case BPF_FUNC_get_attach_cookie:
		return &bpf_get_attach_cookie_proto_tracing;
	default:
		return bpf_tracing_func_proto(func_id, prog);
	}
}
const struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_skb_output:
		return &bpf_skb_output_proto;
	case BPF_FUNC_xdp_output:
		return &bpf_xdp_output_proto;
	case BPF_FUNC_skc_to_tcp6_sock:
		return &bpf_skc_to_tcp6_sock_proto;
	case BPF_FUNC_skc_to_tcp_sock:
		return &bpf_skc_to_tcp_sock_proto;
	case BPF_FUNC_skc_to_tcp_timewait_sock:
		return &bpf_skc_to_tcp_timewait_sock_proto;
	case BPF_FUNC_skc_to_tcp_request_sock:
		return &bpf_skc_to_tcp_request_sock_proto;
	case BPF_FUNC_skc_to_udp6_sock:
		return &bpf_skc_to_udp6_sock_proto;
	case BPF_FUNC_skc_to_unix_sock:
		return &bpf_skc_to_unix_sock_proto;
	case BPF_FUNC_skc_to_mptcp_sock:
		return &bpf_skc_to_mptcp_sock_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_tracing_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_tracing_proto;
	case BPF_FUNC_sock_from_file:
		return &bpf_sock_from_file_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_ptr_cookie_proto;
	case BPF_FUNC_xdp_get_buff_len:
		return &bpf_xdp_get_buff_len_trace_proto;
#endif
	case BPF_FUNC_seq_printf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_proto :
		       NULL;
	case BPF_FUNC_seq_write:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_write_proto :
		       NULL;
	case BPF_FUNC_seq_printf_btf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_btf_proto :
		       NULL;
	case BPF_FUNC_d_path:
		return &bpf_d_path_proto;
	case BPF_FUNC_get_func_arg:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
	case BPF_FUNC_get_func_ret:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
	case BPF_FUNC_get_func_arg_cnt:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
	case BPF_FUNC_get_attach_cookie:
		if (prog->type == BPF_PROG_TYPE_TRACING &&
		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
			return &bpf_get_attach_cookie_proto_tracing;
		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
	default:
		fn = raw_tp_prog_func_proto(func_id, prog);
		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
			fn = bpf_iter_get_func_proto(func_id, prog);
		return fn;
	}
}
static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}

static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}

const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto		= raw_tp_prog_func_proto,
	.is_valid_access	= raw_tp_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run		= bpf_prog_test_run_raw_tp,
#endif
};

const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto		= tracing_prog_func_proto,
	.is_valid_access	= tracing_prog_is_valid_access,
};

const struct bpf_prog_ops tracing_prog_ops = {
	.test_run		= bpf_prog_test_run_tracing,
};
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
						 enum bpf_access_type type,
						 const struct bpf_prog *prog,
						 struct bpf_insn_access_aux *info)
{
	if (off == 0) {
		if (size != sizeof(u64) || type != BPF_READ)
			return false;
		info->reg_type = PTR_TO_TP_BUFFER;
	}
	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}

const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto		= raw_tp_prog_func_proto,
	.is_valid_access	= raw_tp_writable_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};
2075 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
2076 				    const struct bpf_prog *prog,
2077 				    struct bpf_insn_access_aux *info)
2078 {
2079 	const int size_u64 = sizeof(u64);
2081 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
2082 		return false;
2083 	if (type != BPF_READ)
2084 		return false;
2085 	if (off % size != 0) {
2086 		if (sizeof(unsigned long) != 4)
2087 			return false;
2088 		if (size != 8)
2089 			return false;
2090 		if (off % size != 4)
2091 			return false;
2092 	}
2094 	switch (off) {
2095 	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
2096 		bpf_ctx_record_field_size(info, size_u64);
2097 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
2098 			return false;
2099 		break;
2100 	case bpf_ctx_range(struct bpf_perf_event_data, addr):
2101 		bpf_ctx_record_field_size(info, size_u64);
2102 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
2103 			return false;
2104 		break;
2105 	default:
2106 		if (size != sizeof(long))
2107 			return false;
2108 	}
2110 	return true;
2111 }
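/*
 * bpf_perf_event_data is a stable ABI view of bpf_perf_event_data_kern.
 * Loads of sample_period and addr are therefore rewritten below into two
 * instructions: one load of the perf_sample_data pointer out of the kernel
 * struct, and one load of the requested field at its real offset.
 */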
2113 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
2114 				      const struct bpf_insn *si,
2115 				      struct bpf_insn *insn_buf,
2116 				      struct bpf_prog *prog, u32 *target_size)
2117 {
2118 	struct bpf_insn *insn = insn_buf;
2120 	switch (si->off) {
2121 	case offsetof(struct bpf_perf_event_data, sample_period):
2122 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
2123 						       data), si->dst_reg, si->src_reg,
2124 				      offsetof(struct bpf_perf_event_data_kern, data));
2125 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
2126 				      bpf_target_off(struct perf_sample_data, period, 8,
2127 						     target_size));
2128 		break;
2129 	case offsetof(struct bpf_perf_event_data, addr):
2130 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
2131 						       data), si->dst_reg, si->src_reg,
2132 				      offsetof(struct bpf_perf_event_data_kern, data));
2133 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
2134 				      bpf_target_off(struct perf_sample_data, addr, 8,
2135 						     target_size));
2136 		break;
2137 	default:
2138 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
2139 						       regs), si->dst_reg, si->src_reg,
2140 				      offsetof(struct bpf_perf_event_data_kern, regs));
2141 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
2142 				      si->off);
2143 		break;
2144 	}
2146 	return insn - insn_buf;
2147 }
2149 const struct bpf_verifier_ops perf_event_verifier_ops = {
2150 	.get_func_proto		= pe_prog_func_proto,
2151 	.is_valid_access	= pe_prog_is_valid_access,
2152 	.convert_ctx_access	= pe_prog_convert_ctx_access,
2153 };
2155 const struct bpf_prog_ops perf_event_prog_ops = {
2156 };
2158 static DEFINE_MUTEX(bpf_event_mutex);
2160 #define BPF_TRACE_MAX_PROGS 64
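/*
 * Attaching appends the program to the prog_array shared by all perf events
 * on the same tp_event, so a single tracepoint/kprobe can fan out to at most
 * BPF_TRACE_MAX_PROGS programs; the copy-and-swap under bpf_event_mutex
 * keeps readers lock-free via RCU.
 */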
2162 int perf_event_attach_bpf_prog(struct perf_event *event,
2163 			       struct bpf_prog *prog,
2164 			       u64 bpf_cookie)
2165 {
2166 	struct bpf_prog_array *old_array;
2167 	struct bpf_prog_array *new_array;
2168 	int ret = -EEXIST;
2170 	/*
2171 	 * Kprobe override only works if they are on the function entry,
2172 	 * and only if they are on the opt-in list.
2173 	 */
2174 	if (prog->kprobe_override &&
2175 	    (!trace_kprobe_on_func_entry(event->tp_event) ||
2176 	     !trace_kprobe_error_injectable(event->tp_event)))
2177 		return -EINVAL;
2179 	mutex_lock(&bpf_event_mutex);
2181 	if (event->prog)
2182 		goto unlock;
2184 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
2185 	if (old_array &&
2186 	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
2187 		ret = -E2BIG;
2188 		goto unlock;
2189 	}
2191 	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
2192 	if (ret < 0)
2193 		goto unlock;
2195 	/* set the new array to event->tp_event and set event->prog */
2196 	event->prog = prog;
2197 	event->bpf_cookie = bpf_cookie;
2198 	rcu_assign_pointer(event->tp_event->prog_array, new_array);
2199 	bpf_prog_array_free_sleepable(old_array);
2201 unlock:
2202 	mutex_unlock(&bpf_event_mutex);
2203 	return ret;
2204 }
2206 void perf_event_detach_bpf_prog(struct perf_event *event)
2207 {
2208 	struct bpf_prog_array *old_array;
2209 	struct bpf_prog_array *new_array;
2210 	int ret;
2212 	mutex_lock(&bpf_event_mutex);
2214 	if (!event->prog)
2215 		goto unlock;
2217 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
2218 	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
2219 	if (ret == -ENOENT)
2220 		goto unlock;
2221 	if (ret < 0) {
2222 		bpf_prog_array_delete_safe(old_array, event->prog);
2223 	} else {
2224 		rcu_assign_pointer(event->tp_event->prog_array, new_array);
2225 		bpf_prog_array_free_sleepable(old_array);
2226 	}
2228 	bpf_prog_put(event->prog);
2229 	event->prog = NULL;
2231 unlock:
2232 	mutex_unlock(&bpf_event_mutex);
2233 }
2235 int perf_event_query_prog_array(struct perf_event *event, void __user *info)
2236 {
2237 	struct perf_event_query_bpf __user *uquery = info;
2238 	struct perf_event_query_bpf query = {};
2239 	struct bpf_prog_array *progs;
2240 	u32 *ids, prog_cnt, ids_len;
2241 	int ret;
2243 	if (!perfmon_capable())
2244 		return -EPERM;
2245 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
2246 		return -EINVAL;
2247 	if (copy_from_user(&query, uquery, sizeof(query)))
2248 		return -EFAULT;
2250 	ids_len = query.ids_len;
2251 	if (ids_len > BPF_TRACE_MAX_PROGS)
2252 		return -E2BIG;
2253 	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
2254 	if (!ids)
2255 		return -ENOMEM;
2256 	/*
2257 	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
2258 	 * is required when user only wants to check for uquery->prog_cnt.
2259 	 * There is no need to check for it since the case is handled
2260 	 * gracefully in bpf_prog_array_copy_info.
2261 	 */
2263 	mutex_lock(&bpf_event_mutex);
2264 	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
2265 	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
2266 	mutex_unlock(&bpf_event_mutex);
2268 	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
2269 	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
2270 		ret = -EFAULT;
2272 	kfree(ids);
2273 	return ret;
2274 }
2276 extern struct bpf_raw_event_map __start__bpf_raw_tp[];
2277 extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
2279 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
2280 {
2281 	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
2283 	for (; btp < __stop__bpf_raw_tp; btp++) {
2284 		if (!strcmp(btp->tp->name, name))
2285 			return btp;
2286 	}
2288 	return bpf_get_raw_tracepoint_module(name);
2289 }
2291 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
2292 {
2293 	struct module *mod;
2295 	preempt_disable();
2296 	mod = __module_address((unsigned long)btp);
2297 	module_put(mod);
2298 	preempt_enable();
2299 }
2301 static __always_inline
2302 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
2303 {
2304 	struct bpf_prog *prog = link->link.prog;
2305 	struct bpf_run_ctx *old_run_ctx;
2306 	struct bpf_trace_run_ctx run_ctx;
2308 	cant_sleep();
2309 	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
2310 		bpf_prog_inc_misses_counter(prog);
2311 		goto out;
2312 	}
2314 	run_ctx.bpf_cookie = link->cookie;
2315 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
2317 	rcu_read_lock();
2318 	(void) bpf_prog_run(prog, args);
2319 	rcu_read_unlock();
2321 	bpf_reset_run_ctx(old_run_ctx);
2322 out:
2323 	this_cpu_dec(*(prog->active));
2324 }
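/*
 * The macro block below stamps out bpf_trace_run1() ... bpf_trace_run12(),
 * one exported entry point per possible tracepoint argument count.  Each
 * one just spills its u64 arguments into a local array and calls
 * __bpf_trace_run() above.
 */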
2326 #define UNPACK(...)			__VA_ARGS__
2327 #define REPEAT_1(FN, DL, X, ...)	FN(X)
2328 #define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
2329 #define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
2330 #define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
2331 #define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
2332 #define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
2333 #define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
2334 #define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
2335 #define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
2336 #define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
2337 #define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
2338 #define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
2339 #define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)
2341 #define SARG(X)		u64 arg##X
2342 #define COPY(X)		args[X] = arg##X
2344 #define __DL_COM	(,)
2345 #define __DL_SEM	(;)
2347 #define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
2349 #define BPF_TRACE_DEFN_x(x)						\
2350 	void bpf_trace_run##x(struct bpf_raw_tp_link *link,		\
2351 			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
2352 	{								\
2353 		u64 args[x];						\
2354 		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
2355 		__bpf_trace_run(link, args);				\
2356 	}								\
2357 	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
2358 BPF_TRACE_DEFN_x(1);
2359 BPF_TRACE_DEFN_x(2);
2360 BPF_TRACE_DEFN_x(3);
2361 BPF_TRACE_DEFN_x(4);
2362 BPF_TRACE_DEFN_x(5);
2363 BPF_TRACE_DEFN_x(6);
2364 BPF_TRACE_DEFN_x(7);
2365 BPF_TRACE_DEFN_x(8);
2366 BPF_TRACE_DEFN_x(9);
2367 BPF_TRACE_DEFN_x(10);
2368 BPF_TRACE_DEFN_x(11);
2369 BPF_TRACE_DEFN_x(12);
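/*
 * As an illustration, BPF_TRACE_DEFN_x(2) expands to roughly:
 *
 *	void bpf_trace_run2(struct bpf_raw_tp_link *link, u64 arg0, u64 arg1)
 *	{
 *		u64 args[2];
 *
 *		args[0] = arg0; args[1] = arg1;
 *		__bpf_trace_run(link, args);
 *	}
 *	EXPORT_SYMBOL_GPL(bpf_trace_run2);
 */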
2371 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2372 {
2373 	struct tracepoint *tp = btp->tp;
2374 	struct bpf_prog *prog = link->link.prog;
2376 	/*
2377 	 * check that program doesn't access arguments beyond what's
2378 	 * available in this tracepoint
2379 	 */
2380 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
2381 		return -EINVAL;
2383 	if (prog->aux->max_tp_access > btp->writable_size)
2384 		return -EINVAL;
2386 	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
2387 }
2389 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2390 {
2391 	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
2392 }
2394 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
2395 			    u32 *fd_type, const char **buf,
2396 			    u64 *probe_offset, u64 *probe_addr,
2397 			    unsigned long *missed)
2398 {
2399 	bool is_tracepoint, is_syscall_tp;
2400 	struct bpf_prog *prog;
2401 	int flags, err = 0;
2403 	prog = event->prog;
2404 	if (!prog)
2405 		return -ENOENT;
2407 	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
2408 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
2409 		return -EOPNOTSUPP;
2411 	*prog_id = prog->aux->id;
2412 	flags = event->tp_event->flags;
2413 	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
2414 	is_syscall_tp = is_syscall_trace_event(event->tp_event);
2416 	if (is_tracepoint || is_syscall_tp) {
2417 		*buf = is_tracepoint ? event->tp_event->tp->name
2418 				     : event->tp_event->name;
2419 		/* We allow NULL pointer for tracepoint */
2420 		if (fd_type)
2421 			*fd_type = BPF_FD_TYPE_TRACEPOINT;
2422 		if (probe_offset)
2423 			*probe_offset = 0x0;
2424 		if (probe_addr)
2425 			*probe_addr = 0x0;
2426 	} else {
2427 		/* kprobe/uprobe */
2428 		err = -EOPNOTSUPP;
2429 #ifdef CONFIG_KPROBE_EVENTS
2430 		if (flags & TRACE_EVENT_FL_KPROBE)
2431 			err = bpf_get_kprobe_info(event, fd_type, buf,
2432 						  probe_offset, probe_addr, missed,
2433 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2434 #endif
2435 #ifdef CONFIG_UPROBE_EVENTS
2436 		if (flags & TRACE_EVENT_FL_UPROBE)
2437 			err = bpf_get_uprobe_info(event, fd_type, buf,
2438 						  probe_offset, probe_addr,
2439 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2440 #endif
2441 	}
2443 	return err;
2444 }
2446 static int __init send_signal_irq_work_init(void)
2447 {
2448 	int cpu;
2449 	struct send_signal_irq_work *work;
2451 	for_each_possible_cpu(cpu) {
2452 		work = per_cpu_ptr(&send_signal_work, cpu);
2453 		init_irq_work(&work->irq_work, do_bpf_send_signal);
2454 	}
2455 	return 0;
2456 }
2458 subsys_initcall(send_signal_irq_work_init);
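/*
 * Modules can carry their own __bpf_raw_tp section, so a module notifier
 * keeps bpf_trace_modules up to date and lets
 * bpf_get_raw_tracepoint_module() find raw tracepoints living in module
 * text.
 */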
2460 #ifdef CONFIG_MODULES
2461 static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
2462 			    void *module)
2463 {
2464 	struct bpf_trace_module *btm, *tmp;
2465 	struct module *mod = module;
2466 	int ret = 0;
2468 	if (mod->num_bpf_raw_events == 0 ||
2469 	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
2470 		goto out;
2472 	mutex_lock(&bpf_module_mutex);
2474 	switch (op) {
2475 	case MODULE_STATE_COMING:
2476 		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
2477 		if (btm) {
2478 			btm->module = module;
2479 			list_add(&btm->list, &bpf_trace_modules);
2480 		} else {
2481 			ret = -ENOMEM;
2482 		}
2483 		break;
2484 	case MODULE_STATE_GOING:
2485 		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
2486 			if (btm->module == module) {
2487 				list_del(&btm->list);
2488 				kfree(btm);
2489 				break;
2490 			}
2491 		}
2492 		break;
2493 	}
2495 	mutex_unlock(&bpf_module_mutex);
2497 out:
2498 	return notifier_from_errno(ret);
2499 }
2501 static struct notifier_block bpf_module_nb = {
2502 	.notifier_call = bpf_event_notify,
2503 };
2505 static int __init bpf_event_init(void)
2506 {
2507 	register_module_notifier(&bpf_module_nb);
2508 	return 0;
2509 }
2511 fs_initcall(bpf_event_init);
2512 #endif /* CONFIG_MODULES */
2514 struct bpf_session_run_ctx {
2515 	struct bpf_run_ctx run_ctx;
2516 	bool is_return;
2517 	void *data;
2518 };
2520 #ifdef CONFIG_FPROBE
2521 struct bpf_kprobe_multi_link {
2522 	struct bpf_link link;
2523 	struct fprobe fp;
2524 	unsigned long *addrs;
2525 	u64 *cookies;
2526 	u32 cnt;
2527 	u32 mods_cnt;
2528 	struct module **mods;
2529 	u32 flags;
2530 };
2532 struct bpf_kprobe_multi_run_ctx {
2533 	struct bpf_session_run_ctx session_ctx;
2534 	struct bpf_kprobe_multi_link *link;
2535 	unsigned long entry_ip;
2536 };
2538 struct user_syms {
2539 	const char **syms;
2540 	char *buf;
2541 };
2543 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
2544 {
2545 	unsigned long __user usymbol;
2546 	const char **syms = NULL;
2547 	char *buf = NULL, *p;
2548 	int err = -ENOMEM;
2549 	unsigned int i;
2551 	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
2552 	if (!syms)
2553 		goto error;
2555 	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
2556 	if (!buf)
2557 		goto error;
2559 	for (p = buf, i = 0; i < cnt; i++) {
2560 		if (__get_user(usymbol, usyms + i)) {
2561 			err = -EFAULT;
2562 			goto error;
2563 		}
2564 		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
2565 		if (err == KSYM_NAME_LEN)
2566 			err = -E2BIG;
2567 		if (err < 0)
2568 			goto error;
2569 		syms[i] = p;
2570 		p += err + 1;
2571 	}
2573 	us->syms = syms;
2574 	us->buf = buf;
2575 	return 0;
2577 error:
2578 	if (err) {
2579 		kvfree(syms);
2580 		kvfree(buf);
2581 	}
2582 	return err;
2583 }
2585 static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
2586 {
2587 	u32 i;
2589 	for (i = 0; i < cnt; i++)
2590 		module_put(mods[i]);
2591 }
2593 static void free_user_syms(struct user_syms *us)
2594 {
2595 	kvfree(us->syms);
2596 	kvfree(us->buf);
2597 }
2599 static void bpf_kprobe_multi_link_release(struct bpf_link *link)
2600 {
2601 	struct bpf_kprobe_multi_link *kmulti_link;
2603 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2604 	unregister_fprobe(&kmulti_link->fp);
2605 	kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
2606 }
2608 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
2609 {
2610 	struct bpf_kprobe_multi_link *kmulti_link;
2612 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2613 	kvfree(kmulti_link->addrs);
2614 	kvfree(kmulti_link->cookies);
2615 	kfree(kmulti_link->mods);
2616 	kfree(kmulti_link);
2617 }
2619 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
2620 						struct bpf_link_info *info)
2621 {
2622 	u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
2623 	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
2624 	struct bpf_kprobe_multi_link *kmulti_link;
2625 	u32 ucount = info->kprobe_multi.count;
2626 	int err = 0, i;
2628 	if (!uaddrs ^ !ucount)
2629 		return -EINVAL;
2630 	if (ucookies && !ucount)
2631 		return -EINVAL;
2633 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2634 	info->kprobe_multi.count = kmulti_link->cnt;
2635 	info->kprobe_multi.flags = kmulti_link->flags;
2636 	info->kprobe_multi.missed = kmulti_link->fp.nmissed;
2638 	if (!uaddrs)
2639 		return 0;
2640 	if (ucount < kmulti_link->cnt)
2641 		err = -ENOSPC;
2642 	else
2643 		ucount = kmulti_link->cnt;
2645 	if (ucookies) {
2646 		if (kmulti_link->cookies) {
2647 			if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
2648 				return -EFAULT;
2649 		} else {
2650 			for (i = 0; i < ucount; i++) {
2651 				if (put_user(0, ucookies + i))
2652 					return -EFAULT;
2653 			}
2654 		}
2655 	}
2657 	if (kallsyms_show_value(current_cred())) {
2658 		if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
2659 			return -EFAULT;
2660 	} else {
2661 		for (i = 0; i < ucount; i++) {
2662 			if (put_user(0, uaddrs + i))
2663 				return -EFAULT;
2664 		}
2665 	}
2666 	return err;
2667 }
2669 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
2670 	.release = bpf_kprobe_multi_link_release,
2671 	.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
2672 	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
2673 };
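/*
 * addrs and cookies are kept as two parallel arrays.  sort_r() swaps both
 * arrays in lockstep (bpf_kprobe_multi_cookie_swap below), so that a later
 * bsearch() on the resolved address can recover the matching cookie in
 * bpf_kprobe_multi_cookie().
 */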
2675 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
2676 {
2677 	const struct bpf_kprobe_multi_link *link = priv;
2678 	unsigned long *addr_a = a, *addr_b = b;
2679 	u64 *cookie_a, *cookie_b;
2681 	cookie_a = link->cookies + (addr_a - link->addrs);
2682 	cookie_b = link->cookies + (addr_b - link->addrs);
2684 	/* swap addr_a/addr_b and cookie_a/cookie_b values */
2685 	swap(*addr_a, *addr_b);
2686 	swap(*cookie_a, *cookie_b);
2687 }
2689 static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
2690 {
2691 	const unsigned long *addr_a = a, *addr_b = b;
2693 	if (*addr_a == *addr_b)
2694 		return 0;
2695 	return *addr_a < *addr_b ? -1 : 1;
2696 }
2698 static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
2699 {
2700 	return bpf_kprobe_multi_addrs_cmp(a, b);
2701 }
2703 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2704 {
2705 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2706 	struct bpf_kprobe_multi_link *link;
2707 	u64 *cookie, entry_ip;
2708 	unsigned long *addr;
2710 	if (WARN_ON_ONCE(!ctx))
2711 		return 0;
2712 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2713 			       session_ctx.run_ctx);
2714 	link = run_ctx->link;
2715 	if (!link->cookies)
2716 		return 0;
2717 	entry_ip = run_ctx->entry_ip;
2718 	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
2719 		       bpf_kprobe_multi_addrs_cmp);
2720 	if (!addr)
2721 		return 0;
2722 	cookie = link->cookies + (addr - link->addrs);
2723 	return *cookie;
2724 }
2726 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2727 {
2728 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2730 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2731 			       session_ctx.run_ctx);
2732 	return run_ctx->entry_ip;
2733 }
2735 static int
2736 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
2737 			   unsigned long entry_ip, struct pt_regs *regs,
2738 			   bool is_return, void *data)
2739 {
2740 	struct bpf_kprobe_multi_run_ctx run_ctx = {
2741 		.session_ctx = {
2742 			.is_return = is_return,
2743 			.data = data,
2744 		},
2745 		.link = link,
2746 		.entry_ip = entry_ip,
2747 	};
2748 	struct bpf_run_ctx *old_run_ctx;
2749 	int err;
2751 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
2752 		bpf_prog_inc_misses_counter(link->link.prog);
2753 		err = 0;
2754 		goto out;
2755 	}
2757 	migrate_disable();
2758 	rcu_read_lock();
2759 	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
2760 	err = bpf_prog_run(link->link.prog, regs);
2761 	bpf_reset_run_ctx(old_run_ctx);
2762 	rcu_read_unlock();
2763 	migrate_enable();
2765  out:
2766 	__this_cpu_dec(bpf_prog_active);
2767 	return err;
2768 }
2770 static int
2771 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
2772 			  unsigned long ret_ip, struct pt_regs *regs,
2773 			  void *data)
2774 {
2775 	struct bpf_kprobe_multi_link *link;
2776 	int err;
2778 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2779 	err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, false, data);
2780 	return is_kprobe_session(link->link.prog) ? err : 0;
2781 }
2783 static void
2784 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
2785 			       unsigned long ret_ip, struct pt_regs *regs,
2786 			       void *data)
2787 {
2788 	struct bpf_kprobe_multi_link *link;
2790 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2791 	kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, data);
2792 }
2794 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
2795 {
2796 	const char **str_a = (const char **) a;
2797 	const char **str_b = (const char **) b;
2799 	return strcmp(*str_a, *str_b);
2800 }
2802 struct multi_symbols_sort {
2803 	const char **funcs;
2804 	u64 *cookies;
2805 };
2807 static void symbols_swap_r(void *a, void *b, int size, const void *priv)
2808 {
2809 	const struct multi_symbols_sort *data = priv;
2810 	const char **name_a = a, **name_b = b;
2812 	swap(*name_a, *name_b);
2814 	/* If defined, swap also related cookies. */
2815 	if (data->cookies) {
2816 		u64 *cookie_a, *cookie_b;
2818 		cookie_a = data->cookies + (name_a - data->funcs);
2819 		cookie_b = data->cookies + (name_b - data->funcs);
2820 		swap(*cookie_a, *cookie_b);
2821 	}
2822 }
2824 struct modules_array {
2825 	struct module **mods;
2826 	int mods_cnt;
2827 	int mods_cap;
2828 };
2830 static int add_module(struct modules_array *arr, struct module *mod)
2831 {
2832 	struct module **mods;
2834 	if (arr->mods_cnt == arr->mods_cap) {
2835 		arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
2836 		mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
2837 		if (!mods)
2838 			return -ENOMEM;
2839 		arr->mods = mods;
2840 	}
2842 	arr->mods[arr->mods_cnt] = mod;
2843 	arr->mods_cnt++;
2844 	return 0;
2845 }
2847 static bool has_module(struct modules_array *arr, struct module *mod)
2848 {
2849 	int i;
2851 	for (i = arr->mods_cnt - 1; i >= 0; i--) {
2852 		if (arr->mods[i] == mod)
2853 			return true;
2854 	}
2855 	return false;
2856 }
2858 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
2859 {
2860 	struct modules_array arr = {};
2861 	u32 i, err = 0;
2863 	for (i = 0; i < addrs_cnt; i++) {
2864 		struct module *mod;
2866 		preempt_disable();
2867 		mod = __module_address(addrs[i]);
2868 		/* Either no module or we it's already stored  */
2869 		if (!mod || has_module(&arr, mod)) {
2870 			preempt_enable();
2871 			continue;
2872 		}
2873 		if (!try_module_get(mod))
2874 			err = -EINVAL;
2875 		preempt_enable();
2876 		if (err)
2877 			break;
2878 		err = add_module(&arr, mod);
2879 		if (err) {
2880 			module_put(mod);
2881 			break;
2882 		}
2883 	}
2885 	/* We return either err < 0 in case of error, ... */
2886 	if (err) {
2887 		kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
2888 		kfree(arr.mods);
2889 		return err;
2890 	}
2892 	/* or number of modules found if everything is ok. */
2893 	*mods = arr.mods;
2894 	return arr.mods_cnt;
2895 }
2897 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
2898 {
2899 	u32 i;
2901 	for (i = 0; i < cnt; i++) {
2902 		if (!within_error_injection_list(addrs[i]))
2903 			return -EINVAL;
2904 	}
2905 	return 0;
2906 }
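/*
 * bpf_kprobe_multi_link_attach() below does the heavy lifting for
 * BPF_LINK_TYPE_KPROBE_MULTI: it copies in either addresses or symbol names
 * (plus optional cookies), resolves symbols via ftrace_lookup_symbols(),
 * sorts addresses together with their cookies, pins the modules owning the
 * addresses, and finally registers a single fprobe covering all of them.
 */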
2908 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2909 {
2910 	struct bpf_kprobe_multi_link *link = NULL;
2911 	struct bpf_link_primer link_primer;
2912 	void __user *ucookies;
2913 	unsigned long *addrs;
2914 	u32 flags, cnt, size;
2915 	void __user *uaddrs;
2916 	u64 *cookies = NULL;
2917 	void __user *usyms;
2918 	int err;
2920 	/* no support for 32bit archs yet */
2921 	if (sizeof(u64) != sizeof(void *))
2922 		return -EOPNOTSUPP;
2924 	if (!is_kprobe_multi(prog))
2925 		return -EINVAL;
2927 	flags = attr->link_create.kprobe_multi.flags;
2928 	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
2929 		return -EINVAL;
2931 	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
2932 	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
2933 	if (!!uaddrs == !!usyms)
2934 		return -EINVAL;
2936 	cnt = attr->link_create.kprobe_multi.cnt;
2937 	if (!cnt)
2938 		return -EINVAL;
2939 	if (cnt > MAX_KPROBE_MULTI_CNT)
2940 		return -E2BIG;
2942 	size = cnt * sizeof(*addrs);
2943 	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2944 	if (!addrs)
2945 		return -ENOMEM;
2947 	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
2948 	if (ucookies) {
2949 		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2950 		if (!cookies) {
2951 			err = -ENOMEM;
2952 			goto error;
2953 		}
2954 		if (copy_from_user(cookies, ucookies, size)) {
2955 			err = -EFAULT;
2956 			goto error;
2957 		}
2958 	}
2960 	if (uaddrs) {
2961 		if (copy_from_user(addrs, uaddrs, size)) {
2962 			err = -EFAULT;
2963 			goto error;
2964 		}
2965 	} else {
2966 		struct multi_symbols_sort data = {
2967 			.cookies = cookies,
2968 		};
2969 		struct user_syms us;
2971 		err = copy_user_syms(&us, usyms, cnt);
2972 		if (err)
2973 			goto error;
2975 		if (cookies)
2976 			data.funcs = us.syms;
2978 		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
2979 		       symbols_swap_r, &data);
2981 		err = ftrace_lookup_symbols(us.syms, cnt, addrs);
2982 		free_user_syms(&us);
2983 		if (err)
2984 			goto error;
2985 	}
2987 	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
2988 		err = -EINVAL;
2989 		goto error;
2990 	}
2992 	link = kzalloc(sizeof(*link), GFP_KERNEL);
2993 	if (!link) {
2994 		err = -ENOMEM;
2995 		goto error;
2996 	}
2998 	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
2999 		      &bpf_kprobe_multi_link_lops, prog);
3001 	err = bpf_link_prime(&link->link, &link_primer);
3002 	if (err)
3003 		goto error;
3005 	if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
3006 		link->fp.entry_handler = kprobe_multi_link_handler;
3007 	if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog))
3008 		link->fp.exit_handler = kprobe_multi_link_exit_handler;
3009 	if (is_kprobe_session(prog))
3010 		link->fp.entry_data_size = sizeof(u64);
3012 	link->addrs = addrs;
3013 	link->cookies = cookies;
3014 	link->cnt = cnt;
3015 	link->flags = flags;
3017 	if (cookies) {
3018 		/*
3019 		 * Sorting addresses will trigger sorting cookies as well
3020 		 * (check bpf_kprobe_multi_cookie_swap). This way we can
3021 		 * find cookie based on the address in bpf_get_attach_cookie
3022 		 * helper.
3023 		 */
3024 		sort_r(addrs, cnt, sizeof(*addrs),
3025 		       bpf_kprobe_multi_cookie_cmp,
3026 		       bpf_kprobe_multi_cookie_swap,
3027 		       link);
3028 	}
3030 	err = get_modules_for_addrs(&link->mods, addrs, cnt);
3031 	if (err < 0) {
3032 		bpf_link_cleanup(&link_primer);
3033 		return err;
3034 	}
3035 	link->mods_cnt = err;
3037 	err = register_fprobe_ips(&link->fp, addrs, cnt);
3038 	if (err) {
3039 		kprobe_multi_put_modules(link->mods, link->mods_cnt);
3040 		bpf_link_cleanup(&link_primer);
3041 		return err;
3042 	}
3044 	return bpf_link_settle(&link_primer);
3046 error:
3047 	kfree(link);
3048 	kvfree(addrs);
3049 	kvfree(cookies);
3050 	return err;
3051 }
3052 #else /* !CONFIG_FPROBE */
3053 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3054 {
3055 	return -EOPNOTSUPP;
3056 }
3057 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
3058 {
3059 	return 0;
3060 }
3061 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3062 {
3063 	return 0;
3064 }
3065 #endif /* CONFIG_FPROBE */
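/*
 * The uprobe-multi flavour mirrors the kprobe-multi code above: one
 * bpf_uprobe_multi_link owns an array of bpf_uprobe entries, each carrying
 * its own offset, ref_ctr_offset, cookie and uprobe_consumer, all attached
 * to a single inode resolved from the user-supplied path.
 */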
3067 #ifdef CONFIG_UPROBES
3068 struct bpf_uprobe_multi_link;
3070 struct bpf_uprobe {
3071 	struct bpf_uprobe_multi_link *link;
3072 	loff_t offset;
3073 	unsigned long ref_ctr_offset;
3074 	u64 cookie;
3075 	struct uprobe *uprobe;
3076 	struct uprobe_consumer consumer;
3077 };
3079 struct bpf_uprobe_multi_link {
3080 	struct path path;
3081 	struct bpf_link link;
3082 	u32 cnt;
3083 	u32 flags;
3084 	struct bpf_uprobe *uprobes;
3085 	struct task_struct *task;
3086 };
3088 struct bpf_uprobe_multi_run_ctx {
3089 	struct bpf_run_ctx run_ctx;
3090 	unsigned long entry_ip;
3091 	struct bpf_uprobe *uprobe;
3092 };
3094 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
3095 {
3096 	u32 i;
3098 	for (i = 0; i < cnt; i++)
3099 		uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer);
3101 	if (cnt)
3102 		uprobe_unregister_sync();
3103 }
3105 static void bpf_uprobe_multi_link_release(struct bpf_link *link)
3106 {
3107 	struct bpf_uprobe_multi_link *umulti_link;
3109 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3110 	bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
3111 	if (umulti_link->task)
3112 		put_task_struct(umulti_link->task);
3113 	path_put(&umulti_link->path);
3114 }
3116 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
3117 {
3118 	struct bpf_uprobe_multi_link *umulti_link;
3120 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3121 	kvfree(umulti_link->uprobes);
3122 	kfree(umulti_link);
3123 }
*link
,
3126 struct bpf_link_info
*info
)
3128 u64 __user
*uref_ctr_offsets
= u64_to_user_ptr(info
->uprobe_multi
.ref_ctr_offsets
);
3129 u64 __user
*ucookies
= u64_to_user_ptr(info
->uprobe_multi
.cookies
);
3130 u64 __user
*uoffsets
= u64_to_user_ptr(info
->uprobe_multi
.offsets
);
3131 u64 __user
*upath
= u64_to_user_ptr(info
->uprobe_multi
.path
);
3132 u32 upath_size
= info
->uprobe_multi
.path_size
;
3133 struct bpf_uprobe_multi_link
*umulti_link
;
3134 u32 ucount
= info
->uprobe_multi
.count
;
3139 if (!upath
^ !upath_size
)
3142 if ((uoffsets
|| uref_ctr_offsets
|| ucookies
) && !ucount
)
3145 umulti_link
= container_of(link
, struct bpf_uprobe_multi_link
, link
);
3146 info
->uprobe_multi
.count
= umulti_link
->cnt
;
3147 info
->uprobe_multi
.flags
= umulti_link
->flags
;
3148 info
->uprobe_multi
.pid
= umulti_link
->task
?
3149 task_pid_nr_ns(umulti_link
->task
, task_active_pid_ns(current
)) : 0;
3151 upath_size
= upath_size
? min_t(u32
, upath_size
, PATH_MAX
) : PATH_MAX
;
3152 buf
= kmalloc(upath_size
, GFP_KERNEL
);
3155 p
= d_path(&umulti_link
->path
, buf
, upath_size
);
3160 upath_size
= buf
+ upath_size
- p
;
3163 left
= copy_to_user(upath
, p
, upath_size
);
3167 info
->uprobe_multi
.path_size
= upath_size
;
3169 if (!uoffsets
&& !ucookies
&& !uref_ctr_offsets
)
3172 if (ucount
< umulti_link
->cnt
)
3175 ucount
= umulti_link
->cnt
;
3177 for (i
= 0; i
< ucount
; i
++) {
3179 put_user(umulti_link
->uprobes
[i
].offset
, uoffsets
+ i
))
3181 if (uref_ctr_offsets
&&
3182 put_user(umulti_link
->uprobes
[i
].ref_ctr_offset
, uref_ctr_offsets
+ i
))
3185 put_user(umulti_link
->uprobes
[i
].cookie
, ucookies
+ i
))
3192 static const struct bpf_link_ops bpf_uprobe_multi_link_lops
= {
3193 .release
= bpf_uprobe_multi_link_release
,
3194 .dealloc_deferred
= bpf_uprobe_multi_link_dealloc
,
3195 .fill_link_info
= bpf_uprobe_multi_link_fill_link_info
,
3198 static int uprobe_prog_run(struct bpf_uprobe *uprobe,
3199 			   unsigned long entry_ip,
3200 			   struct pt_regs *regs)
3201 {
3202 	struct bpf_uprobe_multi_link *link = uprobe->link;
3203 	struct bpf_uprobe_multi_run_ctx run_ctx = {
3204 		.entry_ip = entry_ip,
3205 		.uprobe = uprobe,
3206 	};
3207 	struct bpf_prog *prog = link->link.prog;
3208 	bool sleepable = prog->sleepable;
3209 	struct bpf_run_ctx *old_run_ctx;
3210 	int err = 0;
3212 	if (link->task && !same_thread_group(current, link->task))
3213 		return 0;
3215 	if (sleepable)
3216 		rcu_read_lock_trace();
3217 	else
3218 		rcu_read_lock();
3220 	migrate_disable();
3222 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
3223 	err = bpf_prog_run(link->link.prog, regs);
3224 	bpf_reset_run_ctx(old_run_ctx);
3226 	migrate_enable();
3228 	if (sleepable)
3229 		rcu_read_unlock_trace();
3230 	else
3231 		rcu_read_unlock();
3232 	return err;
3233 }
3235 static bool
3236 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
3237 {
3238 	struct bpf_uprobe *uprobe;
3240 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3241 	return uprobe->link->task->mm == mm;
3242 }
3244 static int
3245 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
3246 {
3247 	struct bpf_uprobe *uprobe;
3249 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3250 	return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
3251 }
3253 static int
3254 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
3255 {
3256 	struct bpf_uprobe *uprobe;
3258 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3259 	return uprobe_prog_run(uprobe, func, regs);
3260 }
3262 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3263 {
3264 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3266 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
3267 	return run_ctx->entry_ip;
3268 }
3270 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3271 {
3272 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3274 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
3275 	return run_ctx->uprobe->cookie;
3276 }
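/*
 * bpf_uprobe_multi_link_attach() below resolves the target path with
 * kern_path(), optionally pins a task for pid filtering, copies the
 * per-uprobe offsets/ref_ctr_offsets/cookies from user space, registers one
 * uprobe consumer per entry and only then primes and settles the link,
 * unwinding all registrations on any failure.
 */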
3278 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3279 {
3280 	struct bpf_uprobe_multi_link *link = NULL;
3281 	unsigned long __user *uref_ctr_offsets;
3282 	struct bpf_link_primer link_primer;
3283 	struct bpf_uprobe *uprobes = NULL;
3284 	struct task_struct *task = NULL;
3285 	unsigned long __user *uoffsets;
3286 	u64 __user *ucookies;
3287 	void __user *upath;
3288 	u32 flags, cnt, i;
3289 	struct path path;
3290 	char *name;
3291 	pid_t pid;
3292 	int err;
3294 	/* no support for 32bit archs yet */
3295 	if (sizeof(u64) != sizeof(void *))
3296 		return -EOPNOTSUPP;
3298 	if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
3299 		return -EINVAL;
3301 	flags = attr->link_create.uprobe_multi.flags;
3302 	if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
3303 		return -EINVAL;
3305 	/*
3306 	 * path, offsets and cnt are mandatory,
3307 	 * ref_ctr_offsets and cookies are optional
3308 	 */
3309 	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
3310 	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
3311 	cnt = attr->link_create.uprobe_multi.cnt;
3312 	pid = attr->link_create.uprobe_multi.pid;
3314 	if (!upath || !uoffsets || !cnt || pid < 0)
3315 		return -EINVAL;
3316 	if (cnt > MAX_UPROBE_MULTI_CNT)
3317 		return -E2BIG;
3319 	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
3320 	ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
3322 	name = strndup_user(upath, PATH_MAX);
3323 	if (IS_ERR(name)) {
3324 		err = PTR_ERR(name);
3325 		return err;
3326 	}
3328 	err = kern_path(name, LOOKUP_FOLLOW, &path);
3329 	kfree(name);
3330 	if (err)
3331 		return err;
3333 	if (!d_is_reg(path.dentry)) {
3334 		err = -EBADF;
3335 		goto error_path_put;
3336 	}
3338 	if (pid) {
3339 		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
3340 		if (!task) {
3341 			err = -ESRCH;
3342 			goto error_path_put;
3343 		}
3344 	}
3346 	err = -ENOMEM;
3348 	link = kzalloc(sizeof(*link), GFP_KERNEL);
3349 	uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL);
3351 	if (!uprobes || !link)
3352 		goto error_free;
3354 	for (i = 0; i < cnt; i++) {
3355 		if (__get_user(uprobes[i].offset, uoffsets + i)) {
3356 			err = -EFAULT;
3357 			goto error_free;
3358 		}
3359 		if (uprobes[i].offset < 0) {
3360 			err = -EINVAL;
3361 			goto error_free;
3362 		}
3363 		if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
3364 			err = -EFAULT;
3365 			goto error_free;
3366 		}
3367 		if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
3368 			err = -EFAULT;
3369 			goto error_free;
3370 		}
3372 		uprobes[i].link = link;
3374 		if (flags & BPF_F_UPROBE_MULTI_RETURN)
3375 			uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
3376 		else
3377 			uprobes[i].consumer.handler = uprobe_multi_link_handler;
3379 		if (pid)
3380 			uprobes[i].consumer.filter = uprobe_multi_link_filter;
3381 	}
3383 	link->cnt = cnt;
3384 	link->uprobes = uprobes;
3385 	link->path = path;
3386 	link->task = task;
3387 	link->flags = flags;
3389 	bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
3390 		      &bpf_uprobe_multi_link_lops, prog);
3392 	for (i = 0; i < cnt; i++) {
3393 		uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
3394 						    uprobes[i].offset,
3395 						    uprobes[i].ref_ctr_offset,
3396 						    &uprobes[i].consumer);
3397 		if (IS_ERR(uprobes[i].uprobe)) {
3398 			err = PTR_ERR(uprobes[i].uprobe);
3399 			link->cnt = i;
3400 			goto error_unregister;
3401 		}
3402 	}
3404 	err = bpf_link_prime(&link->link, &link_primer);
3405 	if (err)
3406 		goto error_unregister;
3408 	return bpf_link_settle(&link_primer);
3410 error_unregister:
3411 	bpf_uprobe_unregister(uprobes, link->cnt);
3413 error_free:
3414 	kvfree(uprobes);
3415 	kfree(link);
3416 	if (task)
3417 		put_task_struct(task);
3418 error_path_put:
3419 	path_put(&path);
3420 	return err;
3421 }
3422 #else /* !CONFIG_UPROBES */
3423 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3424 {
3425 	return -EOPNOTSUPP;
3426 }
3427 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3428 {
3429 	return 0;
3430 }
3431 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3432 {
3433 	return 0;
3434 }
3435 #endif /* CONFIG_UPROBES */
3437 __bpf_kfunc_start_defs();
3439 __bpf_kfunc bool bpf_session_is_return(void)
3440 {
3441 	struct bpf_session_run_ctx *session_ctx;
3443 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3444 	return session_ctx->is_return;
3445 }
3447 __bpf_kfunc __u64 *bpf_session_cookie(void)
3448 {
3449 	struct bpf_session_run_ctx *session_ctx;
3451 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3452 	return session_ctx->data;
3453 }
3455 __bpf_kfunc_end_defs();
3457 BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids)
3458 BTF_ID_FLAGS(func, bpf_session_is_return)
3459 BTF_ID_FLAGS(func, bpf_session_cookie)
3460 BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids)
3462 static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
3463 {
3464 	if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id))
3465 		return 0;
3467 	if (!is_kprobe_session(prog))
3468 		return -EACCES;
3470 	return 0;
3471 }
3473 static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = {
3474 	.owner = THIS_MODULE,
3475 	.set = &kprobe_multi_kfunc_set_ids,
3476 	.filter = bpf_kprobe_multi_filter,
3477 };
3479 static int __init bpf_kprobe_multi_kfuncs_init(void)
3480 {
3481 	return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set);
3482 }
3484 late_initcall(bpf_kprobe_multi_kfuncs_init);