// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};

const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
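
/* Each trampoline is keyed by a 64-bit value derived from its attach target
 * (see bpf_trampoline_compute_key()); bpf_trampoline_lookup() hashes that key
 * into trampoline_table under trampoline_mutex.
 */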
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr = ops->private;
	int ret = 0;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called inside register_ftrace_direct_multi(), so
		 * tr->mutex is already locked.
		 */
		lockdep_assert_held_once(&tr->mutex);

		/* Instead of updating the trampoline here, we propagate
		 * -EAGAIN to register_ftrace_direct(). Then we can
		 * retry register_ftrace_direct() after updating the
		 * trampoline.
		 */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
				return -EBUSY;

			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
			return -EAGAIN;
		}

		return 0;
	}

	/* The normal locking order is
	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
	 *
	 * The following two commands are called from
	 *
	 *   prepare_direct_functions_for_ipmodify
	 *   cleanup_direct_functions_after_ipmodify
	 *
	 * In both cases, direct_mutex is already locked. Use
	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
	 * (something else is making changes to this same trampoline).
	 */
	if (!mutex_trylock(&tr->mutex)) {
		/* sleep 1 ms to make sure whatever holding tr->mutex makes
		 * some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

	switch (cmd) {
	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;

		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;

		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tr->mutex);
	return ret;
}
#endif
bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type ptype = prog->type;

	return (ptype == BPF_PROG_TYPE_TRACING &&
		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
		 eatype == BPF_MODIFY_RETURN)) ||
		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + size;
}
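
/* Publish/remove the trampoline image as a BPF ksym and emit the matching
 * perf KSYMBOL records so profilers can symbolize trampoline addresses.
 */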
void bpf_image_ksym_add(struct bpf_ksym *ksym)
{
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}
static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
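
/* Attach helpers below: when the target address is managed by ftrace
 * (ftrace_location() knows it), the trampoline is installed through the
 * register/modify/unregister_ftrace_direct() API; otherwise the call site
 * is patched directly with bpf_arch_text_poke(BPF_MOD_CALL).
 */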
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);

	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
			 bool lock_direct_mutex)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed) {
		if (lock_direct_mutex)
			ret = modify_ftrace_direct(tr->fops, (long)new_addr);
		else
			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	}

	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	unsigned long faddr;
	int ret;

	faddr = ftrace_location((unsigned long)ip);
	if (faddr) {
		if (!tr->fops)
			return -ENOTSUPP;
		tr->func.ftrace_managed = true;
	}

	if (tr->func.ftrace_managed) {
		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
		ret = register_ftrace_direct(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	}

	return ret;
}
static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
	struct bpf_tramp_link *link;
	struct bpf_tramp_links *tlinks;
	struct bpf_tramp_link **links;
	int kind;

	*total = 0;
	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tlinks[kind].nr_links = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		links = tlinks[kind].links;

		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			*ip_arg |= link->link.prog->call_get_func_ip;
			*links++ = link;
		}
	}
	return tlinks;
}

static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
	bpf_image_ksym_del(&im->ksym);
	arch_free_bpf_trampoline(im->image, im->size);
	bpf_jit_uncharge_modmem(im->size);
	percpu_ref_exit(&im->pcref);
	kfree_rcu(im, rcu);
}
static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
	struct bpf_tramp_image *im;

	im = container_of(work, struct bpf_tramp_image, work);
	bpf_tramp_image_free(im);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
	schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
static void __bpf_tramp_image_release(struct percpu_ref *pcref)
{
	struct bpf_tramp_image *im;

	im = container_of(pcref, struct bpf_tramp_image, pcref);
	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

/* callback, fexit or fentry step 1 */
static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	if (im->ip_after_call)
		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
		percpu_ref_kill(&im->pcref);
	else
		/* the case of fentry trampoline */
		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}
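
/* Summary of the teardown chain driven by the callbacks above:
 * fexit/fmod_ret image: poke out the epilogue jump -> call_rcu_tasks ->
 * percpu_ref_kill -> call_rcu_tasks -> workqueue -> bpf_tramp_image_free().
 * fentry-only image: call_rcu_tasks_trace -> call_rcu_tasks -> workqueue ->
 * bpf_tramp_image_free().
 */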
static void bpf_tramp_image_put(struct bpf_tramp_image *im)
{
	/* The trampoline image that calls original function is using:
	 * rcu_read_lock_trace to protect sleepable bpf progs
	 * rcu_read_lock to protect normal bpf progs
	 * percpu_ref to protect trampoline itself
	 * rcu tasks to protect trampoline asm not covered by percpu_ref
	 * (which are few asm insns before __bpf_tramp_enter and
	 *  after __bpf_tramp_exit)
	 *
	 * The trampoline is unreachable before bpf_tramp_image_put().
	 *
	 * First, patch the trampoline to avoid calling into fexit progs.
	 * The progs will be freed even if the original function is still
	 * executing or sleeping.
	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
	 * first few asm instructions to execute and call into
	 * __bpf_tramp_enter->percpu_ref_get.
	 * Then use percpu_ref_kill to wait for the trampoline and the original
	 * function to finish.
	 * Then use call_rcu_tasks() to make sure few asm insns in
	 * the trampoline epilogue are done as well.
	 *
	 * In !PREEMPT case the task that got interrupted in the first asm
	 * insns won't go through an RCU quiescent state which the
	 * percpu_ref_kill will be waiting for. Hence the first
	 * call_rcu_tasks() is not necessary.
	 */
	if (im->ip_after_call) {
		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
					     NULL, im->ip_epilogue);
		WARN_ON(err);
		if (IS_ENABLED(CONFIG_TASKS_RCU))
			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
		else
			percpu_ref_kill(&im->pcref);
		return;
	}

	/* The trampoline without fexit and fmod_ret progs doesn't call original
	 * function and doesn't use percpu_ref.
	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
	 * not covered by percpu_ref.
	 */
	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}
static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
{
	struct bpf_tramp_image *im;
	struct bpf_ksym *ksym;
	void *image;
	int err = -ENOMEM;

	im = kzalloc(sizeof(*im), GFP_KERNEL);
	if (!im)
		goto out;

	err = bpf_jit_charge_modmem(size);
	if (err)
		goto out_free_im;
	im->size = size;

	err = -ENOMEM;
	im->image = image = arch_alloc_bpf_trampoline(size);
	if (!image)
		goto out_uncharge;

	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
	if (err)
		goto out_free_image;

	ksym = &im->ksym;
	INIT_LIST_HEAD_RCU(&ksym->lnode);
	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
	bpf_image_ksym_init(image, size, ksym);
	bpf_image_ksym_add(ksym);
	return im;

out_free_image:
	arch_free_bpf_trampoline(im->image, im->size);
out_uncharge:
	bpf_jit_uncharge_modmem(size);
out_free_im:
	kfree(im);
out:
	return ERR_PTR(err);
}
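
/* Regenerate the trampoline for 'tr': collect the currently attached links,
 * recompute the flags, JIT a new image and switch the fentry patch site from
 * the old image to the new one.
 */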
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
	struct bpf_tramp_image *im;
	struct bpf_tramp_links *tlinks;
	u32 orig_flags = tr->flags;
	bool ip_arg = false;
	int err, total, size;

	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
	if (IS_ERR(tlinks))
		return PTR_ERR(tlinks);

	if (total == 0) {
		err = unregister_fentry(tr, tr->cur_image->image);
		bpf_tramp_image_put(tr->cur_image);
		tr->cur_image = NULL;
		goto out;
	}

	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);

	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
		 * should not be set together.
		 */
		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
	} else {
		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
	}

	if (ip_arg)
		tr->flags |= BPF_TRAMP_F_IP_ARG;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif

	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
					tlinks, tr->func.addr);
	if (size < 0) {
		err = size;
		goto out;
	}

	if (size > PAGE_SIZE) {
		err = -E2BIG;
		goto out;
	}

	im = bpf_tramp_image_alloc(tr->key, size);
	if (IS_ERR(im)) {
		err = PTR_ERR(im);
		goto out;
	}

	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
					  &tr->func.model, tr->flags, tlinks,
					  tr->func.addr);
	if (err < 0)
		goto out_free;

	err = arch_protect_bpf_trampoline(im->image, im->size);
	if (err)
		goto out_free;

	WARN_ON(tr->cur_image && total == 0);
	if (tr->cur_image)
		/* progs already running at this address */
		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
	else
		/* first time registering */
		err = register_fentry(tr, im->image);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	if (err == -EAGAIN) {
		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
		 * trampoline again, and retry register.
		 */
		/* reset fops->func and fops->trampoline for re-register */
		tr->fops->func = NULL;
		tr->fops->trampoline = 0;

		/* free im memory and reallocate later */
		bpf_tramp_image_free(im);
		goto again;
	}
#endif
	if (err)
		goto out_free;

	if (tr->cur_image)
		bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = im;
out:
	/* If any error happens, restore previous flags */
	if (err)
		tr->flags = orig_flags;
	kfree(tlinks);
	return err;

out_free:
	bpf_tramp_image_free(im);
	goto out;
}
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
{
	struct bpf_prog_aux *aux = tgt_prog->aux;

	guard(mutex)(&aux->ext_mutex);
	if (aux->prog_array_member_cnt)
		/* Program extensions can not extend target prog when the target
		 * prog has been updated to any prog_array map as tail callee.
		 * It's to prevent a potential infinite loop like:
		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
		 * --tailcall-> tgt prog entry.
		 */
		return -EBUSY;

	aux->is_extended = true;
	return 0;
}
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
				      struct bpf_trampoline *tr,
				      struct bpf_prog *tgt_prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_tramp_link *link_exiting;
	int err = 0;
	int cnt = 0, i;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (tr->extension_prog)
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		return -EBUSY;

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		cnt += tr->progs_cnt[i];

	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt)
			return -EBUSY;
		err = bpf_freplace_check_tgt_prog(tgt_prog);
		if (err)
			return err;
		tr->extension_prog = link->link.prog;
		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					  link->link.prog->bpf_func);
	}
	if (cnt >= BPF_MAX_TRAMP_LINKS)
		return -E2BIG;
	if (!hlist_unhashed(&link->tramp_hlist))
		/* prog already linked */
		return -EBUSY;
	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
		if (link_exiting->link.prog != link->link.prog)
			continue;
		/* prog already linked */
		return -EBUSY;
	}

	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
	if (err) {
		hlist_del_init(&link->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
	return err;
}
int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
			     struct bpf_trampoline *tr,
			     struct bpf_prog *tgt_prog)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
	mutex_unlock(&tr->mutex);
	return err;
}

static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
					struct bpf_trampoline *tr,
					struct bpf_prog *tgt_prog)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		guard(mutex)(&tgt_prog->aux->ext_mutex);
		tgt_prog->aux->is_extended = false;
		return err;
	}

	hlist_del_init(&link->tramp_hlist);
	tr->progs_cnt[kind]--;
	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
			       struct bpf_trampoline *tr,
			       struct bpf_prog *tgt_prog)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
	mutex_unlock(&tr->mutex);
	return err;
}
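
/* BPF_LSM_CGROUP support: a "shim" is a stub bpf_prog whose bpf_func points
 * at the shim chosen by bpf_lsm_find_cgroup_shim(). It is linked into the
 * LSM hook's trampoline like any other tramp prog and shared (via link
 * refcount) by all cgroup LSM programs attached to that hook.
 */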
#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
	if (!shim_link->trampoline)
		return;

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
	bpf_trampoline_put(shim_link->trampoline);
}

static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
	.release = bpf_shim_tramp_link_release,
	.dealloc = bpf_shim_tramp_link_dealloc,
};
static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_prog *p;

	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
	if (!shim_link)
		return NULL;

	p = bpf_prog_alloc(1, 0);
	if (!p) {
		kfree(shim_link);
		return NULL;
	}

	p->jited = false;
	p->bpf_func = bpf_func;

	p->aux->cgroup_atype = cgroup_atype;
	p->aux->attach_func_proto = prog->aux->attach_func_proto;
	p->aux->attach_btf_id = prog->aux->attach_btf_id;
	p->aux->attach_btf = prog->aux->attach_btf;
	btf_get(p->aux->attach_btf);
	p->type = BPF_PROG_TYPE_LSM;
	p->expected_attach_type = BPF_LSM_MAC;

	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
		      &bpf_shim_tramp_link_lops, p);
	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

	return shim_link;
}

static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
						    bpf_func_t bpf_func)
{
	struct bpf_tramp_link *link;
	int kind;

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			struct bpf_prog *p = link->link.prog;

			if (p->bpf_func == bpf_func)
				return container_of(link, struct bpf_shim_tramp_link, link);
		}
	}

	return NULL;
}
int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_attach_target_info tgt_info = {};
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;
	int err;

	err = bpf_check_attach_target(NULL, prog, NULL,
				      prog->aux->attach_btf_id,
				      &tgt_info);
	if (err)
		return err;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	mutex_lock(&tr->mutex);

	shim_link = cgroup_shim_find(tr, bpf_func);
	if (shim_link) {
		/* Reusing existing shim attached by the other program. */
		bpf_link_inc(&shim_link->link.link);

		mutex_unlock(&tr->mutex);
		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
		return 0;
	}

	/* Allocate and install new shim. */

	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
	if (!shim_link) {
		err = -ENOMEM;
		goto err;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
	if (err)
		goto err;

	shim_link->trampoline = tr;
	/* note, we're still holding tr refcnt from above */

	mutex_unlock(&tr->mutex);

	return 0;
err:
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	/* have to release tr while _not_ holding its mutex */
	bpf_trampoline_put(tr); /* bpf_trampoline_get above */

	return err;
}

void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key);
	if (WARN_ON_ONCE(!tr))
		return;

	mutex_lock(&tr->mutex);
	shim_link = cgroup_shim_find(tr, bpf_func);
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif
struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	int i;

	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
			goto out;

	/* This code will be executed even when the last bpf_tramp_image
	 * is alive. All progs are detached from the trampoline and the
	 * trampoline image is patched with jmp into epilogue to skip
	 * fexit progs. The fentry-only trampoline will be freed via
	 * multiple rcu callbacks.
	 */
	hlist_del(&tr->hlist);
	if (tr->fops) {
		ftrace_free_filter(tr->fops);
		kfree(tr->fops);
	}
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}
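
/* bpf_prog_start_time() feeds the stats machinery below: 0 from an enter
 * handler means "skip the prog", NO_START_TIME means "run it but don't
 * time it", and anything larger is a sched_clock() timestamp that
 * update_prog_stats() later turns into a duration.
 */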
#define NO_START_TIME 1
static __always_inline u64 notrace bpf_prog_start_time(void)
{
	u64 start = NO_START_TIME;

	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		start = sched_clock();
		if (unlikely(!start))
			start = NO_START_TIME;
	}
	return start;
}

/* The logic is similar to bpf_prog_run(), but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - execute bpf prog and record execution time.
 *     This is start time.
 */
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		if (prog->aux->recursion_detected)
			prog->aux->recursion_detected(prog);
		return 0;
	}
	return bpf_prog_start_time();
}
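
/* A generated trampoline therefore wraps each prog roughly like this
 * (sketch of the emitted call sequence, not the actual assembly):
 *
 *	start = __bpf_prog_enter_recur(prog, &run_ctx);
 *	if (start)
 *		call prog->bpf_func
 *	__bpf_prog_exit_recur(prog, start, &run_ctx);
 */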
static void notrace update_prog_stats(struct bpf_prog *prog,
				      u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter*
	     * and disabled in __bpf_prog_exit*.
	     * And vice versa.
	     * Hence check that 'start' is valid.
	     */
	    start > NO_START_TIME) {
		u64 duration = sched_clock() - start;
		unsigned long flags;

		stats = this_cpu_ptr(prog->stats);
		flags = u64_stats_update_begin_irqsave(&stats->syncp);
		u64_stats_inc(&stats->cnt);
		u64_stats_add(&stats->nsecs, duration);
		u64_stats_update_end_irqrestore(&stats->syncp, flags);
	}
}

static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
					  struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock();
}

static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
					       struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	/* Runtime stats are exported via actual BPF_LSM_CGROUP
	 * programs, not the shims.
	 */
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return NO_START_TIME;
}

static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
					       struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	migrate_enable();
	rcu_read_unlock();
}
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		if (prog->aux->recursion_detected)
			prog->aux->recursion_detected(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
				    struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
				    struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock();
}
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
	percpu_ref_get(&tr->pcref);
}

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
	percpu_ref_put(&tr->pcref);
}
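
/* Pick the enter/exit pair the JIT should emit calls to for a given prog:
 * recursion-protected variants when bpf_prog_check_recur() says so, the
 * lsm_cgroup variants for BPF_LSM_CGROUP shims, otherwise the sleepable or
 * plain variants depending on prog->sleepable.
 */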
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_enter_sleepable_recur :
			__bpf_prog_enter_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_enter_lsm_cgroup;

	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}

bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_exit_sleepable_recur :
			__bpf_prog_exit_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_exit_lsm_cgroup;

	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}
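
/* Weak fallbacks for architectures without a trampoline JIT: they either
 * return -ENOTSUPP or implement a generic page-sized allocation scheme.
 */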
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_links *tlinks,
			    void *func_addr)
{
	return -ENOTSUPP;
}

void * __weak arch_alloc_bpf_trampoline(unsigned int size)
{
	void *image;

	if (WARN_ON_ONCE(size > PAGE_SIZE))
		return NULL;
	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (image)
		set_vm_flush_reset_perms(image);
	return image;
}

void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	/* bpf_jit_free_exec doesn't need "size", but
	 * bpf_prog_pack_free() needs it.
	 */
	bpf_jit_free_exec(image);
}

int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	return set_memory_rox((long)image, 1);
}

int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
				    struct bpf_tramp_links *tlinks, void *func_addr)
{
	return -ENOTSUPP;
}
static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);