// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
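
/* bpf_jit_alloc_exec_page() below allocates the single executable page that
 * backs a trampoline image.  The page is intentionally kept writable so
 * updates do not have to flip it between ro and rw; see the comment inside
 * the function.
 */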

void *bpf_jit_alloc_exec_page(void)
{
	void *image;

	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return NULL;

	set_vm_flush_reset_perms(image);
	/* Keep image as writeable. The alternative is to keep flipping ro/rw
	 * every time a new program is attached or detached.
	 */
	set_memory_x((long)image, 1);
	return image;
}
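
/* bpf_image_ksym_add()/bpf_image_ksym_del() publish and retract a generated
 * image as a kallsyms symbol covering one page, and emit the corresponding
 * PERF_RECORD_KSYMBOL register/unregister events so profilers can attribute
 * samples that land inside the generated code.
 */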

void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + PAGE_SIZE;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
	struct bpf_ksym *ksym = &tr->ksym;

	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
	bpf_image_ksym_add(tr->image, ksym);
}
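
/* bpf_trampoline_lookup() returns the trampoline for @key, taking a
 * reference if one already exists, or allocating and initializing a new one
 * (image page, refcount, per-kind program lists, ksym) otherwise.  All of
 * this happens under trampoline_mutex, which serializes hash table access.
 */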

static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	void *image;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	image = bpf_jit_alloc_exec_page();
	if (!image) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
	tr->image = image;
	INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
	bpf_trampoline_ksym_add(tr);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
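
/* is_ftrace_location() classifies the patch site at @ip: it returns 1 when
 * ftrace manages that address, 0 when it does not, and -EFAULT when ftrace
 * reports a patch site that is not exactly at @ip.
 */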

static int is_ftrace_location(void *ip)
{
	long addr;

	addr = ftrace_location((long)ip);
	if (!addr)
		return 0;
	if (WARN_ON_ONCE(addr != (long)ip))
		return -EFAULT;
	return 1;
}
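
/* The fentry helpers below install, update and remove the call into the
 * trampoline at the traced function's entry.  If the target is an
 * ftrace-managed location, the ftrace direct-call API is used so the
 * attachment cooperates with other ftrace users; otherwise the instruction
 * is patched directly via bpf_arch_text_poke().
 */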

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	ret = is_ftrace_location(ip);
	if (ret < 0)
		return ret;
	tr->func.ftrace_managed = ret;

	if (tr->func.ftrace_managed)
		ret = register_ftrace_direct((long)ip, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	return ret;
}
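
/* bpf_trampoline_get_progs() snapshots the currently attached programs,
 * grouped by kind (fentry, modify_return, fexit), into a freshly allocated
 * array the caller must kfree().  *total is set to the combined count so
 * the caller can tell when the trampoline has become empty.
 */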

static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
	const struct bpf_prog_aux *aux;
	struct bpf_tramp_progs *tprogs;
	struct bpf_prog **progs;
	int kind;

	*total = 0;
	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
	if (!tprogs)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tprogs[kind].nr_progs = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		progs = tprogs[kind].progs;

		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
			*progs++ = aux->prog;
	}
	return tprogs;
}
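
/* bpf_trampoline_update() regenerates the trampoline whenever the attached
 * program set changes.  The image page is used as two halves: the low bit
 * of tr->selector selects the half that is currently live, the new code is
 * built into the other half, and the patched call site is then switched
 * over to it.  When no programs remain, the fentry attachment is removed.
 */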

static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
	struct bpf_tramp_progs *tprogs;
	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
	int err, total;

	tprogs = bpf_trampoline_get_progs(tr, &total);
	if (IS_ERR(tprogs))
		return PTR_ERR(tprogs);

	if (total == 0) {
		err = unregister_fentry(tr, old_image);
		tr->selector = 0;
		goto out;
	}

	if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
	    tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

	/* Though the second half of trampoline page is unused a task could be
	 * preempted in the middle of the first half of trampoline and two
	 * updates to trampoline would change the code from underneath the
	 * preempted task. Hence wait for tasks to voluntarily schedule or go
	 * to userspace.
	 * The same trampoline can hold both sleepable and non-sleepable progs.
	 * synchronize_rcu_tasks_trace() is needed to make sure all sleepable
	 * programs finish executing.
	 * Wait for these two grace periods together.
	 */
	synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace);

	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
					  &tr->func.model, flags, tprogs,
					  tr->func.addr);
	if (err < 0)
		goto out;

	if (tr->selector)
		/* progs already running at this address */
		err = modify_fentry(tr, old_image, new_image);
	else
		/* first time registering */
		err = register_fentry(tr, new_image);
	if (err)
		goto out;
	tr->selector++;
out:
	kfree(tprogs);
	return err;
}

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}
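
/* bpf_trampoline_link_prog() attaches one program under tr->mutex.  An
 * extension program (BPF_TRAMP_REPLACE) takes over the target with a direct
 * jump and is therefore mutually exclusive with fentry/fexit attachments;
 * other kinds are added to the per-kind list and the trampoline is rebuilt,
 * rolling the list change back if the rebuild fails.
 */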

int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err = 0;
	int cnt;

	kind = bpf_attach_type_to_tramp(prog);
	mutex_lock(&tr->mutex);
	if (tr->extension_prog) {
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		err = -EBUSY;
		goto out;
	}
	cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt) {
			err = -EBUSY;
			goto out;
		}
		tr->extension_prog = prog;
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					 prog->bpf_func);
		goto out;
	}
	if (cnt >= BPF_MAX_TRAMP_PROGS) {
		err = -E2BIG;
		goto out;
	}
	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
		/* prog already linked */
		err = -EBUSY;
		goto out;
	}
	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr);
	if (err) {
		hlist_del(&prog->aux->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
out:
	mutex_unlock(&tr->mutex);
	return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(prog);
	mutex_lock(&tr->mutex);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		goto out;
	}
	hlist_del(&prog->aux->tramp_hlist);
	tr->progs_cnt[kind]--;
	err = bpf_trampoline_update(tr);
out:
	mutex_unlock(&tr->mutex);
	return err;
}
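
/* bpf_trampoline_get() returns a referenced trampoline for @key and, on
 * first use, records the target's address and argument model from
 * @tgt_info so the trampoline code can be generated later.
 */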

struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}
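
/* bpf_trampoline_put() drops one reference.  The final put unpublishes the
 * ksym, waits for tasks that might still be executing inside the
 * trampoline, and only then frees the image page and the trampoline.
 */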

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
		goto out;
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
		goto out;
	bpf_image_ksym_del(&tr->ksym);
	/* This code will be executed when all bpf progs (both sleepable and
	 * non-sleepable) went through
	 * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred().
	 * Hence no need for another synchronize_rcu_tasks_trace() here,
	 * but synchronize_rcu_tasks() is still needed, since trampoline
	 * may not have had any sleepable programs and we need to wait
	 * for tasks to get out of trampoline code before freeing it.
	 */
	synchronize_rcu_tasks();
	bpf_jit_free_exec(tr->image);
	hlist_del(&tr->hlist);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
	__acquires(RCU)
{
	u64 start = 0;

	rcu_read_lock();
	migrate_disable();
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		start = sched_clock();
	return start;
}

void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
	__releases(RCU)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter
	     * and disabled in __bpf_prog_exit.
	     * And vice versa.
	     * Hence check that 'start' is not zero.
	     */
	    start) {
		stats = this_cpu_ptr(prog->aux->stats);
		u64_stats_update_begin(&stats->syncp);
		stats->cnt++;
		stats->nsecs += sched_clock() - start;
		u64_stats_update_end(&stats->syncp);
	}
	migrate_enable();
	rcu_read_unlock();
}
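
/* Sleepable BPF programs may fault and block, so they cannot run under the
 * regular RCU read lock.  The sleepable enter/exit pair below brackets the
 * program with rcu_read_lock_trace()/rcu_read_unlock_trace(); this is the
 * grace period the update path above waits for via
 * synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace).
 */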
void notrace __bpf_prog_enter_sleepable(void)
{
	rcu_read_lock_trace();
	might_fault();
}

void notrace __bpf_prog_exit_sleepable(void)
{
	rcu_read_unlock_trace();
}
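
/* arch_prepare_bpf_trampoline() below is a weak stub returning -ENOTSUPP.
 * Architectures that support BPF trampolines override it in their JIT
 * (x86-64 was the first) to emit the actual trampoline machine code into
 * the image between @image and @image_end.
 */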
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_progs *tprogs,
			    void *orig_call)
{
	return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);