// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};

const struct bpf_prog_ops bpf_extension_prog_ops = {
};
/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
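/* Back-of-the-envelope load factor: ~22000 attachable functions spread over
 * 1 << 10 = 1024 buckets is roughly 21-22 trampolines per bucket even if
 * every attachable function had a trampoline, which hash lookup handles fine.
 */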
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
static struct latch_tree_root image_tree __cacheline_aligned;

/* serializes access to trampoline_table and image_tree */
static DEFINE_MUTEX(trampoline_mutex);
static void *bpf_jit_alloc_exec_page(void)
{
        void *image;

        image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!image)
                return NULL;

        set_vm_flush_reset_perms(image);
        /* Keep image as writeable. The alternative is to keep flipping ro/rw
         * every time a new program is attached or detached.
         */
        set_memory_x((long)image, 1);
        return image;
}
static __always_inline bool image_tree_less(struct latch_tree_node *a,
                                            struct latch_tree_node *b)
{
        struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
        struct bpf_image *ib = container_of(b, struct bpf_image, tnode);

        return ia < ib;
}

static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
{
        void *image = container_of(n, struct bpf_image, tnode);

        if (addr < image)
                return -1;
        if (addr >= image + PAGE_SIZE)
                return 1;

        return 0;
}
static const struct latch_tree_ops image_tree_ops = {
        .less   = image_tree_less,
        .comp   = image_tree_comp,
};
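/* The latch tree gives readers a lockless, RCU-friendly way to answer "does
 * this address fall inside a trampoline image?", so is_bpf_image_address()
 * below never has to take trampoline_mutex; only insert and erase do.
 */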
static void *__bpf_image_alloc(bool lock)
{
        struct bpf_image *image;

        image = bpf_jit_alloc_exec_page();
        if (!image)
                return NULL;

        if (lock)
                mutex_lock(&trampoline_mutex);
        latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
        if (lock)
                mutex_unlock(&trampoline_mutex);
        return image->data;
}

void *bpf_image_alloc(void)
{
        return __bpf_image_alloc(true);
}
bool is_bpf_image_address(unsigned long addr)
{
        bool ret;

        rcu_read_lock();
        ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
        rcu_read_unlock();

        return ret;
}
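/* Look up the trampoline for @key and take a reference on it, allocating a
 * new trampoline (including its image page) on first use.  Callers derive
 * @key from the attach target, typically its BTF id.
 */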
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
        struct bpf_trampoline *tr;
        struct hlist_head *head;
        void *image;
        int i;

        mutex_lock(&trampoline_mutex);
        head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
        hlist_for_each_entry(tr, head, hlist) {
                if (tr->key == key) {
                        refcount_inc(&tr->refcnt);
                        goto out;
                }
        }
        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
        if (!tr)
                goto out;

        /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
        image = __bpf_image_alloc(false);
        if (!image) {
                kfree(tr);
                tr = NULL;
                goto out;
        }

        tr->key = key;
        INIT_HLIST_NODE(&tr->hlist);
        hlist_add_head(&tr->hlist, head);
        refcount_set(&tr->refcnt, 1);
        mutex_init(&tr->mutex);
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                INIT_HLIST_HEAD(&tr->progs_hlist[i]);
        tr->image = image;
out:
        mutex_unlock(&trampoline_mutex);
        return tr;
}
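/* The traced function's fentry nop may be an ftrace-managed patch site
 * (ftrace_location() knows about it).  In that case the trampoline must be
 * installed via the ftrace direct-call API instead of poking the text
 * directly; register/modify/unregister_fentry below pick the mechanism
 * based on tr->func.ftrace_managed.
 */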
static int is_ftrace_location(void *ip)
{
        long addr;

        addr = ftrace_location((long)ip);
        if (!addr)
                return 0;
        if (WARN_ON_ONCE(addr != (long)ip))
                return -EFAULT;
        return 1;
}
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = unregister_ftrace_direct((long)ip, (long)old_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
        return ret;
}
static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
        return ret;
}
/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        ret = is_ftrace_location(ip);
        if (ret < 0)
                return ret;
        tr->func.ftrace_managed = ret;

        if (tr->func.ftrace_managed)
                ret = register_ftrace_direct((long)ip, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
        return ret;
}
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
 * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
 */
#define BPF_MAX_TRAMP_PROGS 40
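/* Sizing sketch (assuming 4 KiB pages): 40 programs * ~50 bytes per
 * enter/call/exit sequence is ~2000 bytes, which fits within one half of
 * the image (BPF_IMAGE_SIZE / 2, i.e. roughly half a page minus the
 * struct bpf_image header).
 */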
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
        void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
        void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
        struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
        int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
        int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
        struct bpf_prog **progs, **fentry, **fexit;
        u32 flags = BPF_TRAMP_F_RESTORE_REGS;
        struct bpf_prog_aux *aux;
        int err;

        if (fentry_cnt + fexit_cnt == 0) {
                err = unregister_fentry(tr, old_image);
                tr->selector = 0;
                goto out;
        }

        /* populate fentry progs */
        fentry = progs = progs_to_run;
        hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
                *progs++ = aux->prog;

        /* populate fexit progs */
        fexit = progs;
        hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
                *progs++ = aux->prog;

        if (fexit_cnt)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

        /* Though the second half of trampoline page is unused a task could be
         * preempted in the middle of the first half of trampoline and two
         * updates to trampoline would change the code from underneath the
         * preempted task. Hence wait for tasks to voluntarily schedule or go
         * to userspace.
         */
        synchronize_rcu_tasks();
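        /* Double-buffering sketch: tr->selector picks which half of the image
         * is (re)generated next.  With selector == 0 the new code goes into
         * the first half and old_image points at the second; after a
         * successful switch the selector is incremented and the two halves
         * swap roles on the next update.
         */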
        err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
                                          &tr->func.model, flags,
                                          fentry, fentry_cnt,
                                          fexit, fexit_cnt,
                                          tr->func.addr);
        if (err < 0)
                goto out;

        if (tr->selector)
                /* progs already running at this address */
                err = modify_fentry(tr, old_image, new_image);
        else
                /* first time registering */
                err = register_fentry(tr, new_image);
        if (err)
                goto out;
        tr->selector++;
out:
        return err;
}
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
{
        switch (t) {
        case BPF_TRACE_FENTRY:
                return BPF_TRAMP_FENTRY;
        case BPF_TRACE_FEXIT:
                return BPF_TRAMP_FEXIT;
        default:
                return BPF_TRAMP_REPLACE;
        }
}
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err = 0;
        int cnt;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
        mutex_lock(&tr->mutex);
        if (tr->extension_prog) {
                /* cannot attach fentry/fexit if extension prog is attached.
                 * cannot overwrite extension prog either.
                 */
                err = -EBUSY;
                goto out;
        }
        cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
        if (kind == BPF_TRAMP_REPLACE) {
                /* Cannot attach extension if fentry/fexit are in use. */
                if (cnt) {
                        err = -EBUSY;
                        goto out;
                }
                tr->extension_prog = prog;
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
                                         prog->bpf_func);
                goto out;
        }
        if (cnt >= BPF_MAX_TRAMP_PROGS) {
                err = -E2BIG;
                goto out;
        }
        if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
                /* prog already linked */
                err = -EBUSY;
                goto out;
        }
        hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
        tr->progs_cnt[kind]++;
        err = bpf_trampoline_update(prog->aux->trampoline);
        if (err) {
                hlist_del(&prog->aux->tramp_hlist);
                tr->progs_cnt[kind]--;
        }
out:
        mutex_unlock(&tr->mutex);
        return err;
}
/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
        mutex_lock(&tr->mutex);
        if (kind == BPF_TRAMP_REPLACE) {
                WARN_ON_ONCE(!tr->extension_prog);
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
                                         tr->extension_prog->bpf_func, NULL);
                tr->extension_prog = NULL;
                goto out;
        }
        hlist_del(&prog->aux->tramp_hlist);
        tr->progs_cnt[kind]--;
        err = bpf_trampoline_update(prog->aux->trampoline);
out:
        mutex_unlock(&tr->mutex);
        return err;
}
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
        struct bpf_image *image;

        if (!tr)
                return;
        mutex_lock(&trampoline_mutex);
        if (!refcount_dec_and_test(&tr->refcnt))
                goto out;
        WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
                goto out;
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                goto out;
        image = container_of(tr->image, struct bpf_image, data);
        latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
        /* wait for tasks to get out of trampoline before freeing it */
        synchronize_rcu_tasks();
        bpf_jit_free_exec(image);
        hlist_del(&tr->hlist);
        kfree(tr);
out:
        mutex_unlock(&trampoline_mutex);
}
/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
 * are needed for trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
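/* Roughly what the generated trampoline does per attached program, written
 * as illustrative C rather than the emitted assembly ("args" stands for the
 * arch-specific area where the trampoline saved the traced function's
 * arguments):
 *
 *      u64 start = __bpf_prog_enter();
 *      prog->bpf_func(args, prog->insnsi);
 *      __bpf_prog_exit(prog, start);
 */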
u64 notrace __bpf_prog_enter(void)
{
        u64 start = 0;

        rcu_read_lock();
        preempt_disable();
        if (static_branch_unlikely(&bpf_stats_enabled_key))
                start = sched_clock();
        return start;
}
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
{
        struct bpf_prog_stats *stats;

        if (static_branch_unlikely(&bpf_stats_enabled_key) &&
            /* static_key could be enabled in __bpf_prog_enter
             * and disabled in __bpf_prog_exit.
             * Hence check that 'start' is not zero.
             */
            start) {
                stats = this_cpu_ptr(prog->aux->stats);
                u64_stats_update_begin(&stats->syncp);
                stats->cnt++;
                stats->nsecs += sched_clock() - start;
                u64_stats_update_end(&stats->syncp);
        }
        preempt_enable();
        rcu_read_unlock();
}
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
                            const struct btf_func_model *m, u32 flags,
                            struct bpf_prog **fentry_progs, int fentry_cnt,
                            struct bpf_prog **fexit_progs, int fexit_cnt,
                            void *orig_call)
{
        return -ENOTSUPP;
}
static int __init init_trampolines(void)
{
        int i;

        for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&trampoline_table[i]);
        return 0;
}
late_initcall(init_trampolines);