// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};
/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
void *bpf_jit_alloc_exec_page(void)
{
	void *image;

	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return NULL;

	set_vm_flush_reset_perms(image);
	/* Keep image as writeable. The alternative is to keep flipping ro/rw
	 * every time a new program is attached or detached.
	 */
	set_memory_x((long)image, 1);
	return image;
}
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + PAGE_SIZE;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}
void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}
static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
	struct bpf_ksym *ksym = &tr->ksym;

	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
	bpf_image_ksym_add(tr->image, ksym);
}
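
/* Find the trampoline associated with @key, or create one on first use.
 * The returned trampoline holds a reference that the caller drops with
 * bpf_trampoline_put().
 */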
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	void *image;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	image = bpf_jit_alloc_exec_page();
	if (!image) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
	tr->image = image;
	INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
	bpf_trampoline_ksym_add(tr);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
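
/* A trampoline is attached to its target function either through ftrace's
 * direct call interface (when the call site is managed by ftrace) or by
 * patching the instruction directly via bpf_arch_text_poke().
 * is_ftrace_location() picks the mechanism for a given address.
 */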
static int is_ftrace_location(void *ip)
{
	long addr;

	addr = ftrace_location((long)ip);
	if (!addr)
		return 0;
	if (WARN_ON_ONCE(addr != (long)ip))
		return -EFAULT;
	return 1;
}
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
	return ret;
}
static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	return ret;
}
/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	ret = is_ftrace_location(ip);
	if (ret < 0)
		return ret;
	tr->func.ftrace_managed = ret;

	if (tr->func.ftrace_managed)
		ret = register_ftrace_direct((long)ip, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	return ret;
}
static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
	const struct bpf_prog_aux *aux;
	struct bpf_tramp_progs *tprogs;
	struct bpf_prog **progs;
	int kind;

	*total = 0;
	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
	if (!tprogs)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tprogs[kind].nr_progs = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		progs = tprogs[kind].progs;

		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
			*progs++ = aux->prog;
	}
	return tprogs;
}
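
/* Regenerate the trampoline body whenever the set of attached programs
 * changes. The image page is split into two halves; tr->selector picks the
 * half that receives the new code while the other half may still be live.
 */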
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
	struct bpf_tramp_progs *tprogs;
	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
	int err, total;

	tprogs = bpf_trampoline_get_progs(tr, &total);
	if (IS_ERR(tprogs))
		return PTR_ERR(tprogs);

	if (total == 0) {
		err = unregister_fentry(tr, old_image);
		tr->selector = 0;
		goto out;
	}

	if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
	    tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

	/* Though the second half of trampoline page is unused a task could be
	 * preempted in the middle of the first half of trampoline and two
	 * updates to trampoline would change the code from underneath the
	 * preempted task. Hence wait for tasks to voluntarily schedule or go
	 * to userspace.
	 */
	synchronize_rcu_tasks();

	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
					  &tr->func.model, flags, tprogs,
					  tr->func.addr);
	if (err < 0)
		goto out;

	if (tr->selector)
		/* progs already running at this address */
		err = modify_fentry(tr, old_image, new_image);
	else
		/* first time registering */
		err = register_fentry(tr, new_image);
	if (err)
		goto out;
	tr->selector++;
out:
	kfree(tprogs);
	return err;
}
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}
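
/* Attach @prog to its target's trampoline. Extension (BPF_TRAMP_REPLACE)
 * programs and fentry/fexit programs are mutually exclusive on the same
 * target, and at most BPF_MAX_TRAMP_PROGS fentry+fexit programs can be
 * attached at once.
 */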
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err = 0;
	int cnt;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog);
	mutex_lock(&tr->mutex);
	if (tr->extension_prog) {
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		err = -EBUSY;
		goto out;
	}
	cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt) {
			err = -EBUSY;
			goto out;
		}
		tr->extension_prog = prog;
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					 prog->bpf_func);
		goto out;
	}
	if (cnt >= BPF_MAX_TRAMP_PROGS) {
		err = -E2BIG;
		goto out;
	}
	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
		/* prog already linked */
		err = -EBUSY;
		goto out;
	}
	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(prog->aux->trampoline);
	if (err) {
		hlist_del(&prog->aux->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
out:
	mutex_unlock(&tr->mutex);
	return err;
}
/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog);
	mutex_lock(&tr->mutex);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		goto out;
	}
	hlist_del(&prog->aux->tramp_hlist);
	tr->progs_cnt[kind]--;
	err = bpf_trampoline_update(prog->aux->trampoline);
out:
	mutex_unlock(&tr->mutex);
	return err;
}
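
/* Drop a reference. The final put unhashes the trampoline, waits for tasks
 * to leave the image via synchronize_rcu_tasks() and frees the page.
 */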
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
		goto out;
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
		goto out;
	bpf_image_ksym_del(&tr->ksym);
	/* wait for tasks to get out of trampoline before freeing it */
	synchronize_rcu_tasks();
	bpf_jit_free_exec(tr->image);
	hlist_del(&tr->hlist);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}
/* The logic is similar to BPF_PROG_RUN, but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
{
	u64 start = 0;

	rcu_read_lock();
	migrate_disable();
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		start = sched_clock();
	return start;
}
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter
	     * and disabled in __bpf_prog_exit.
	     * And vice versa.
	     * Hence check that 'start' is not zero.
	     */
	    start) {
		stats = this_cpu_ptr(prog->aux->stats);
		u64_stats_update_begin(&stats->syncp);
		stats->cnt++;
		stats->nsecs += sched_clock() - start;
		u64_stats_update_end(&stats->syncp);
	}
	migrate_enable();
	rcu_read_unlock();
}
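
/* Weak stub for architectures without trampoline support; arch code
 * (e.g. x86) overrides this to emit the actual trampoline instructions.
 */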
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_progs *tprogs,
			    void *orig_call)
{
	return -ENOTSUPP;
}
static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);