// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008-2014 Mathieu Desnoyers
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/static_key.h>
enum tp_func_state {
	TP_FUNC_0,
	TP_FUNC_1,
	TP_FUNC_2,
	TP_FUNC_N,
};

extern tracepoint_ptr_t __start___tracepoints_ptrs[];
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
enum tp_transition_sync {
	TP_TRANSITION_SYNC_1_0_1,
	TP_TRANSITION_SYNC_N_2_1,

	_NR_TP_TRANSITION_SYNC,
};
struct tp_transition_snapshot {
	unsigned long rcu;
	bool ongoing;
};

/* Protected by tracepoints_mutex */
static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];
static void tp_rcu_get_state(enum tp_transition_sync sync)
{
	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

	/* Keep the latest get_state snapshot. */
	snapshot->rcu = get_state_synchronize_rcu();
	snapshot->ongoing = true;
}
static void tp_rcu_cond_sync(enum tp_transition_sync sync)
{
	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

	if (!snapshot->ongoing)
		return;
	cond_synchronize_rcu(snapshot->rcu);
	snapshot->ongoing = false;
}
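/*
 * Illustration (not from the original file): a minimal, hypothetical sketch
 * of the RCU cookie pattern the two helpers above wrap. A writer records a
 * grace-period cookie when it retires old state; a later writer only blocks
 * in cond_synchronize_rcu() if no grace period has elapsed since the
 * snapshot. The names my_cookie/my_retire/my_reuse are invented for the
 * example.
 *
 *	static unsigned long my_cookie;
 *
 *	static void my_retire(void)
 *	{
 *		my_cookie = get_state_synchronize_rcu();
 *	}
 *
 *	static void my_reuse(void)
 *	{
 *		cond_synchronize_rcu(my_cookie);	// no-op if a GP already elapsed
 *	}
 */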
/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;
#ifdef CONFIG_MODULES
/*
 * Tracepoint module list mutex protects the local module list.
 */
static DEFINE_MUTEX(tracepoint_module_list_mutex);

/* Local list of struct tp_module */
static LIST_HEAD(tracepoint_module_list);
#endif /* CONFIG_MODULES */
/*
 * tracepoints_mutex protects the builtin and module tracepoints.
 * tracepoints_mutex nests inside tracepoint_module_list_mutex.
 */
static DEFINE_MUTEX(tracepoints_mutex);
/*
 * Note about RCU:
 * It is used to delay the free of multiple probes array until a quiescent
 * state is reached.
 */
struct tp_probes {
	struct rcu_head rcu;
	struct tracepoint_func probes[];
};
/* Called in removal of a func but failed to allocate a new tp_funcs */
static void tp_stub_func(void)
{
	return;
}
static inline void *allocate_probes(int count)
{
	struct tp_probes *p = kmalloc(struct_size(p, probes, count),
				      GFP_KERNEL);
	return p == NULL ? NULL : p->probes;
}
static void rcu_free_old_probes(struct rcu_head *head)
{
	kfree(container_of(head, struct tp_probes, rcu));
}
static inline void release_probes(struct tracepoint *tp, struct tracepoint_func *old)
{
	if (old) {
		struct tp_probes *tp_probes = container_of(old,
			struct tp_probes, probes[0]);

		if (tracepoint_is_faultable(tp))
			call_rcu_tasks_trace(&tp_probes->rcu, rcu_free_old_probes);
		else
			call_rcu(&tp_probes->rcu, rcu_free_old_probes);
	}
}
static void debug_print_probes(struct tracepoint_func *funcs)
{
	int i;

	if (!tracepoint_debug || !funcs)
		return;

	for (i = 0; funcs[i].func; i++)
		printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
}
static struct tracepoint_func *
func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
	 int prio)
{
	struct tracepoint_func *old, *new;
	int iter_probes;	/* Iterate over old probe array. */
	int nr_probes = 0;	/* Counter for probes */
	int pos = -1;		/* Insertion position into new array */

	if (WARN_ON(!tp_func->func))
		return ERR_PTR(-EINVAL);

	debug_print_probes(*funcs);
	old = *funcs;
	if (old) {
		/* (N -> N+1), (N != 0, 1) probes */
		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
			if (old[iter_probes].func == tp_stub_func)
				continue;	/* Skip stub functions. */
			if (old[iter_probes].func == tp_func->func &&
			    old[iter_probes].data == tp_func->data)
				return ERR_PTR(-EEXIST);
			nr_probes++;
		}
	}
	/* + 2 : one for new probe, one for NULL func */
	new = allocate_probes(nr_probes + 2);
	if (new == NULL)
		return ERR_PTR(-ENOMEM);
	if (old) {
		nr_probes = 0;
		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
			if (old[iter_probes].func == tp_stub_func)
				continue;
			/* Insert before probes of lower priority */
			if (pos < 0 && old[iter_probes].prio < prio)
				pos = nr_probes++;
			new[nr_probes++] = old[iter_probes];
		}
		if (pos < 0)
			pos = nr_probes++;
		/* nr_probes now points to the end of the new array */
	} else {
		pos = 0;
		nr_probes = 1; /* must point at end of array */
	}
	new[pos] = *tp_func;
	new[nr_probes].func = NULL;
	*funcs = new;
	debug_print_probes(*funcs);
	return old;
}
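/*
 * Worked example (hypothetical probes, not from the original file): given
 * old = { {fA, dA, .prio = 100}, {fB, dB, .prio = 50}, NULL } and a new
 * probe {fC, dC, .prio = 75}, func_add() allocates room for four entries
 * and produces { {fA, dA, 100}, {fC, dC, 75}, {fB, dB, 50}, NULL }: the
 * insertion position is the first existing probe of strictly lower
 * priority, so higher-priority probes always run first when the iterator
 * walks the array.
 */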
static void *func_remove(struct tracepoint_func **funcs,
		struct tracepoint_func *tp_func)
{
	int nr_probes = 0, nr_del = 0, i;
	struct tracepoint_func *old, *new;

	old = *funcs;

	if (!old)
		return ERR_PTR(-ENOENT);

	debug_print_probes(*funcs);
	/* (N -> M), (N > 1, M >= 0) probes */
	if (tp_func->func) {
		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
			if ((old[nr_probes].func == tp_func->func &&
			     old[nr_probes].data == tp_func->data) ||
			    old[nr_probes].func == tp_stub_func)
				nr_del++;
		}
	}

	/*
	 * If probe is NULL, then nr_probes = nr_del = 0, and then the
	 * entire entry will be removed.
	 */
	if (nr_probes - nr_del == 0) {
		/* N -> 0, (N > 1) */
		*funcs = NULL;
		debug_print_probes(*funcs);
		return old;
	} else {
		int j = 0;
		/* N -> M, (N > 1, M > 0) */
		/* + 1 for NULL */
		new = allocate_probes(nr_probes - nr_del + 1);
		if (new) {
			for (i = 0; old[i].func; i++) {
				if ((old[i].func != tp_func->func ||
				     old[i].data != tp_func->data) &&
				    old[i].func != tp_stub_func)
					new[j++] = old[i];
			}
			new[nr_probes - nr_del].func = NULL;
			*funcs = new;
		} else {
			/*
			 * Failed to allocate, replace the old function
			 * with calls to tp_stub_func.
			 */
			for (i = 0; old[i].func; i++) {
				if (old[i].func == tp_func->func &&
				    old[i].data == tp_func->data)
					WRITE_ONCE(old[i].func, tp_stub_func);
			}
			*funcs = old;
		}
	}
	debug_print_probes(*funcs);
	return old;
}
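/*
 * Worked example (hypothetical probes, not from the original file):
 * removing {fB, dB} from old = { {fA, dA}, {fB, dB}, NULL } normally
 * yields { {fA, dA}, NULL }. If allocate_probes() fails, the matching
 * entry is instead overwritten in place with tp_stub_func, leaving
 * { {fA, dA}, {tp_stub_func, dB}, NULL }: readers keep calling the no-op
 * stub until a later update compacts the array, which is why both
 * func_add() and func_remove() skip tp_stub_func entries.
 */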
/*
 * Count the number of functions (enum tp_func_state) in a tp_funcs array.
 */
static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
{
	if (!tp_funcs)
		return TP_FUNC_0;
	if (!tp_funcs[1].func)
		return TP_FUNC_1;
	if (!tp_funcs[2].func)
		return TP_FUNC_2;
	return TP_FUNC_N;	/* 3 or more */
}
static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
{
	void *func = tp->iterator;

	/* Synthetic events do not have static call sites */
	if (!tp->static_call_key)
		return;
	if (nr_func_state(tp_funcs) == TP_FUNC_1)
		func = tp_funcs[0].func;
	__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
}
/*
 * Add the probe function to a tracepoint.
 */
static int tracepoint_add_func(struct tracepoint *tp,
			       struct tracepoint_func *func, int prio,
			       bool warn)
{
	struct tracepoint_func *old, *tp_funcs;
	int ret;

	if (tp->ext && tp->ext->regfunc && !static_key_enabled(&tp->key)) {
		ret = tp->ext->regfunc();
		if (ret < 0)
			return ret;
	}

	tp_funcs = rcu_dereference_protected(tp->funcs,
			lockdep_is_held(&tracepoints_mutex));
	old = func_add(&tp_funcs, func, prio);
	if (IS_ERR(old)) {
		WARN_ON_ONCE(warn && PTR_ERR(old) != -ENOMEM);
		return PTR_ERR(old);
	}

	/*
	 * rcu_assign_pointer has an smp_store_release() which makes sure
	 * that the new probe callbacks array is consistent before setting
	 * a pointer to it. This array is referenced by __DO_TRACE from
	 * include/linux/tracepoint.h using rcu_dereference_sched().
	 */
	switch (nr_func_state(tp_funcs)) {
	case TP_FUNC_1:		/* 0->1 */
		/*
		 * Make sure new static func never uses old data after a
		 * 1->0->1 transition sequence.
		 */
		tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1);
		/* Set static call to first function */
		tracepoint_update_call(tp, tp_funcs);
		/* Both iterator and static call handle NULL tp->funcs */
		rcu_assign_pointer(tp->funcs, tp_funcs);
		static_branch_enable(&tp->key);
		break;
	case TP_FUNC_2:		/* 1->2 */
		/* Set iterator static call */
		tracepoint_update_call(tp, tp_funcs);
		/*
		 * Iterator callback installed before updating tp->funcs.
		 * Requires ordering between RCU assign/dereference and
		 * static call update/call.
		 */
		fallthrough;
	case TP_FUNC_N:		/* N->N+1 (N>1) */
		rcu_assign_pointer(tp->funcs, tp_funcs);
		/*
		 * Make sure static func never uses incorrect data after a
		 * N->...->2->1 (N>1) transition sequence.
		 */
		if (tp_funcs[0].data != old[0].data)
			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	release_probes(tp, old);
	return 0;
}
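/*
 * Transition summary for the switch above (editorial note): on 0->1 the
 * static call is pointed at the lone probe *before* tp->funcs is published
 * and the static branch is enabled, so the fast path never runs with a
 * missing array; on 1->2 the static call must fall back to the iterator
 * *before* the two-element array becomes visible, otherwise a reader could
 * still take the single-probe fast path and miss the second probe.
 */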
/*
 * Remove a probe function from a tracepoint.
 * Note: only waiting for an RCU grace period after setting elem->call to the
 * empty function ensures that the original callback is not used anymore. This
 * is ensured by the preempt_disable around the call site.
 */
static int tracepoint_remove_func(struct tracepoint *tp,
				  struct tracepoint_func *func)
{
	struct tracepoint_func *old, *tp_funcs;

	tp_funcs = rcu_dereference_protected(tp->funcs,
			lockdep_is_held(&tracepoints_mutex));
	old = func_remove(&tp_funcs, func);
	if (WARN_ON_ONCE(IS_ERR(old)))
		return PTR_ERR(old);

	if (tp_funcs == old)
		/* Failed allocating new tp_funcs, replaced func with stub */
		return 0;

	switch (nr_func_state(tp_funcs)) {
	case TP_FUNC_0:		/* 1->0 */
		/* Removed last function */
		if (tp->ext && tp->ext->unregfunc && static_key_enabled(&tp->key))
			tp->ext->unregfunc();
		static_branch_disable(&tp->key);
		/* Set iterator static call */
		tracepoint_update_call(tp, tp_funcs);
		/* Both iterator and static call handle NULL tp->funcs */
		rcu_assign_pointer(tp->funcs, NULL);
		/*
		 * Make sure new static func never uses old data after a
		 * 1->0->1 transition sequence.
		 */
		tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1);
		break;
	case TP_FUNC_1:		/* 2->1 */
		rcu_assign_pointer(tp->funcs, tp_funcs);
		/*
		 * Make sure static func never uses incorrect data after a
		 * N->...->2->1 (N>2) transition sequence. If the first
		 * element's data has changed, then force the synchronization
		 * to prevent current readers that have loaded the old data
		 * from calling the new function.
		 */
		if (tp_funcs[0].data != old[0].data)
			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
		tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1);
		/* Set static call to first function */
		tracepoint_update_call(tp, tp_funcs);
		break;
	case TP_FUNC_2:		/* N->N-1 (N>2) */
		fallthrough;
	case TP_FUNC_N:
		rcu_assign_pointer(tp->funcs, tp_funcs);
		/*
		 * Make sure static func never uses incorrect data after a
		 * N->...->2->1 (N>2) transition sequence.
		 */
		if (tp_funcs[0].data != old[0].data)
			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	release_probes(tp, old);
	return 0;
}
/**
 * tracepoint_probe_register_prio_may_exist - Connect a probe to a tracepoint with priority
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 * @prio: priority of this function over other registered functions
 *
 * Same as tracepoint_probe_register_prio() except that it will not warn
 * if the tracepoint is already registered.
 */
int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe,
					     void *data, int prio)
{
	struct tracepoint_func tp_func;
	int ret;

	mutex_lock(&tracepoints_mutex);
	tp_func.func = probe;
	tp_func.data = data;
	tp_func.prio = prio;
	ret = tracepoint_add_func(tp, &tp_func, prio, false);
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_may_exist);
/**
 * tracepoint_probe_register_prio - Connect a probe to a tracepoint with priority
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 * @prio: priority of this function over other registered functions
 *
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
				   void *data, int prio)
{
	struct tracepoint_func tp_func;
	int ret;

	mutex_lock(&tracepoints_mutex);
	tp_func.func = probe;
	tp_func.data = data;
	tp_func.prio = prio;
	ret = tracepoint_add_func(tp, &tp_func, prio, true);
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
/**
 * tracepoint_probe_register - Connect a probe to a tracepoint
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 *
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
	return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
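/*
 * Usage sketch (hypothetical, not from the original file): attaching a
 * probe to an invented tracepoint "my_event" taking one int. A real probe
 * must match the tracepoint's TP_PROTO() signature, with the registered
 * @data pointer prepended as the first argument; most callers use the
 * generated register_trace_<name>() wrappers rather than calling
 * tracepoint_probe_register() directly.
 *
 *	static void my_probe(void *data, int value)
 *	{
 *		pr_info("my_event: %d\n", value);
 *	}
 *
 *	static int __init my_tracer_init(void)
 *	{
 *		return register_trace_my_event(my_probe, NULL);
 *	}
 */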
/**
 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
 * @tp: tracepoint
 * @probe: probe function pointer
 * @data: tracepoint data
 *
 * Returns 0 if ok, error value on error.
 */
int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
{
	struct tracepoint_func tp_func;
	int ret;

	mutex_lock(&tracepoints_mutex);
	tp_func.func = probe;
	tp_func.data = data;
	ret = tracepoint_remove_func(tp, &tp_func);
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
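/*
 * Usage sketch (hypothetical, continuing the example above): the probe and
 * data pointers passed at unregistration must match the registered pair,
 * and module tracers must unregister before unloading.
 *
 *	static void __exit my_tracer_exit(void)
 *	{
 *		unregister_trace_my_event(my_probe, NULL);
 *		tracepoint_synchronize_unregister();	// wait out in-flight probes
 *	}
 */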
static void for_each_tracepoint_range(
		tracepoint_ptr_t *begin, tracepoint_ptr_t *end,
		void (*fct)(struct tracepoint *tp, void *priv),
		void *priv)
{
	tracepoint_ptr_t *iter;

	if (!begin)
		return;
	for (iter = begin; iter < end; iter++)
		fct(tracepoint_ptr_deref(iter), priv);
}
#ifdef CONFIG_MODULES
bool trace_module_has_bad_taint(struct module *mod)
{
	return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
			       (1 << TAINT_UNSIGNED_MODULE) | (1 << TAINT_TEST) |
			       (1 << TAINT_LIVEPATCH));
}
static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);

/**
 * register_tracepoint_module_notifier - register tracepoint coming/going notifier
 * @nb: notifier block
 *
 * Notifiers registered with this function are called on module
 * coming/going with the tracepoint_module_list_mutex held.
 * The notifier block callback should expect a "struct tp_module" data
 * pointer.
 */
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	struct tp_module *tp_mod;
	int ret;

	mutex_lock(&tracepoint_module_list_mutex);
	ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb);
	if (ret)
		goto end;
	list_for_each_entry(tp_mod, &tracepoint_module_list, list)
		(void) nb->notifier_call(nb, MODULE_STATE_COMING, tp_mod);
end:
	mutex_unlock(&tracepoint_module_list_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);
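/*
 * Usage sketch (hypothetical, not from the original file): a coming/going
 * notifier. The notifier data is a struct tp_module, from which the module
 * can be inspected; all names below are invented.
 *
 *	static int my_tp_module_notify(struct notifier_block *nb,
 *				       unsigned long val, void *data)
 *	{
 *		struct tp_module *tp_mod = data;
 *
 *		switch (val) {
 *		case MODULE_STATE_COMING:
 *			pr_info("tracepoints coming: %s\n", tp_mod->mod->name);
 *			break;
 *		case MODULE_STATE_GOING:
 *			pr_info("tracepoints going: %s\n", tp_mod->mod->name);
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_tp_nb = {
 *		.notifier_call = my_tp_module_notify,
 *	};
 *
 *	// register_tracepoint_module_notifier(&my_tp_nb);
 */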
/**
 * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
 * @nb: notifier block
 *
 * The notifier block callback should expect a "struct tp_module" data
 * pointer.
 */
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	struct tp_module *tp_mod;
	int ret;

	mutex_lock(&tracepoint_module_list_mutex);
	ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb);
	if (ret)
		goto end;
	list_for_each_entry(tp_mod, &tracepoint_module_list, list)
		(void) nb->notifier_call(nb, MODULE_STATE_GOING, tp_mod);
end:
	mutex_unlock(&tracepoint_module_list_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
/*
 * Ensure the tracer unregistered the module's probes before the module
 * teardown is performed. Prevents leaks of probe and data pointers.
 */
static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
{
	WARN_ON_ONCE(tp->funcs);
}
static int tracepoint_module_coming(struct module *mod)
{
	struct tp_module *tp_mod;

	if (!mod->num_tracepoints)
		return 0;

	/*
	 * We skip modules that taint the kernel, especially those with different
	 * module headers (for forced load), to make sure we don't cause a crash.
	 * Staging, out-of-tree, unsigned GPL, and test modules are fine.
	 */
	if (trace_module_has_bad_taint(mod))
		return 0;

	tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
	if (!tp_mod)
		return -ENOMEM;
	tp_mod->mod = mod;

	mutex_lock(&tracepoint_module_list_mutex);
	list_add_tail(&tp_mod->list, &tracepoint_module_list);
	blocking_notifier_call_chain(&tracepoint_notify_list,
			MODULE_STATE_COMING, tp_mod);
	mutex_unlock(&tracepoint_module_list_mutex);
	return 0;
}
static void tracepoint_module_going(struct module *mod)
{
	struct tp_module *tp_mod;

	if (!mod->num_tracepoints)
		return;

	mutex_lock(&tracepoint_module_list_mutex);
	list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
		if (tp_mod->mod == mod) {
			blocking_notifier_call_chain(&tracepoint_notify_list,
					MODULE_STATE_GOING, tp_mod);
			list_del(&tp_mod->list);
			kfree(tp_mod);
			/*
			 * Called the going notifier before checking for
			 * quiescence.
			 */
			for_each_tracepoint_range(mod->tracepoints_ptrs,
				mod->tracepoints_ptrs + mod->num_tracepoints,
				tp_module_going_check_quiescent, NULL);
			break;
		}
	}
	/*
	 * In the case of modules that were tainted at "coming", we'll simply
	 * walk through the list without finding it. We cannot use the "tainted"
	 * flag on "going", in case a module taints the kernel only after being
	 * loaded.
	 */
	mutex_unlock(&tracepoint_module_list_mutex);
}
static int tracepoint_module_notify(struct notifier_block *self,
		unsigned long val, void *data)
{
	struct module *mod = data;
	int ret = 0;

	switch (val) {
	case MODULE_STATE_COMING:
		ret = tracepoint_module_coming(mod);
		break;
	case MODULE_STATE_LIVE:
		break;
	case MODULE_STATE_GOING:
		tracepoint_module_going(mod);
		break;
	case MODULE_STATE_UNFORMED:
		break;
	}
	return notifier_from_errno(ret);
}

static struct notifier_block tracepoint_module_nb = {
	.notifier_call = tracepoint_module_notify,
	.priority = 0,
};
static __init int init_tracepoints(void)
{
	int ret;

	ret = register_module_notifier(&tracepoint_module_nb);
	if (ret)
		pr_warn("Failed to register tracepoint module enter notifier\n");

	return ret;
}
__initcall(init_tracepoints);
/**
 * for_each_tracepoint_in_module - iteration on all tracepoints in a module
 * @mod: module
 * @fct: callback
 * @priv: private data
 */
void for_each_tracepoint_in_module(struct module *mod,
				   void (*fct)(struct tracepoint *tp,
					       struct module *mod, void *priv),
				   void *priv)
{
	tracepoint_ptr_t *begin, *end, *iter;

	lockdep_assert_held(&tracepoint_module_list_mutex);

	if (!mod)
		return;

	begin = mod->tracepoints_ptrs;
	end = mod->tracepoints_ptrs + mod->num_tracepoints;
	for (iter = begin; iter < end; iter++)
		fct(tracepoint_ptr_deref(iter), mod, priv);
}
/**
 * for_each_module_tracepoint - iteration on all tracepoints in all modules
 * @fct: callback
 * @priv: private data
 */
void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp,
				struct module *mod, void *priv),
				void *priv)
{
	struct tp_module *tp_mod;

	mutex_lock(&tracepoint_module_list_mutex);
	list_for_each_entry(tp_mod, &tracepoint_module_list, list)
		for_each_tracepoint_in_module(tp_mod->mod, fct, priv);
	mutex_unlock(&tracepoint_module_list_mutex);
}
#endif /* CONFIG_MODULES */
/**
 * for_each_kernel_tracepoint - iteration on all kernel tracepoints
 * @fct: callback
 * @priv: private data
 */
void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
		void *priv)
{
	for_each_tracepoint_range(__start___tracepoints_ptrs,
		__stop___tracepoints_ptrs, fct, priv);
}
EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint);
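/*
 * Usage sketch (hypothetical, not from the original file): counting the
 * builtin tracepoints via the iterator above.
 *
 *	static void count_tp(struct tracepoint *tp, void *priv)
 *	{
 *		(*(int *)priv)++;
 *	}
 *
 *	// int nr = 0;
 *	// for_each_kernel_tracepoint(count_tp, &nr);
 */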
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS

/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
static int sys_tracepoint_refcount;

int syscall_regfunc(void)
{
	struct task_struct *p, *t;

	if (!sys_tracepoint_refcount) {
		read_lock(&tasklist_lock);
		for_each_process_thread(p, t) {
			set_task_syscall_work(t, SYSCALL_TRACEPOINT);
		}
		read_unlock(&tasklist_lock);
	}
	sys_tracepoint_refcount++;

	return 0;
}

void syscall_unregfunc(void)
{
	struct task_struct *p, *t;

	sys_tracepoint_refcount--;
	if (!sys_tracepoint_refcount) {
		read_lock(&tasklist_lock);
		for_each_process_thread(p, t) {
			clear_task_syscall_work(t, SYSCALL_TRACEPOINT);
		}
		read_unlock(&tasklist_lock);
	}
}
#endif