// SPDX-License-Identifier: GPL-2.0
/*
 * uprobes-based tracing events
 *
 * Copyright (C) IBM Corporation, 2010-2012
 * Author:	Srikar Dronamraju <srikar@linux.vnet.ibm.com>
 */
#define pr_fmt(fmt)	"trace_uprobe: " fmt

#include <linux/bpf-cgroup.h>
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/uprobes.h>
#include <linux/namei.h>
#include <linux/string.h>
#include <linux/rculist.h>
#include <linux/filter.h>
#include <linux/percpu.h>

#include "trace_dynevent.h"
#include "trace_probe.h"
#include "trace_probe_tmpl.h"

#define UPROBE_EVENT_SYSTEM	"uprobes"
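
/*
 * Common header of a uprobe trace entry. A normal probe records one
 * address (the probed instruction pointer); a return probe records two
 * (the called function and the return address), which is what the
 * SIZEOF_TRACE_ENTRY()/DATAOF_TRACE_ENTRY() helpers below account for.
 */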
struct uprobe_trace_entry_head {
	struct trace_entry	ent;
	unsigned long		vaddr[];
};

#define SIZEOF_TRACE_ENTRY(is_return)			\
	(sizeof(struct uprobe_trace_entry_head) +	\
	 sizeof(unsigned long) * (is_return ? 2 : 1))

#define DATAOF_TRACE_ENTRY(entry, is_return)		\
	((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))
static int trace_uprobe_create(const char *raw_command);
static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev);
static int trace_uprobe_release(struct dyn_event *ev);
static bool trace_uprobe_is_busy(struct dyn_event *ev);
static bool trace_uprobe_match(const char *system, const char *event,
			       int argc, const char **argv, struct dyn_event *ev);

static struct dyn_event_operations trace_uprobe_ops = {
	.create = trace_uprobe_create,
	.show = trace_uprobe_show,
	.is_busy = trace_uprobe_is_busy,
	.free = trace_uprobe_release,
	.match = trace_uprobe_match,
};
/*
 * uprobe event core functions
 */
struct trace_uprobe {
	struct dyn_event		devent;
	struct uprobe_consumer		consumer;
	struct path			path;
	char				*filename;
	struct uprobe			*uprobe;
	unsigned long			offset;
	unsigned long			ref_ctr_offset;
	unsigned long __percpu		*nhits;
	struct trace_probe		tp;
};
static bool is_trace_uprobe(struct dyn_event *ev)
{
	return ev->ops == &trace_uprobe_ops;
}

static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
{
	return container_of(ev, struct trace_uprobe, devent);
}

/**
 * for_each_trace_uprobe - iterate over the trace_uprobe list
 * @pos:	the struct trace_uprobe * for each entry
 * @dpos:	the struct dyn_event * to use as a loop cursor
 */
#define for_each_trace_uprobe(pos, dpos)	\
	for_each_dyn_event(dpos)		\
		if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos)))
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
			     __u64 *data);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
				unsigned long func, struct pt_regs *regs,
				__u64 *data);
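
/*
 * "$stackN" fetch arguments read the N-th word of the user stack; the
 * direction of the walk below depends on which way the stack grows.
 */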
#ifdef CONFIG_STACK_GROWSUP
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
{
	return addr - (n * sizeof(long));
}
#else
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
{
	return addr + (n * sizeof(long));
}
#endif

static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
{
	unsigned long ret;
	unsigned long addr = user_stack_pointer(regs);

	addr = adjust_stack_addr(addr, n);

	if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
		return 0;

	return ret;
}
/*
 * Uprobes-specific fetch functions
 */
static nokprobe_inline int
probe_mem_read(void *dest, void *src, size_t size)
{
	void __user *vaddr = (void __force __user *)src;

	return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
}

static nokprobe_inline int
probe_mem_read_user(void *dest, void *src, size_t size)
{
	return probe_mem_read(dest, src, size);
}
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base)
{
	long ret;
	u32 loc = *(u32 *)dest;
	int maxlen = get_loc_len(loc);
	u8 *dst = get_loc_data(dest, base);
	void __user *src = (void __force __user *) addr;

	if (unlikely(!maxlen))
		return -ENOMEM;

	if (addr == FETCH_TOKEN_COMM)
		ret = strscpy(dst, current->comm, maxlen);
	else
		ret = strncpy_from_user(dst, src, maxlen);
	if (ret >= 0) {
		if (ret == maxlen)
			dst[ret - 1] = '\0';
		else
			/*
			 * Include the terminating null byte. In this case it
			 * was copied by strncpy_from_user but not accounted
			 * for in ret.
			 */
			ret++;
		*(u32 *)dest = make_data_loc(ret, (void *)dst - base);
	} else
		*(u32 *)dest = make_data_loc(0, (void *)dst - base);

	return ret;
}

static nokprobe_inline int
fetch_store_string_user(unsigned long addr, void *dest, void *base)
{
	return fetch_store_string(addr, dest, base);
}
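
/*
 * Note: string arguments are stored out of line. The u32 slot written by
 * fetch_store_string() packs the copied length and the buffer offset via
 * make_data_loc(); readers locate the bytes through that descriptor.
 */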
/* Return the length of the string, including the terminating null byte */
static nokprobe_inline int
fetch_store_strlen(unsigned long addr)
{
	int len;
	void __user *vaddr = (void __force __user *) addr;

	if (addr == FETCH_TOKEN_COMM)
		len = strlen(current->comm) + 1;
	else
		len = strnlen_user(vaddr, MAX_STRING_SIZE);

	return (len > MAX_STRING_SIZE) ? 0 : len;
}

static nokprobe_inline int
fetch_store_strlen_user(unsigned long addr)
{
	return fetch_store_strlen(addr);
}
static unsigned long translate_user_vaddr(unsigned long file_offset)
{
	unsigned long base_addr;
	struct uprobe_dispatch_data *udd;

	udd = (void *) current->utask->vaddr;

	base_addr = udd->bp_addr - udd->tu->offset;
	return base_addr + file_offset;
}
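
/*
 * translate_user_vaddr() backs "@+OFFSET" fetch arguments: the file offset
 * is rebased onto the mapping that hit the breakpoint, using the dispatch
 * data stashed in current->utask->vaddr by the dispatchers below.
 */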
/* Note that we don't verify it, since the code does not come from user space */
static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
		   void *dest, void *base)
{
	struct pt_regs *regs = rec;
	unsigned long val;
	int ret;

	/* 1st stage: get value from context */
	switch (code->op) {
	case FETCH_OP_REG:
		val = regs_get_register(regs, code->param);
		break;
	case FETCH_OP_STACK:
		val = get_user_stack_nth(regs, code->param);
		break;
	case FETCH_OP_STACKP:
		val = user_stack_pointer(regs);
		break;
	case FETCH_OP_RETVAL:
		val = regs_return_value(regs);
		break;
	case FETCH_OP_COMM:
		val = FETCH_TOKEN_COMM;
		break;
	case FETCH_OP_FOFFS:
		val = translate_user_vaddr(code->immediate);
		break;
	default:
		ret = process_common_fetch_insn(code, &val);
		if (ret < 0)
			return ret;
	}
	code++;

	return process_fetch_insn_bottom(code, val, dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)
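
/*
 * The second fetch stage (dereferencing, string copy, storing into the
 * buffer) is shared with the other probe types via
 * process_fetch_insn_bottom() in trace_probe_tmpl.h.
 */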
static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
{
	rwlock_init(&filter->rwlock);
	filter->nr_systemwide = 0;
	INIT_LIST_HEAD(&filter->perf_events);
}

static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
{
	return !filter->nr_systemwide && list_empty(&filter->perf_events);
}
static inline bool is_ret_probe(struct trace_uprobe *tu)
{
	return tu->consumer.ret_handler != NULL;
}

static bool trace_uprobe_is_busy(struct dyn_event *ev)
{
	struct trace_uprobe *tu = to_trace_uprobe(ev);

	return trace_probe_is_enabled(&tu->tp);
}
static bool trace_uprobe_match_command_head(struct trace_uprobe *tu,
					    int argc, const char **argv)
{
	char buf[MAX_ARGSTR_LEN + 1];
	int len;

	if (!argc)
		return true;

	len = strlen(tu->filename);
	if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':')
		return false;

	if (tu->ref_ctr_offset == 0)
		snprintf(buf, sizeof(buf), "0x%0*lx",
				(int)(sizeof(void *) * 2), tu->offset);
	else
		snprintf(buf, sizeof(buf), "0x%0*lx(0x%lx)",
				(int)(sizeof(void *) * 2), tu->offset,
				tu->ref_ctr_offset);
	if (strcmp(buf, &argv[0][len + 1]))
		return false;

	argc--; argv++;

	return trace_probe_match_command_args(&tu->tp, argc, argv);
}
static bool trace_uprobe_match(const char *system, const char *event,
			       int argc, const char **argv, struct dyn_event *ev)
{
	struct trace_uprobe *tu = to_trace_uprobe(ev);

	return (event[0] == '\0' ||
		strcmp(trace_probe_name(&tu->tp), event) == 0) &&
	   (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) &&
	   trace_uprobe_match_command_head(tu, argc, argv);
}
static nokprobe_inline struct trace_uprobe *
trace_uprobe_primary_from_call(struct trace_event_call *call)
{
	struct trace_probe *tp;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return NULL;

	return container_of(tp, struct trace_uprobe, tp);
}
/*
 * Allocate new trace_uprobe and initialize it (including uprobes).
 */
static struct trace_uprobe *
alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
{
	struct trace_uprobe *tu;
	int ret;

	tu = kzalloc(struct_size(tu, tp.args, nargs), GFP_KERNEL);
	if (!tu)
		return ERR_PTR(-ENOMEM);

	tu->nhits = alloc_percpu(unsigned long);
	if (!tu->nhits) {
		ret = -ENOMEM;
		goto error;
	}

	ret = trace_probe_init(&tu->tp, event, group, true, nargs);
	if (ret < 0)
		goto error;

	dyn_event_init(&tu->devent, &trace_uprobe_ops);
	tu->consumer.handler = uprobe_dispatcher;
	if (is_ret)
		tu->consumer.ret_handler = uretprobe_dispatcher;
	init_trace_uprobe_filter(tu->tp.event->filter);
	return tu;

error:
	free_percpu(tu->nhits);
	kfree(tu);

	return ERR_PTR(ret);
}
static void free_trace_uprobe(struct trace_uprobe *tu)
{
	if (!tu)
		return;

	path_put(&tu->path);
	trace_probe_cleanup(&tu->tp);
	kfree(tu->filename);
	free_percpu(tu->nhits);
	kfree(tu);
}
static struct trace_uprobe *find_probe_event(const char *event, const char *group)
{
	struct dyn_event *pos;
	struct trace_uprobe *tu;

	for_each_trace_uprobe(tu, pos)
		if (strcmp(trace_probe_name(&tu->tp), event) == 0 &&
		    strcmp(trace_probe_group_name(&tu->tp), group) == 0)
			return tu;

	return NULL;
}
/* Unregister a trace_uprobe and probe_event */
static int unregister_trace_uprobe(struct trace_uprobe *tu)
{
	int ret;

	if (trace_probe_has_sibling(&tu->tp))
		goto unreg;

	/* If there's a reference to the dynamic event */
	if (trace_event_dyn_busy(trace_probe_event_call(&tu->tp)))
		return -EBUSY;

	ret = unregister_uprobe_event(tu);
	if (ret)
		return ret;

unreg:
	dyn_event_remove(&tu->devent);
	trace_probe_unlink(&tu->tp);
	free_trace_uprobe(tu);

	return 0;
}
static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
					 struct trace_uprobe *comp)
{
	struct trace_probe_event *tpe = orig->tp.event;
	struct inode *comp_inode = d_real_inode(comp->path.dentry);
	int i;

	list_for_each_entry(orig, &tpe->probes, tp.list) {
		if (comp_inode != d_real_inode(orig->path.dentry) ||
		    comp->offset != orig->offset)
			continue;

		/*
		 * trace_probe_compare_arg_type() ensured that nr_args and
		 * each argument name and type are same. Let's compare comm.
		 */
		for (i = 0; i < orig->tp.nr_args; i++) {
			if (strcmp(orig->tp.args[i].comm,
				   comp->tp.args[i].comm))
				break;
		}

		if (i == orig->tp.nr_args)
			return true;
	}

	return false;
}
static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to)
{
	int ret;

	ret = trace_probe_compare_arg_type(&tu->tp, &to->tp);
	if (ret) {
		/* Note that argument starts index = 2 */
		trace_probe_log_set_index(ret + 1);
		trace_probe_log_err(0, DIFF_ARG_TYPE);
		return -EEXIST;
	}
	if (trace_uprobe_has_same_uprobe(to, tu)) {
		trace_probe_log_set_index(0);
		trace_probe_log_err(0, SAME_PROBE);
		return -EEXIST;
	}

	/* Append to existing event */
	ret = trace_probe_append(&tu->tp, &to->tp);
	if (!ret)
		dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));

	return ret;
}
/*
 * A uprobe with multiple reference counters is not allowed, i.e. if
 * inode and offset match, the reference counter offset *must* match as
 * well. There is one exception though: if the user is replacing an old
 * trace_uprobe with a new one (same group/event), then we allow the
 * same uprobe with a new reference counter, as long as the new one does
 * not conflict with any other existing ones.
 */
static int validate_ref_ctr_offset(struct trace_uprobe *new)
{
	struct dyn_event *pos;
	struct trace_uprobe *tmp;
	struct inode *new_inode = d_real_inode(new->path.dentry);

	for_each_trace_uprobe(tmp, pos) {
		if (new_inode == d_real_inode(tmp->path.dentry) &&
		    new->offset == tmp->offset &&
		    new->ref_ctr_offset != tmp->ref_ctr_offset) {
			pr_warn("Reference counter offset mismatch.");
			return -EINVAL;
		}
	}
	return 0;
}
/* Register a trace_uprobe and probe_event */
static int register_trace_uprobe(struct trace_uprobe *tu)
{
	struct trace_uprobe *old_tu;
	int ret;

	mutex_lock(&event_mutex);

	ret = validate_ref_ctr_offset(tu);
	if (ret)
		goto end;

	/* register as an event */
	old_tu = find_probe_event(trace_probe_name(&tu->tp),
				  trace_probe_group_name(&tu->tp));
	if (old_tu) {
		if (is_ret_probe(tu) != is_ret_probe(old_tu)) {
			trace_probe_log_set_index(0);
			trace_probe_log_err(0, DIFF_PROBE_TYPE);
			ret = -EEXIST;
		} else {
			ret = append_trace_uprobe(tu, old_tu);
		}
		goto end;
	}

	ret = register_uprobe_event(tu);
	if (ret) {
		if (ret == -EEXIST) {
			trace_probe_log_set_index(0);
			trace_probe_log_err(0, EVENT_EXIST);
		} else
			pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));

end:
	mutex_unlock(&event_mutex);

	return ret;
}
/*
 * Argument syntax:
 *  - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS]
 */
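/*
 * For example (path and offsets are illustrative):
 *   "p:my_entry /bin/bash:0x4245c0"  - probe at file offset 0x4245c0 in bash
 *   "r:my_exit /bin/bash:0x4245c0"   - probe the return of the same site
 * See Documentation/trace/uprobetracer.rst for the FETCHARGS syntax.
 */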
static int __trace_uprobe_create(int argc, const char **argv)
{
	struct trace_uprobe *tu;
	const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
	char *arg, *filename, *rctr, *rctr_end, *tmp;
	char buf[MAX_EVENT_NAME_LEN];
	char gbuf[MAX_EVENT_NAME_LEN];
	enum probe_print_type ptype;
	struct path path;
	unsigned long offset, ref_ctr_offset;
	bool is_return = false;
	int i, ret;

	ref_ctr_offset = 0;

	switch (argv[0][0]) {
	case 'r':
		is_return = true;
		break;
	case 'p':
		break;
	default:
		return -ECANCELED;
	}

	if (argc < 2)
		return -ECANCELED;
	if (argc - 2 > MAX_TRACE_ARGS)
		return -E2BIG;

	if (argv[0][1] == ':')
		event = &argv[0][2];

	if (!strchr(argv[1], '/'))
		return -ECANCELED;

	filename = kstrdup(argv[1], GFP_KERNEL);
	if (!filename)
		return -ENOMEM;

	/* Find the last occurrence, in case the path contains ':' too. */
	arg = strrchr(filename, ':');
	if (!arg || !isdigit(arg[1])) {
		kfree(filename);
		return -ECANCELED;
	}

	trace_probe_log_init("trace_uprobe", argc, argv);
	trace_probe_log_set_index(1);	/* filename is the 2nd argument */

	*arg++ = '\0';
	ret = kern_path(filename, LOOKUP_FOLLOW, &path);
	if (ret) {
		trace_probe_log_err(0, FILE_NOT_FOUND);
		kfree(filename);
		trace_probe_log_clear();
		return ret;
	}
	if (!d_is_reg(path.dentry)) {
		trace_probe_log_err(0, NO_REGULAR_FILE);
		ret = -EINVAL;
		goto fail_address_parse;
	}

	/* Parse reference counter offset if specified. */
	rctr = strchr(arg, '(');
	if (rctr) {
		rctr_end = strchr(rctr, ')');
		if (!rctr_end) {
			ret = -EINVAL;
			rctr_end = rctr + strlen(rctr);
			trace_probe_log_err(rctr_end - filename,
					    REFCNT_OPEN_BRACE);
			goto fail_address_parse;
		} else if (rctr_end[1] != '\0') {
			ret = -EINVAL;
			trace_probe_log_err(rctr_end + 1 - filename,
					    BAD_REFCNT_SUFFIX);
			goto fail_address_parse;
		}

		*rctr++ = '\0';
		*rctr_end = '\0';
		ret = kstrtoul(rctr, 0, &ref_ctr_offset);
		if (ret) {
			trace_probe_log_err(rctr - filename, BAD_REFCNT);
			goto fail_address_parse;
		}
	}

	/* Check if there is %return suffix */
	tmp = strchr(arg, '%');
	if (tmp) {
		if (!strcmp(tmp, "%return")) {
			*tmp = '\0';
			is_return = true;
		} else {
			trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX);
			ret = -EINVAL;
			goto fail_address_parse;
		}
	}

	/* Parse uprobe offset. */
	ret = kstrtoul(arg, 0, &offset);
	if (ret) {
		trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS);
		goto fail_address_parse;
	}

	/* setup a probe */
	trace_probe_log_set_index(0);
	if (event) {
		ret = traceprobe_parse_event_name(&event, &group, gbuf,
						  event - argv[0]);
		if (ret)
			goto fail_address_parse;
	}

	if (!event) {
		char *tail;
		char *ptr;

		tail = kstrdup(kbasename(filename), GFP_KERNEL);
		if (!tail) {
			ret = -ENOMEM;
			goto fail_address_parse;
		}

		ptr = strpbrk(tail, ".-_");
		if (ptr)
			*ptr = '\0';

		snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
		event = buf;
		kfree(tail);
	}

	argc -= 2;
	argv += 2;

	tu = alloc_trace_uprobe(group, event, argc, is_return);
	if (IS_ERR(tu)) {
		ret = PTR_ERR(tu);
		/* This must return -ENOMEM otherwise there is a bug */
		WARN_ON_ONCE(ret != -ENOMEM);
		goto fail_address_parse;
	}
	tu->offset = offset;
	tu->ref_ctr_offset = ref_ctr_offset;
	tu->path = path;
	tu->filename = filename;

	/* parse arguments */
	for (i = 0; i < argc; i++) {
		struct traceprobe_parse_context ctx = {
			.flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER,
		};

		trace_probe_log_set_index(i + 2);
		ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx);
		traceprobe_finish_parse(&ctx);
		if (ret)
			goto error;
	}

	ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
	ret = traceprobe_set_print_fmt(&tu->tp, ptype);
	if (ret < 0)
		goto error;

	ret = register_trace_uprobe(tu);
	if (!ret)
		goto out;

error:
	free_trace_uprobe(tu);
out:
	trace_probe_log_clear();
	return ret;

fail_address_parse:
	trace_probe_log_clear();
	path_put(&path);
	kfree(filename);

	return ret;
}
int trace_uprobe_create(const char *raw_command)
{
	return trace_probe_create(raw_command, __trace_uprobe_create);
}

static int create_or_delete_trace_uprobe(const char *raw_command)
{
	int ret;

	if (raw_command[0] == '-')
		return dyn_event_release(raw_command, &trace_uprobe_ops);

	ret = trace_uprobe_create(raw_command);
	return ret == -ECANCELED ? -EINVAL : ret;
}
static int trace_uprobe_release(struct dyn_event *ev)
{
	struct trace_uprobe *tu = to_trace_uprobe(ev);

	return unregister_trace_uprobe(tu);
}
/* Probes listing interfaces */
static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
{
	struct trace_uprobe *tu = to_trace_uprobe(ev);
	char c = is_ret_probe(tu) ? 'r' : 'p';
	int i;

	seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, trace_probe_group_name(&tu->tp),
			trace_probe_name(&tu->tp), tu->filename,
			(int)(sizeof(void *) * 2), tu->offset);

	if (tu->ref_ctr_offset)
		seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);

	for (i = 0; i < tu->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);

	seq_putc(m, '\n');
	return 0;
}
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct dyn_event *ev = v;

	if (!is_trace_uprobe(ev))
		return 0;

	return trace_uprobe_show(m, ev);
}
static const struct seq_operations probes_seq_op = {
	.start  = dyn_event_seq_start,
	.next   = dyn_event_seq_next,
	.stop   = dyn_event_seq_stop,
	.show   = probes_seq_show
};
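
/*
 * Opening uprobe_events write-only with O_TRUNC (e.g. "> uprobe_events")
 * removes all existing events first, mirroring kprobe_events behaviour.
 */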
static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = dyn_events_release_all(&trace_uprobe_ops);
		if (ret)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}
static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
					create_or_delete_trace_uprobe);
}

static const struct file_operations uprobe_events_ops = {
	.owner		= THIS_MODULE,
	.open		= probes_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= probes_write,
};
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct dyn_event *ev = v;
	struct trace_uprobe *tu;
	unsigned long nhits;
	int cpu;

	if (!is_trace_uprobe(ev))
		return 0;

	tu = to_trace_uprobe(ev);

	nhits = 0;
	for_each_possible_cpu(cpu) {
		nhits += per_cpu(*tu->nhits, cpu);
	}

	seq_printf(m, "  %s %-44s %15lu\n", tu->filename,
		   trace_probe_name(&tu->tp), nhits);
	return 0;
}
static const struct seq_operations profile_seq_op = {
	.start  = dyn_event_seq_start,
	.next   = dyn_event_seq_next,
	.stop   = dyn_event_seq_stop,
	.show	= probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	return seq_open(file, &profile_seq_op);
}

static const struct file_operations uprobe_profile_ops = {
	.owner		= THIS_MODULE,
	.open		= profile_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
struct uprobe_cpu_buffer {
	struct mutex mutex;
	void *buf;
	int dsize;
};
static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
static int uprobe_buffer_refcnt;
#define MAX_UCB_BUFFER_SIZE PAGE_SIZE
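
/*
 * Fetched argument data is staged in these per-CPU, page-sized buffers
 * before it is copied into the ring buffer or the perf buffer, so a
 * single event's payload can never exceed MAX_UCB_BUFFER_SIZE.
 */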
static int uprobe_buffer_init(void)
{
	int cpu, err_cpu;

	uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
	if (uprobe_cpu_buffer == NULL)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct page *p = alloc_pages_node(cpu_to_node(cpu),
						  GFP_KERNEL, 0);
		if (p == NULL) {
			err_cpu = cpu;
			goto err;
		}
		per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p);
		mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex);
	}

	return 0;

err:
	for_each_possible_cpu(cpu) {
		if (cpu == err_cpu)
			break;
		free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf);
	}

	free_percpu(uprobe_cpu_buffer);
	return -ENOMEM;
}
static int uprobe_buffer_enable(void)
{
	int ret = 0;

	BUG_ON(!mutex_is_locked(&event_mutex));

	if (uprobe_buffer_refcnt++ == 0) {
		ret = uprobe_buffer_init();
		if (ret < 0)
			uprobe_buffer_refcnt--;
	}

	return ret;
}

static void uprobe_buffer_disable(void)
{
	int cpu;

	BUG_ON(!mutex_is_locked(&event_mutex));

	if (--uprobe_buffer_refcnt == 0) {
		for_each_possible_cpu(cpu)
			free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer,
							     cpu)->buf);

		free_percpu(uprobe_cpu_buffer);
		uprobe_cpu_buffer = NULL;
	}
}
static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
{
	struct uprobe_cpu_buffer *ucb;
	int cpu;

	cpu = raw_smp_processor_id();
	ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);

	/*
	 * Use per-cpu buffers for fastest access, but we might migrate
	 * so the mutex makes sure we have sole access to it.
	 */
	mutex_lock(&ucb->mutex);

	return ucb;
}

static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
{
	if (!ucb)
		return;
	mutex_unlock(&ucb->mutex);
}
static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
						       struct pt_regs *regs,
						       struct uprobe_cpu_buffer **ucbp)
{
	struct uprobe_cpu_buffer *ucb;
	int dsize, esize;

	if (*ucbp)
		return *ucbp;

	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
	dsize = __get_data_size(&tu->tp, regs, NULL);

	ucb = uprobe_buffer_get();
	ucb->dsize = tu->tp.size + dsize;

	if (WARN_ON_ONCE(ucb->dsize > MAX_UCB_BUFFER_SIZE)) {
		ucb->dsize = MAX_UCB_BUFFER_SIZE;
		dsize = MAX_UCB_BUFFER_SIZE - tu->tp.size;
	}

	store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);

	*ucbp = ucb;
	return ucb;
}
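
/*
 * prepare_uprobe_buffer() fills the buffer only once per hit and caches
 * it through *ucbp, so the trace and perf handlers share a single copy
 * of the fetched arguments.
 */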
static void __uprobe_trace_func(struct trace_uprobe *tu,
				unsigned long func, struct pt_regs *regs,
				struct uprobe_cpu_buffer *ucb,
				struct trace_event_file *trace_file)
{
	struct uprobe_trace_entry_head *entry;
	struct trace_event_buffer fbuffer;
	void *data;
	int size, esize;
	struct trace_event_call *call = trace_probe_event_call(&tu->tp);

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
	size = esize + ucb->dsize;
	entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
	if (!entry)
		return;

	if (is_ret_probe(tu)) {
		entry->vaddr[0] = func;
		entry->vaddr[1] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, true);
	} else {
		entry->vaddr[0] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, false);
	}

	memcpy(data, ucb->buf, ucb->dsize);

	trace_event_buffer_commit(&fbuffer);
}
/* uprobe handler */
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
			     struct uprobe_cpu_buffer **ucbp)
{
	struct event_file_link *link;
	struct uprobe_cpu_buffer *ucb;

	if (is_ret_probe(tu))
		return 0;

	ucb = prepare_uprobe_buffer(tu, regs, ucbp);

	rcu_read_lock();
	trace_probe_for_each_link_rcu(link, &tu->tp)
		__uprobe_trace_func(tu, 0, regs, ucb, link->file);
	rcu_read_unlock();

	return 0;
}

static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
				 struct pt_regs *regs,
				 struct uprobe_cpu_buffer **ucbp)
{
	struct event_file_link *link;
	struct uprobe_cpu_buffer *ucb;

	ucb = prepare_uprobe_buffer(tu, regs, ucbp);

	rcu_read_lock();
	trace_probe_for_each_link_rcu(link, &tu->tp)
		__uprobe_trace_func(tu, func, regs, ucb, link->file);
	rcu_read_unlock();
}
/* Event entry printers */
static enum print_line_t
print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
{
	struct uprobe_trace_entry_head *entry;
	struct trace_seq *s = &iter->seq;
	struct trace_uprobe *tu;
	u8 *data;

	entry = (struct uprobe_trace_entry_head *)iter->ent;
	tu = trace_uprobe_primary_from_call(
		container_of(event, struct trace_event_call, event));
	if (unlikely(!tu))
		goto out;

	if (is_ret_probe(tu)) {
		trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
				 trace_probe_name(&tu->tp),
				 entry->vaddr[1], entry->vaddr[0]);
		data = DATAOF_TRACE_ENTRY(entry, true);
	} else {
		trace_seq_printf(s, "%s: (0x%lx)",
				 trace_probe_name(&tu->tp),
				 entry->vaddr[0]);
		data = DATAOF_TRACE_ENTRY(entry, false);
	}

	if (trace_probe_print_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0)
		goto out;

	trace_seq_putc(s, '\n');

 out:
	return trace_handle_return(s);
}
typedef bool (*filter_func_t)(struct uprobe_consumer *self, struct mm_struct *mm);

static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter)
{
	struct inode *inode = d_real_inode(tu->path.dentry);
	struct uprobe *uprobe;

	tu->consumer.filter = filter;
	uprobe = uprobe_register(inode, tu->offset, tu->ref_ctr_offset, &tu->consumer);
	if (IS_ERR(uprobe))
		return PTR_ERR(uprobe);

	tu->uprobe = uprobe;
	return 0;
}
static void __probe_event_disable(struct trace_probe *tp)
{
	struct trace_uprobe *tu;
	bool sync = false;

	tu = container_of(tp, struct trace_uprobe, tp);
	WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter));

	list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
		if (!tu->uprobe)
			continue;

		uprobe_unregister_nosync(tu->uprobe, &tu->consumer);
		sync = true;
		tu->uprobe = NULL;
	}
	if (sync)
		uprobe_unregister_sync();
}
static int probe_event_enable(struct trace_event_call *call,
			struct trace_event_file *file, filter_func_t filter)
{
	struct trace_probe *tp;
	struct trace_uprobe *tu;
	bool enabled;
	int ret;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;
	enabled = trace_probe_is_enabled(tp);

	/* This may also change "enabled" state */
	if (file) {
		if (trace_probe_test_flag(tp, TP_FLAG_PROFILE))
			return -EINTR;

		ret = trace_probe_add_file(tp, file);
		if (ret < 0)
			return ret;
	} else {
		if (trace_probe_test_flag(tp, TP_FLAG_TRACE))
			return -EINTR;

		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
	}

	tu = container_of(tp, struct trace_uprobe, tp);
	WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter));

	if (enabled)
		return 0;

	ret = uprobe_buffer_enable();
	if (ret)
		goto err_flags;

	list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
		ret = trace_uprobe_enable(tu, filter);
		if (ret) {
			__probe_event_disable(tp);
			goto err_buffer;
		}
	}

	return 0;

 err_buffer:
	uprobe_buffer_disable();

 err_flags:
	if (file)
		trace_probe_remove_file(tp, file);
	else
		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);

	return ret;
}
static void probe_event_disable(struct trace_event_call *call,
				struct trace_event_file *file)
{
	struct trace_probe *tp;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return;

	if (!trace_probe_is_enabled(tp))
		return;

	if (file) {
		if (trace_probe_remove_file(tp, file) < 0)
			return;

		if (trace_probe_is_enabled(tp))
			return;
	} else
		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);

	__probe_event_disable(tp);
	uprobe_buffer_disable();
}
static int uprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, size;
	struct uprobe_trace_entry_head field;
	struct trace_uprobe *tu;

	tu = trace_uprobe_primary_from_call(event_call);
	if (unlikely(!tu))
		return -ENODEV;

	if (is_ret_probe(tu)) {
		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
		DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
		size = SIZEOF_TRACE_ENTRY(true);
	} else {
		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
		size = SIZEOF_TRACE_ENTRY(false);
	}

	return traceprobe_define_arg_fields(event_call, size, &tu->tp);
}
#ifdef CONFIG_PERF_EVENTS
static bool
__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
{
	struct perf_event *event;

	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
		if (event->hw.target->mm == mm)
			return true;
	}

	return false;
}
static inline bool
trace_uprobe_filter_event(struct trace_uprobe_filter *filter,
			  struct perf_event *event)
{
	return __uprobe_perf_filter(filter, event->hw.target->mm);
}
static bool trace_uprobe_filter_remove(struct trace_uprobe_filter *filter,
				       struct perf_event *event)
{
	bool done;

	write_lock(&filter->rwlock);
	if (event->hw.target) {
		list_del(&event->hw.tp_list);
		done = filter->nr_systemwide ||
			(event->hw.target->flags & PF_EXITING) ||
			trace_uprobe_filter_event(filter, event);
	} else {
		filter->nr_systemwide--;
		done = filter->nr_systemwide;
	}
	write_unlock(&filter->rwlock);

	return done;
}
/* This returns true if the filter always covers target mm */
static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter,
				    struct perf_event *event)
{
	bool done;

	write_lock(&filter->rwlock);
	if (event->hw.target) {
		/*
		 * event->parent != NULL means copy_process(), we can avoid
		 * uprobe_apply(). current->mm must be probed and we can rely
		 * on dup_mmap() which preserves the already installed bp's.
		 *
		 * attr.enable_on_exec means that exec/mmap will install the
		 * breakpoints we need.
		 */
		done = filter->nr_systemwide ||
			event->parent || event->attr.enable_on_exec ||
			trace_uprobe_filter_event(filter, event);
		list_add(&event->hw.tp_list, &filter->perf_events);
	} else {
		done = filter->nr_systemwide;
		filter->nr_systemwide++;
	}
	write_unlock(&filter->rwlock);

	return done;
}
static int uprobe_perf_close(struct trace_event_call *call,
			     struct perf_event *event)
{
	struct trace_probe *tp;
	struct trace_uprobe *tu;
	int ret = 0;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;

	tu = container_of(tp, struct trace_uprobe, tp);
	if (trace_uprobe_filter_remove(tu->tp.event->filter, event))
		return 0;

	list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
		ret = uprobe_apply(tu->uprobe, &tu->consumer, false);
		if (ret)
			break;
	}

	return ret;
}
static int uprobe_perf_open(struct trace_event_call *call,
			    struct perf_event *event)
{
	struct trace_probe *tp;
	struct trace_uprobe *tu;
	int err = 0;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;

	tu = container_of(tp, struct trace_uprobe, tp);
	if (trace_uprobe_filter_add(tu->tp.event->filter, event))
		return 0;

	list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
		err = uprobe_apply(tu->uprobe, &tu->consumer, true);
		if (err) {
			uprobe_perf_close(call, event);
			break;
		}
	}

	return err;
}
static bool uprobe_perf_filter(struct uprobe_consumer *uc, struct mm_struct *mm)
{
	struct trace_uprobe_filter *filter;
	struct trace_uprobe *tu;
	bool ret;

	tu = container_of(uc, struct trace_uprobe, consumer);
	filter = tu->tp.event->filter;

	/*
	 * Speculative short-circuiting check to avoid unnecessarily taking
	 * filter->rwlock below, if the uprobe has a system-wide consumer.
	 */
	if (READ_ONCE(filter->nr_systemwide))
		return true;

	read_lock(&filter->rwlock);
	ret = __uprobe_perf_filter(filter, mm);
	read_unlock(&filter->rwlock);

	return ret;
}
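
/*
 * If a BPF program array is attached to the event, it runs first and may
 * veto the perf sample by returning 0.
 */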
static void __uprobe_perf_func(struct trace_uprobe *tu,
			       unsigned long func, struct pt_regs *regs,
			       struct uprobe_cpu_buffer **ucbp)
{
	struct trace_event_call *call = trace_probe_event_call(&tu->tp);
	struct uprobe_trace_entry_head *entry;
	struct uprobe_cpu_buffer *ucb;
	struct hlist_head *head;
	void *data;
	int size, esize;
	int rctx;

#ifdef CONFIG_BPF_EVENTS
	if (bpf_prog_array_valid(call)) {
		u32 ret;

		ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run);
		if (!ret)
			return;
	}
#endif /* CONFIG_BPF_EVENTS */

	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));

	ucb = prepare_uprobe_buffer(tu, regs, ucbp);
	size = esize + ucb->dsize;
	size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
		return;

	preempt_disable();
	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		goto out;

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		goto out;

	if (is_ret_probe(tu)) {
		entry->vaddr[0] = func;
		entry->vaddr[1] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, true);
	} else {
		entry->vaddr[0] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, false);
	}

	memcpy(data, ucb->buf, ucb->dsize);

	if (size - esize > ucb->dsize)
		memset(data + ucb->dsize, 0, size - esize - ucb->dsize);

	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
 out:
	preempt_enable();
}
/* uprobe profile handler */
static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs,
			    struct uprobe_cpu_buffer **ucbp)
{
	if (!uprobe_perf_filter(&tu->consumer, current->mm))
		return UPROBE_HANDLER_REMOVE;

	if (!is_ret_probe(tu))
		__uprobe_perf_func(tu, 0, regs, ucbp);
	return 0;
}

static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
				struct pt_regs *regs,
				struct uprobe_cpu_buffer **ucbp)
{
	__uprobe_perf_func(tu, func, regs, ucbp);
}
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **filename, u64 *probe_offset,
			u64 *probe_addr, bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_uprobe *tu;

	if (perf_type_tracepoint)
		tu = find_probe_event(pevent, group);
	else
		tu = trace_uprobe_primary_from_call(event->tp_event);
	if (!tu)
		return -EINVAL;

	*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
				    : BPF_FD_TYPE_UPROBE;
	*filename = tu->filename;
	*probe_offset = tu->offset;
	*probe_addr = 0;
	return 0;
}
#endif	/* CONFIG_PERF_EVENTS */
static int
trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
		      void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return probe_event_enable(event, file, NULL);

	case TRACE_REG_UNREGISTER:
		probe_event_disable(event, file);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return probe_event_enable(event, NULL, uprobe_perf_filter);

	case TRACE_REG_PERF_UNREGISTER:
		probe_event_disable(event, NULL);
		return 0;

	case TRACE_REG_PERF_OPEN:
		return uprobe_perf_open(event, data);

	case TRACE_REG_PERF_CLOSE:
		return uprobe_perf_close(event, data);

#endif
	default:
		return 0;
	}
}
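
/*
 * Breakpoint-hit callbacks wired into tu->consumer by alloc_trace_uprobe():
 * each hit is routed to the ftrace and/or perf path depending on which
 * flags are set on the probe.
 */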
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
			     __u64 *data)
{
	struct trace_uprobe *tu;
	struct uprobe_dispatch_data udd;
	struct uprobe_cpu_buffer *ucb = NULL;
	int ret = 0;

	tu = container_of(con, struct trace_uprobe, consumer);

	this_cpu_inc(*tu->nhits);

	udd.tu = tu;
	udd.bp_addr = instruction_pointer(regs);

	current->utask->vaddr = (unsigned long) &udd;

	if (WARN_ON_ONCE(!uprobe_cpu_buffer))
		return 0;

	if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
		ret |= uprobe_trace_func(tu, regs, &ucb);

#ifdef CONFIG_PERF_EVENTS
	if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
		ret |= uprobe_perf_func(tu, regs, &ucb);
#endif
	uprobe_buffer_put(ucb);
	return ret;
}
static int uretprobe_dispatcher(struct uprobe_consumer *con,
				unsigned long func, struct pt_regs *regs,
				__u64 *data)
{
	struct trace_uprobe *tu;
	struct uprobe_dispatch_data udd;
	struct uprobe_cpu_buffer *ucb = NULL;

	tu = container_of(con, struct trace_uprobe, consumer);

	udd.tu = tu;
	udd.bp_addr = func;

	current->utask->vaddr = (unsigned long) &udd;

	if (WARN_ON_ONCE(!uprobe_cpu_buffer))
		return 0;

	if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
		uretprobe_trace_func(tu, func, regs, &ucb);

#ifdef CONFIG_PERF_EVENTS
	if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
		uretprobe_perf_func(tu, func, regs, &ucb);
#endif
	uprobe_buffer_put(ucb);
	return 0;
}
static struct trace_event_functions uprobe_funcs = {
	.trace		= print_uprobe_event
};

static struct trace_event_fields uprobe_fields_array[] = {
	{ .type = TRACE_FUNCTION_TYPE,
	  .define_fields = uprobe_event_define_fields },
	{}
};
static inline void init_trace_event_call(struct trace_uprobe *tu)
{
	struct trace_event_call *call = trace_probe_event_call(&tu->tp);
	call->event.funcs = &uprobe_funcs;
	call->class->fields_array = uprobe_fields_array;

	call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY;
	call->class->reg = trace_uprobe_register;
}

static int register_uprobe_event(struct trace_uprobe *tu)
{
	init_trace_event_call(tu);

	return trace_probe_register_event_call(&tu->tp);
}

static int unregister_uprobe_event(struct trace_uprobe *tu)
{
	return trace_probe_unregister_event_call(&tu->tp);
}
#ifdef CONFIG_PERF_EVENTS
struct trace_event_call *
create_local_trace_uprobe(char *name, unsigned long offs,
			  unsigned long ref_ctr_offset, bool is_return)
{
	enum probe_print_type ptype;
	struct trace_uprobe *tu;
	struct path path;
	int ret;

	ret = kern_path(name, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);

	if (!d_is_reg(path.dentry)) {
		path_put(&path);
		return ERR_PTR(-EINVAL);
	}

	/*
	 * local trace_uprobes are not added to dyn_event, so they are never
	 * searched via find_probe_event(). Therefore, there is no concern of
	 * duplicated name "DUMMY_EVENT" here.
	 */
	tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0,
				is_return);
	if (IS_ERR(tu)) {
		pr_info("Failed to allocate trace_uprobe.(%d)\n",
			(int)PTR_ERR(tu));
		path_put(&path);
		return ERR_CAST(tu);
	}

	tu->offset = offs;
	tu->path = path;
	tu->ref_ctr_offset = ref_ctr_offset;
	tu->filename = kstrdup(name, GFP_KERNEL);
	if (!tu->filename) {
		ret = -ENOMEM;
		goto error;
	}

	init_trace_event_call(tu);

	ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
	if (traceprobe_set_print_fmt(&tu->tp, ptype) < 0) {
		ret = -ENOMEM;
		goto error;
	}

	return trace_probe_event_call(&tu->tp);
error:
	free_trace_uprobe(tu);
	return ERR_PTR(ret);
}
void destroy_local_trace_uprobe(struct trace_event_call *event_call)
{
	struct trace_uprobe *tu;

	tu = trace_uprobe_primary_from_call(event_call);

	free_trace_uprobe(tu);
}
#endif /* CONFIG_PERF_EVENTS */
/* Make a trace interface for controlling probe points */
static __init int init_uprobe_trace(void)
{
	int ret;

	ret = dyn_event_register(&trace_uprobe_ops);
	if (ret)
		return ret;

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	trace_create_file("uprobe_events", TRACE_MODE_WRITE, NULL,
				    NULL, &uprobe_events_ops);
	/* Profile interface */
	trace_create_file("uprobe_profile", TRACE_MODE_READ, NULL,
				    NULL, &uprobe_profile_ops);
	return 0;
}

fs_initcall(init_uprobe_trace);