#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"
static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
static int syscall_enter_register(struct ftrace_event_call *event,
                                  enum trace_reg type, void *data);
static int syscall_exit_register(struct ftrace_event_call *event,
                                 enum trace_reg type, void *data);
static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
        struct syscall_metadata *entry = call->data;

        return &entry->enter_fields;
}
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
        /*
         * Only compare after the "sys" prefix. Archs that use
         * syscall wrappers may have syscalls symbols aliases prefixed
         * with "SyS" instead of "sys", leading to an unwanted
         * mismatch.
         */
        return !strcmp(sym + 3, name + 3);
}
#endif
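
/*
 * Example of the prefix-skipping match (illustrative): with syscall
 * wrappers, kallsyms may resolve an entry to "SyS_read" while the
 * metadata stores "sys_read"; comparing from offset 3 matches "_read"
 * against "_read" either way.
 */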
static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
{
        struct syscall_metadata **start;
        struct syscall_metadata **stop;
        char str[KSYM_SYMBOL_LEN];

        start = __start_syscalls_metadata;
        stop = __stop_syscalls_metadata;
        kallsyms_lookup(syscall, NULL, NULL, NULL, str);

        if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
                return NULL;

        for ( ; start < stop; start++) {
                if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
                        return *start;
        }

        return NULL;
}
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
                return NULL;

        return syscalls_metadata[nr];
}
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
                    struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_enter *trace;
        struct syscall_metadata *entry;
        int i, ret, syscall;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry)
                goto end;

        if (entry->enter_event->event.type != ent->type) {
                WARN_ON_ONCE(1);
                goto end;
        }

        ret = trace_seq_printf(s, "%s(", entry->name);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        for (i = 0; i < entry->nb_args; i++) {
                /* parameter types */
                if (trace_flags & TRACE_ITER_VERBOSE) {
                        ret = trace_seq_printf(s, "%s ", entry->types[i]);
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
                /* parameter values */
                ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
                                       trace->args[i],
                                       i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }

        ret = trace_seq_putc(s, ')');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

end:
        ret = trace_seq_putc(s, '\n');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}
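
/*
 * A rendered enter line looks like (illustrative values):
 *   sys_read(fd: 3, buf: 7fffd1a8, count: 2000)
 * and with TRACE_ITER_VERBOSE set each value is preceded by its C type.
 */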
enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
                   struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_exit *trace;
        int syscall;
        struct syscall_metadata *entry;
        int ret;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry) {
                trace_seq_printf(s, "\n");
                return TRACE_TYPE_HANDLED;
        }

        if (entry->exit_event->event.type != ent->type) {
                WARN_ON_ONCE(1);
                return TRACE_TYPE_UNHANDLED;
        }

        ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
                               trace->ret);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}
extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)                                      \
        sizeof(type) != sizeof(trace.name) ?                           \
                __bad_type_size() :                                    \
                #type, #name, offsetof(typeof(trace), name),           \
                sizeof(trace.name), is_signed_type(type)
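
/*
 * Expansion sketch (illustrative): SYSCALL_FIELD(int, nr) feeds "int",
 * "nr", offsetof(typeof(trace), nr), sizeof(trace.nr) and
 * is_signed_type(int) to trace_define_field(). A size mismatch between
 * the named type and the struct member instead emits a call to the
 * deliberately undefined __bad_type_size(), failing the build at link
 * time.
 */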
static int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
        int i;
        int pos = 0;

        /* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
        for (i = 0; i < entry->nb_args; i++) {
                pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
                                entry->args[i], sizeof(unsigned long),
                                i == entry->nb_args - 1 ? "" : ", ");
        }
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

        for (i = 0; i < entry->nb_args; i++) {
                pos += snprintf(buf + pos, LEN_OR_ZERO,
                                ", ((unsigned long)(REC->%s))", entry->args[i]);
        }

#undef LEN_OR_ZERO

        /* return the length of print_fmt */
        return pos;
}
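
/*
 * Generated format sketch (illustrative, 64-bit, three-argument syscall):
 *   "fd: 0x%08lx, buf: 0x%08lx, count: 0x%08lx",
 *   ((unsigned long)(REC->fd)), ((unsigned long)(REC->buf)),
 *   ((unsigned long)(REC->count))
 * The %zu bakes sizeof(unsigned long) into the width, so it prints as
 * 04 on 32-bit and 08 on 64-bit kernels.
 */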
static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
        char *print_fmt;
        int len;
        struct syscall_metadata *entry = call->data;

        if (entry->enter_event != call) {
                call->print_fmt = "\"0x%lx\", REC->ret";
                return 0;
        }

        /* First: called with 0 length to calculate the needed length */
        len = __set_enter_print_fmt(entry, NULL, 0);

        print_fmt = kmalloc(len + 1, GFP_KERNEL);
        if (!print_fmt)
                return -ENOMEM;

        /* Second: actually write the @print_fmt */
        __set_enter_print_fmt(entry, print_fmt, len + 1);
        call->print_fmt = print_fmt;

        return 0;
}
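
/*
 * This is the usual measure-then-format idiom: the first pass runs with a
 * NULL buffer and zero length purely to size the string, the allocation
 * adds one byte for the terminating NUL, and the second pass formats for
 * real.
 */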
static void free_syscall_print_fmt(struct ftrace_event_call *call)
{
        struct syscall_metadata *entry = call->data;

        if (entry->enter_event == call)
                kfree(call->print_fmt);
}
static int syscall_enter_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_enter trace;
        struct syscall_metadata *meta = call->data;
        int ret;
        int i;
        int offset = offsetof(typeof(trace), args);

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        for (i = 0; i < meta->nb_args; i++) {
                ret = trace_define_field(call, meta->types[i],
                                         meta->args[i], offset,
                                         sizeof(unsigned long), 0,
                                         FILTER_OTHER);
                offset += sizeof(unsigned long);
        }

        return ret;
}
static int syscall_exit_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_exit trace;
        int ret;

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                 FILTER_OTHER);

        return ret;
}
void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
        struct syscall_trace_enter *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->enter_event->event.type, size, 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

        if (!filter_current_check_discard(buffer, sys_data->enter_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
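
/*
 * Sizing sketch (illustrative): the reserved record is the fixed
 * syscall_trace_enter header plus one unsigned long per argument, e.g.
 * sizeof(*entry) + 3 * 8 bytes for a three-argument syscall on a 64-bit
 * kernel.
 */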
void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
        struct syscall_trace_exit *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        entry->ret = syscall_get_return_value(current, regs);

        if (!filter_current_check_discard(buffer, sys_data->exit_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)
                ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
        if (!ret) {
                set_bit(num, enabled_enter_syscalls);
                sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
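
/*
 * Registration pattern, shared by the enter and exit helpers: a single
 * probe on the global sys_enter/sys_exit tracepoint serves every syscall.
 * The first enabled event registers the probe, a refcount keeps it alive,
 * and the enabled_* bitmaps gate which syscall numbers actually record.
 */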
void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_enter--;
        clear_bit(num, enabled_enter_syscalls);
        if (!sys_refcount_enter)
                unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
        mutex_unlock(&syscall_trace_lock);
}
int reg_event_syscall_exit(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_exit)
                ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
        if (!ret) {
                set_bit(num, enabled_exit_syscalls);
                sys_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_exit--;
        clear_bit(num, enabled_exit_syscalls);
        if (!sys_refcount_exit)
                unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
        mutex_unlock(&syscall_trace_lock);
}
static int init_syscall_trace(struct ftrace_event_call *call)
{
        int id;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls) {
                pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
                         ((struct syscall_metadata *)call->data)->name);
                return -ENOSYS;
        }

        if (set_syscall_print_fmt(call) < 0)
                return -ENOMEM;

        id = trace_event_raw_init(call);
        if (id < 0) {
                free_syscall_print_fmt(call);
                return id;
        }

        return id;
}
struct trace_event_functions enter_syscall_print_funcs = {
        .trace          = print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
        .trace          = print_syscall_exit,
};

struct ftrace_event_class event_class_syscall_enter = {
        .system         = "syscalls",
        .reg            = syscall_enter_register,
        .define_fields  = syscall_enter_define_fields,
        .get_fields     = syscall_get_enter_fields,
        .raw_init       = init_syscall_trace,
};

struct ftrace_event_class event_class_syscall_exit = {
        .system         = "syscalls",
        .reg            = syscall_exit_register,
        .define_fields  = syscall_exit_define_fields,
        .fields         = LIST_HEAD_INIT(event_class_syscall_exit.fields),
        .raw_init       = init_syscall_trace,
};
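
/*
 * The two classes above are deliberately asymmetric: enter events carry
 * per-syscall argument fields, so field lookup goes through
 * syscall_get_enter_fields() into each syscall's metadata, while exit
 * events all share the same (nr, ret) layout and a single static list.
 */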
unsigned long __init __weak arch_syscall_addr(int nr)
{
        return (unsigned long)sys_call_table[nr];
}
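
/*
 * Boot-time setup: walk the syscall table, resolve each handler address
 * back to a symbol via kallsyms, and match it against the compiled-in
 * metadata so that syscall_nr_to_meta() becomes a plain array lookup at
 * trace time.
 */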
int __init init_ftrace_syscalls(void)
{
        struct syscall_metadata *meta;
        unsigned long addr;
        int i;

        syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
                                    GFP_KERNEL);
        if (!syscalls_metadata) {
                WARN_ON(1);
                return -ENOMEM;
        }

        for (i = 0; i < NR_syscalls; i++) {
                addr = arch_syscall_addr(i);
                meta = find_syscall_meta(addr);
                if (!meta)
                        continue;

                meta->syscall_nr = i;
                syscalls_metadata[i] = meta;
        }

        return 0;
}
early_initcall(init_ftrace_syscalls);
#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_enter *rec;
        struct hlist_head *head;
        int syscall_nr;
        int rctx;
        int size;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* get the size after alignment with the u32 buffer size field */
        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                      "perf buffer not large enough"))
                return;

        rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
                        sys_data->enter_event->event.type, regs, &rctx);
        if (!rec)
                return;

        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                              (unsigned long *)&rec->args);

        head = this_cpu_ptr(sys_data->enter_event->perf_events);
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
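
/*
 * Alignment sketch (illustrative numbers): perf prefixes each record with
 * a u32 size field, so the payload is padded until size + sizeof(u32) is a
 * multiple of 8. A raw 50-byte record becomes ALIGN(50 + 4, 8) - 4 = 52
 * bytes, keeping every record's u32 header u64-aligned.
 */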
static int perf_sysenter_enable(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        if (!sys_perf_refcount_enter)
                ret = register_trace_sys_enter(perf_syscall_enter, NULL);
        if (ret) {
                pr_info("event trace: Could not activate syscall entry trace point");
        } else {
                set_bit(num, enabled_perf_enter_syscalls);
                sys_perf_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
static void perf_sysenter_disable(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        sys_perf_refcount_enter--;
        clear_bit(num, enabled_perf_enter_syscalls);
        if (!sys_perf_refcount_enter)
                unregister_trace_sys_enter(perf_syscall_enter, NULL);
        mutex_unlock(&syscall_trace_lock);
}
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
        struct hlist_head *head;
        int syscall_nr;
        int rctx;
        int size;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* We can probably do that at build time */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        /*
         * Impossible today, but stay paranoid about the future:
         * how could this check be moved out of the runtime path?
         */
        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                      "exit event has grown above perf buffer size"))
                return;

        rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
                        sys_data->exit_event->event.type, regs, &rctx);
        if (!rec)
                return;

        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);

        head = this_cpu_ptr(sys_data->exit_event->perf_events);
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
static int perf_sysexit_enable(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        if (!sys_perf_refcount_exit)
                ret = register_trace_sys_exit(perf_syscall_exit, NULL);
        if (ret) {
                pr_info("event trace: Could not activate syscall exit trace point");
        } else {
                set_bit(num, enabled_perf_exit_syscalls);
                sys_perf_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}
static void perf_sysexit_disable(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        sys_perf_refcount_exit--;
        clear_bit(num, enabled_perf_exit_syscalls);
        if (!sys_perf_refcount_exit)
                unregister_trace_sys_exit(perf_syscall_exit, NULL);
        mutex_unlock(&syscall_trace_lock);
}
#endif /* CONFIG_PERF_EVENTS */
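
/*
 * The .reg callbacks below multiplex every registration request for a
 * syscall event: TRACE_REG_(UN)REGISTER for ftrace and
 * TRACE_REG_PERF_(UN)REGISTER for perf; the remaining perf lifecycle
 * callbacks (open/close/add/del) need no syscall-specific work.
 */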
static int syscall_enter_register(struct ftrace_event_call *event,
                                  enum trace_reg type, void *data)
{
        switch (type) {
        case TRACE_REG_REGISTER:
                return reg_event_syscall_enter(event);
        case TRACE_REG_UNREGISTER:
                unreg_event_syscall_enter(event);
                return 0;

#ifdef CONFIG_PERF_EVENTS
        case TRACE_REG_PERF_REGISTER:
                return perf_sysenter_enable(event);
        case TRACE_REG_PERF_UNREGISTER:
                perf_sysenter_disable(event);
                return 0;
        case TRACE_REG_PERF_OPEN:
        case TRACE_REG_PERF_CLOSE:
        case TRACE_REG_PERF_ADD:
        case TRACE_REG_PERF_DEL:
                return 0;
#endif
        }
        return 0;
}
static int syscall_exit_register(struct ftrace_event_call *event,
                                 enum trace_reg type, void *data)
{
        switch (type) {
        case TRACE_REG_REGISTER:
                return reg_event_syscall_exit(event);
        case TRACE_REG_UNREGISTER:
                unreg_event_syscall_exit(event);
                return 0;

#ifdef CONFIG_PERF_EVENTS
        case TRACE_REG_PERF_REGISTER:
                return perf_sysexit_enable(event);
        case TRACE_REG_PERF_UNREGISTER:
                perf_sysexit_disable(event);
                return 0;
        case TRACE_REG_PERF_OPEN:
        case TRACE_REG_PERF_CLOSE:
        case TRACE_REG_PERF_ADD:
        case TRACE_REG_PERF_DEL:
                return 0;
#endif
        }
        return 0;
}