1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
21 u8 arg_idx __maybe_unused
,
22 u8
*arg_mask __maybe_unused
)
24 return scnprintf(bf
, size
, "%#lx", arg
);
27 #define SCA_HEX syscall_arg__scnprintf_hex
29 static size_t syscall_arg__scnprintf_whence(char *bf
, size_t size
,
31 u8 arg_idx __maybe_unused
,
32 u8
*arg_mask __maybe_unused
)
37 #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
51 return scnprintf(bf
, size
, "%#x", whence
);
54 #define SCA_WHENCE syscall_arg__scnprintf_whence
56 static size_t syscall_arg__scnprintf_mmap_prot(char *bf
, size_t size
,
58 u8 arg_idx __maybe_unused
,
59 u8
*arg_mask __maybe_unused
)
61 int printed
= 0, prot
= arg
;
63 if (prot
== PROT_NONE
)
64 return scnprintf(bf
, size
, "NONE");
65 #define P_MMAP_PROT(n) \
66 if (prot & PROT_##n) { \
67 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
77 P_MMAP_PROT(GROWSDOWN
);
82 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", prot
);
87 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
89 static size_t syscall_arg__scnprintf_mmap_flags(char *bf
, size_t size
,
90 unsigned long arg
, u8 arg_idx __maybe_unused
,
91 u8
*arg_mask __maybe_unused
)
93 int printed
= 0, flags
= arg
;
95 #define P_MMAP_FLAG(n) \
96 if (flags & MAP_##n) { \
97 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
102 P_MMAP_FLAG(PRIVATE
);
106 P_MMAP_FLAG(ANONYMOUS
);
107 P_MMAP_FLAG(DENYWRITE
);
108 P_MMAP_FLAG(EXECUTABLE
);
111 P_MMAP_FLAG(GROWSDOWN
);
113 P_MMAP_FLAG(HUGETLB
);
116 P_MMAP_FLAG(NONBLOCK
);
117 P_MMAP_FLAG(NORESERVE
);
118 P_MMAP_FLAG(POPULATE
);
120 #ifdef MAP_UNINITIALIZED
121 P_MMAP_FLAG(UNINITIALIZED
);
126 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
131 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
133 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf
, size_t size
,
134 unsigned long arg
, u8 arg_idx __maybe_unused
,
135 u8
*arg_mask __maybe_unused
)
140 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
143 P_MADV_BHV(SEQUENTIAL
);
144 P_MADV_BHV(WILLNEED
);
145 P_MADV_BHV(DONTNEED
);
147 P_MADV_BHV(DONTFORK
);
149 P_MADV_BHV(HWPOISON
);
150 #ifdef MADV_SOFT_OFFLINE
151 P_MADV_BHV(SOFT_OFFLINE
);
153 P_MADV_BHV(MERGEABLE
);
154 P_MADV_BHV(UNMERGEABLE
);
156 P_MADV_BHV(HUGEPAGE
);
158 #ifdef MADV_NOHUGEPAGE
159 P_MADV_BHV(NOHUGEPAGE
);
162 P_MADV_BHV(DONTDUMP
);
171 return scnprintf(bf
, size
, "%#x", behavior
);
174 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
176 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, unsigned long arg
,
177 u8 arg_idx __maybe_unused
, u8
*arg_mask
)
179 enum syscall_futex_args
{
180 SCF_UADDR
= (1 << 0),
183 SCF_TIMEOUT
= (1 << 3),
184 SCF_UADDR2
= (1 << 4),
188 int cmd
= op
& FUTEX_CMD_MASK
;
192 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
193 P_FUTEX_OP(WAIT
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
; break;
194 P_FUTEX_OP(WAKE
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
195 P_FUTEX_OP(FD
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
196 P_FUTEX_OP(REQUEUE
); *arg_mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
197 P_FUTEX_OP(CMP_REQUEUE
); *arg_mask
|= SCF_TIMEOUT
; break;
198 P_FUTEX_OP(CMP_REQUEUE_PI
); *arg_mask
|= SCF_TIMEOUT
; break;
199 P_FUTEX_OP(WAKE_OP
); break;
200 P_FUTEX_OP(LOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
201 P_FUTEX_OP(UNLOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
202 P_FUTEX_OP(TRYLOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
; break;
203 P_FUTEX_OP(WAIT_BITSET
); *arg_mask
|= SCF_UADDR2
; break;
204 P_FUTEX_OP(WAKE_BITSET
); *arg_mask
|= SCF_UADDR2
; break;
205 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
206 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
209 if (op
& FUTEX_PRIVATE_FLAG
)
210 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
212 if (op
& FUTEX_CLOCK_REALTIME
)
213 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
218 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
220 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
222 u8 arg_idx
, u8
*arg_mask
)
224 int printed
= 0, flags
= arg
;
226 if (!(flags
& O_CREAT
))
227 *arg_mask
|= 1 << (arg_idx
+ 1); /* Mask the mode parm */
230 return scnprintf(bf
, size
, "RDONLY");
232 if (flags & O_##n) { \
233 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
257 if ((flags
& O_SYNC
) == O_SYNC
)
258 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
270 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
275 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
277 static struct syscall_fmt
{
280 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, unsigned long arg
, u8 arg_idx
, u8
*arg_mask
);
285 { .name
= "access", .errmsg
= true, },
286 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
287 { .name
= "brk", .hexret
= true,
288 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
289 { .name
= "mmap", .hexret
= true, },
290 { .name
= "connect", .errmsg
= true, },
291 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat", },
292 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat", },
293 { .name
= "futex", .errmsg
= true,
294 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
295 { .name
= "ioctl", .errmsg
= true,
296 .arg_scnprintf
= { [2] = SCA_HEX
, /* arg */ }, },
297 { .name
= "lseek", .errmsg
= true,
298 .arg_scnprintf
= { [2] = SCA_WHENCE
, /* whence */ }, },
299 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat", },
300 { .name
= "madvise", .errmsg
= true,
301 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
302 [2] = SCA_MADV_BHV
, /* behavior */ }, },
303 { .name
= "mmap", .hexret
= true,
304 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
305 [2] = SCA_MMAP_PROT
, /* prot */
306 [3] = SCA_MMAP_FLAGS
, /* flags */ }, },
307 { .name
= "mprotect", .errmsg
= true,
308 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
309 [2] = SCA_MMAP_PROT
, /* prot */ }, },
310 { .name
= "mremap", .hexret
= true,
311 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
312 [4] = SCA_HEX
, /* new_addr */ }, },
313 { .name
= "munmap", .errmsg
= true,
314 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
315 { .name
= "open", .errmsg
= true,
316 .arg_scnprintf
= { [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
317 { .name
= "open_by_handle_at", .errmsg
= true,
318 .arg_scnprintf
= { [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
319 { .name
= "openat", .errmsg
= true,
320 .arg_scnprintf
= { [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
321 { .name
= "poll", .errmsg
= true, .timeout
= true, },
322 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
323 { .name
= "pread", .errmsg
= true, .alias
= "pread64", },
324 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64", },
325 { .name
= "read", .errmsg
= true, },
326 { .name
= "recvfrom", .errmsg
= true, },
327 { .name
= "select", .errmsg
= true, .timeout
= true, },
328 { .name
= "socket", .errmsg
= true, },
329 { .name
= "stat", .errmsg
= true, .alias
= "newstat", },
330 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
333 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
335 const struct syscall_fmt
*fmt
= fmtp
;
336 return strcmp(name
, fmt
->name
);
339 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
341 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
342 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
346 struct event_format
*tp_format
;
349 struct syscall_fmt
*fmt
;
350 size_t (**arg_scnprintf
)(char *bf
, size_t size
,
351 unsigned long arg
, u8 arg_idx
, u8
*args_mask
);
354 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
356 double duration
= (double)t
/ NSEC_PER_MSEC
;
357 size_t printed
= fprintf(fp
, "(");
360 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
361 else if (duration
>= 0.01)
362 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
364 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
365 return printed
+ fprintf(fp
, "): ");
368 struct thread_trace
{
372 unsigned long nr_events
;
377 static struct thread_trace
*thread_trace__new(void)
379 return zalloc(sizeof(struct thread_trace
));
382 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
384 struct thread_trace
*ttrace
;
389 if (thread
->priv
== NULL
)
390 thread
->priv
= thread_trace__new();
392 if (thread
->priv
== NULL
)
395 ttrace
= thread
->priv
;
400 color_fprintf(fp
, PERF_COLOR_RED
,
401 "WARNING: not enough memory, dropping samples!\n");
406 struct perf_tool tool
;
410 struct syscall
*table
;
412 struct perf_record_opts opts
;
416 unsigned long nr_events
;
417 struct strlist
*ev_qualifier
;
418 bool not_ev_qualifier
;
419 struct intlist
*tid_list
;
420 struct intlist
*pid_list
;
422 bool multiple_threads
;
423 double duration_filter
;
427 static bool trace__filter_duration(struct trace
*trace
, double t
)
429 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
432 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
434 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
436 return fprintf(fp
, "%10.3f ", ts
);
439 static bool done
= false;
441 static void sig_handler(int sig __maybe_unused
)
446 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
447 u64 duration
, u64 tstamp
, FILE *fp
)
449 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
450 printed
+= fprintf_duration(duration
, fp
);
452 if (trace
->multiple_threads
)
453 printed
+= fprintf(fp
, "%d ", thread
->tid
);
458 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
459 union perf_event
*event
)
463 switch (event
->header
.type
) {
464 case PERF_RECORD_LOST
:
465 color_fprintf(trace
->output
, PERF_COLOR_RED
,
466 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
467 ret
= machine__process_lost_event(machine
, event
);
469 ret
= machine__process_event(machine
, event
);
476 static int trace__tool_process(struct perf_tool
*tool
,
477 union perf_event
*event
,
478 struct perf_sample
*sample __maybe_unused
,
479 struct machine
*machine
)
481 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
482 return trace__process_event(trace
, machine
, event
);
485 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
487 int err
= symbol__init();
492 machine__init(&trace
->host
, "", HOST_KERNEL_ID
);
493 machine__create_kernel_maps(&trace
->host
);
495 if (perf_target__has_task(&trace
->opts
.target
)) {
496 err
= perf_event__synthesize_thread_map(&trace
->tool
, evlist
->threads
,
500 err
= perf_event__synthesize_threads(&trace
->tool
, trace__tool_process
,
510 static int syscall__set_arg_fmts(struct syscall
*sc
)
512 struct format_field
*field
;
515 sc
->arg_scnprintf
= calloc(sc
->tp_format
->format
.nr_fields
- 1, sizeof(void *));
516 if (sc
->arg_scnprintf
== NULL
)
519 for (field
= sc
->tp_format
->format
.fields
->next
; field
; field
= field
->next
) {
520 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
521 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
522 else if (field
->flags
& FIELD_IS_POINTER
)
523 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
530 static int trace__read_syscall_info(struct trace
*trace
, int id
)
534 const char *name
= audit_syscall_to_name(id
, trace
->audit_machine
);
539 if (id
> trace
->syscalls
.max
) {
540 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
542 if (nsyscalls
== NULL
)
545 if (trace
->syscalls
.max
!= -1) {
546 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
547 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
549 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
552 trace
->syscalls
.table
= nsyscalls
;
553 trace
->syscalls
.max
= id
;
556 sc
= trace
->syscalls
.table
+ id
;
559 if (trace
->ev_qualifier
) {
560 bool in
= strlist__find(trace
->ev_qualifier
, name
) != NULL
;
562 if (!(in
^ trace
->not_ev_qualifier
)) {
565 * No need to do read tracepoint information since this will be
572 sc
->fmt
= syscall_fmt__find(sc
->name
);
574 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
575 sc
->tp_format
= event_format__new("syscalls", tp_name
);
577 if (sc
->tp_format
== NULL
&& sc
->fmt
&& sc
->fmt
->alias
) {
578 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
579 sc
->tp_format
= event_format__new("syscalls", tp_name
);
582 if (sc
->tp_format
== NULL
)
585 return syscall__set_arg_fmts(sc
);
588 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
594 if (sc
->tp_format
!= NULL
) {
595 struct format_field
*field
;
596 u8 mask
= 0, bit
= 1;
598 for (field
= sc
->tp_format
->format
.fields
->next
; field
;
599 field
= field
->next
, ++i
, bit
<<= 1) {
603 printed
+= scnprintf(bf
+ printed
, size
- printed
,
604 "%s%s: ", printed
? ", " : "", field
->name
);
606 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[i
]) {
607 printed
+= sc
->arg_scnprintf
[i
](bf
+ printed
, size
- printed
,
610 printed
+= scnprintf(bf
+ printed
, size
- printed
,
616 printed
+= scnprintf(bf
+ printed
, size
- printed
,
618 printed
? ", " : "", i
, args
[i
]);
626 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
627 struct perf_sample
*sample
);
629 static struct syscall
*trace__syscall_info(struct trace
*trace
,
630 struct perf_evsel
*evsel
,
631 struct perf_sample
*sample
)
633 int id
= perf_evsel__intval(evsel
, sample
, "id");
638 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
639 * before that, leaving at a higher verbosity level till that is
640 * explained. Reproduced with plain ftrace with:
642 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
643 * grep "NR -1 " /t/trace_pipe
645 * After generating some load on the machine.
649 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
650 id
, perf_evsel__name(evsel
), ++n
);
655 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
656 trace__read_syscall_info(trace
, id
))
659 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
662 return &trace
->syscalls
.table
[id
];
666 fprintf(trace
->output
, "Problems reading syscall %d", id
);
667 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
668 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
669 fputs(" information\n", trace
->output
);
674 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
675 struct perf_sample
*sample
)
680 struct thread
*thread
;
681 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
682 struct thread_trace
*ttrace
;
690 thread
= machine__findnew_thread(&trace
->host
, sample
->pid
,
692 ttrace
= thread__trace(thread
, trace
->output
);
696 args
= perf_evsel__rawptr(evsel
, sample
, "args");
698 fprintf(trace
->output
, "Problems reading syscall arguments\n");
702 ttrace
= thread
->priv
;
704 if (ttrace
->entry_str
== NULL
) {
705 ttrace
->entry_str
= malloc(1024);
706 if (!ttrace
->entry_str
)
710 ttrace
->entry_time
= sample
->time
;
711 msg
= ttrace
->entry_str
;
712 printed
+= scnprintf(msg
+ printed
, 1024 - printed
, "%s(", sc
->name
);
714 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, 1024 - printed
, args
);
716 if (!strcmp(sc
->name
, "exit_group") || !strcmp(sc
->name
, "exit")) {
717 if (!trace
->duration_filter
) {
718 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
719 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
722 ttrace
->entry_pending
= true;
727 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
728 struct perf_sample
*sample
)
732 struct thread
*thread
;
733 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
734 struct thread_trace
*ttrace
;
742 thread
= machine__findnew_thread(&trace
->host
, sample
->pid
,
744 ttrace
= thread__trace(thread
, trace
->output
);
748 ret
= perf_evsel__intval(evsel
, sample
, "ret");
750 ttrace
= thread
->priv
;
752 ttrace
->exit_time
= sample
->time
;
754 if (ttrace
->entry_time
) {
755 duration
= sample
->time
- ttrace
->entry_time
;
756 if (trace__filter_duration(trace
, duration
))
758 } else if (trace
->duration_filter
)
761 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
763 if (ttrace
->entry_pending
) {
764 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
766 fprintf(trace
->output
, " ... [");
767 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
768 fprintf(trace
->output
, "]: %s()", sc
->name
);
771 if (sc
->fmt
== NULL
) {
773 fprintf(trace
->output
, ") = %d", ret
);
774 } else if (ret
< 0 && sc
->fmt
->errmsg
) {
776 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
777 *e
= audit_errno_to_name(-ret
);
779 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
780 } else if (ret
== 0 && sc
->fmt
->timeout
)
781 fprintf(trace
->output
, ") = 0 Timeout");
782 else if (sc
->fmt
->hexret
)
783 fprintf(trace
->output
, ") = %#x", ret
);
787 fputc('\n', trace
->output
);
789 ttrace
->entry_pending
= false;
794 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
795 struct perf_sample
*sample
)
797 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
798 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
799 struct thread
*thread
= machine__findnew_thread(&trace
->host
,
802 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
807 ttrace
->runtime_ms
+= runtime_ms
;
808 trace
->runtime_ms
+= runtime_ms
;
812 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
814 perf_evsel__strval(evsel
, sample
, "comm"),
815 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
817 perf_evsel__intval(evsel
, sample
, "vruntime"));
821 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
823 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
824 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
827 if (trace
->pid_list
|| trace
->tid_list
)
833 static int trace__process_sample(struct perf_tool
*tool
,
834 union perf_event
*event __maybe_unused
,
835 struct perf_sample
*sample
,
836 struct perf_evsel
*evsel
,
837 struct machine
*machine __maybe_unused
)
839 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
842 tracepoint_handler handler
= evsel
->handler
.func
;
844 if (skip_sample(trace
, sample
))
847 if (trace
->base_time
== 0)
848 trace
->base_time
= sample
->time
;
851 handler(trace
, evsel
, sample
);
857 perf_session__has_tp(struct perf_session
*session
, const char *name
)
859 struct perf_evsel
*evsel
;
861 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
, name
);
863 return evsel
!= NULL
;
866 static int parse_target_str(struct trace
*trace
)
868 if (trace
->opts
.target
.pid
) {
869 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
870 if (trace
->pid_list
== NULL
) {
871 pr_err("Error parsing process id string\n");
876 if (trace
->opts
.target
.tid
) {
877 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
878 if (trace
->tid_list
== NULL
) {
879 pr_err("Error parsing thread id string\n");
887 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
889 struct perf_evlist
*evlist
= perf_evlist__new();
890 struct perf_evsel
*evsel
;
892 unsigned long before
;
893 const bool forks
= argc
> 0;
895 if (evlist
== NULL
) {
896 fprintf(trace
->output
, "Not enough memory to run!\n");
900 if (perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_enter", trace__sys_enter
) ||
901 perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_exit", trace__sys_exit
)) {
902 fprintf(trace
->output
, "Couldn't read the raw_syscalls tracepoints information!\n");
903 goto out_delete_evlist
;
907 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
908 trace__sched_stat_runtime
)) {
909 fprintf(trace
->output
, "Couldn't read the sched_stat_runtime tracepoint information!\n");
910 goto out_delete_evlist
;
913 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
915 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
916 goto out_delete_evlist
;
919 err
= trace__symbols_init(trace
, evlist
);
921 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
922 goto out_delete_maps
;
925 perf_evlist__config(evlist
, &trace
->opts
);
927 signal(SIGCHLD
, sig_handler
);
928 signal(SIGINT
, sig_handler
);
931 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
934 fprintf(trace
->output
, "Couldn't run the workload!\n");
935 goto out_delete_maps
;
939 err
= perf_evlist__open(evlist
);
941 fprintf(trace
->output
, "Couldn't create the events: %s\n", strerror(errno
));
942 goto out_delete_maps
;
945 err
= perf_evlist__mmap(evlist
, UINT_MAX
, false);
947 fprintf(trace
->output
, "Couldn't mmap the events: %s\n", strerror(errno
));
948 goto out_close_evlist
;
951 perf_evlist__enable(evlist
);
954 perf_evlist__start_workload(evlist
);
956 trace
->multiple_threads
= evlist
->threads
->map
[0] == -1 || evlist
->threads
->nr
> 1;
958 before
= trace
->nr_events
;
960 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
961 union perf_event
*event
;
963 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
964 const u32 type
= event
->header
.type
;
965 tracepoint_handler handler
;
966 struct perf_sample sample
;
970 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
972 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
976 if (trace
->base_time
== 0)
977 trace
->base_time
= sample
.time
;
979 if (type
!= PERF_RECORD_SAMPLE
) {
980 trace__process_event(trace
, &trace
->host
, event
);
984 evsel
= perf_evlist__id2evsel(evlist
, sample
.id
);
986 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
.id
);
990 if (sample
.raw_data
== NULL
) {
991 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
992 perf_evsel__name(evsel
), sample
.tid
,
993 sample
.cpu
, sample
.raw_size
);
997 handler
= evsel
->handler
.func
;
998 handler(trace
, evsel
, &sample
);
1001 goto out_unmap_evlist
;
1005 if (trace
->nr_events
== before
) {
1007 goto out_unmap_evlist
;
1009 poll(evlist
->pollfd
, evlist
->nr_fds
, -1);
1013 perf_evlist__disable(evlist
);
1018 perf_evlist__munmap(evlist
);
1020 perf_evlist__close(evlist
);
1022 perf_evlist__delete_maps(evlist
);
1024 perf_evlist__delete(evlist
);
1029 static int trace__replay(struct trace
*trace
)
1031 const struct perf_evsel_str_handler handlers
[] = {
1032 { "raw_syscalls:sys_enter", trace__sys_enter
, },
1033 { "raw_syscalls:sys_exit", trace__sys_exit
, },
1036 struct perf_session
*session
;
1039 trace
->tool
.sample
= trace__process_sample
;
1040 trace
->tool
.mmap
= perf_event__process_mmap
;
1041 trace
->tool
.comm
= perf_event__process_comm
;
1042 trace
->tool
.exit
= perf_event__process_exit
;
1043 trace
->tool
.fork
= perf_event__process_fork
;
1044 trace
->tool
.attr
= perf_event__process_attr
;
1045 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
1046 trace
->tool
.build_id
= perf_event__process_build_id
;
1048 trace
->tool
.ordered_samples
= true;
1049 trace
->tool
.ordering_requires_timestamps
= true;
1051 /* add tid to output */
1052 trace
->multiple_threads
= true;
1054 if (symbol__init() < 0)
1057 session
= perf_session__new(input_name
, O_RDONLY
, 0, false,
1059 if (session
== NULL
)
1062 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
1066 if (!perf_session__has_tp(session
, "raw_syscalls:sys_enter")) {
1067 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1071 if (!perf_session__has_tp(session
, "raw_syscalls:sys_exit")) {
1072 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1076 err
= parse_target_str(trace
);
1082 err
= perf_session__process_events(session
, &trace
->tool
);
1084 pr_err("Failed to process events, error %d", err
);
1087 perf_session__delete(session
);
1092 static size_t trace__fprintf_threads_header(FILE *fp
)
1096 printed
= fprintf(fp
, "\n _____________________________________________________________________\n");
1097 printed
+= fprintf(fp
," __) Summary of events (__\n\n");
1098 printed
+= fprintf(fp
," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1099 printed
+= fprintf(fp
," _____________________________________________________________________\n\n");
1104 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
1106 size_t printed
= trace__fprintf_threads_header(fp
);
1109 for (nd
= rb_first(&trace
->host
.threads
); nd
; nd
= rb_next(nd
)) {
1110 struct thread
*thread
= rb_entry(nd
, struct thread
, rb_node
);
1111 struct thread_trace
*ttrace
= thread
->priv
;
1118 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
1120 color
= PERF_COLOR_NORMAL
;
1122 color
= PERF_COLOR_RED
;
1123 else if (ratio
> 25.0)
1124 color
= PERF_COLOR_GREEN
;
1125 else if (ratio
> 5.0)
1126 color
= PERF_COLOR_YELLOW
;
1128 printed
+= color_fprintf(fp
, color
, "%20s", thread
->comm
);
1129 printed
+= fprintf(fp
, " - %-5d :%11lu [", thread
->tid
, ttrace
->nr_events
);
1130 printed
+= color_fprintf(fp
, color
, "%5.1f%%", ratio
);
1131 printed
+= fprintf(fp
, " ] %10.3f ms\n", ttrace
->runtime_ms
);
1137 static int trace__set_duration(const struct option
*opt
, const char *str
,
1138 int unset __maybe_unused
)
1140 struct trace
*trace
= opt
->value
;
1142 trace
->duration_filter
= atof(str
);
1146 static int trace__open_output(struct trace
*trace
, const char *filename
)
1150 if (!stat(filename
, &st
) && st
.st_size
) {
1151 char oldname
[PATH_MAX
];
1153 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
1155 rename(filename
, oldname
);
1158 trace
->output
= fopen(filename
, "w");
1160 return trace
->output
== NULL
? -errno
: 0;
1163 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
1165 const char * const trace_usage
[] = {
1166 "perf trace [<options>] [<command>]",
1167 "perf trace [<options>] -- <command> [<options>]",
1170 struct trace trace
= {
1171 .audit_machine
= audit_detect_machine(),
1180 .user_freq
= UINT_MAX
,
1181 .user_interval
= ULLONG_MAX
,
1187 const char *output_name
= NULL
;
1188 const char *ev_qualifier_str
= NULL
;
1189 const struct option trace_options
[] = {
1190 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr",
1191 "list of events to trace"),
1192 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
1193 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
1194 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
1195 "trace events on existing process id"),
1196 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
1197 "trace events on existing thread id"),
1198 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
1199 "system-wide collection from all CPUs"),
1200 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
1201 "list of cpus to monitor"),
1202 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
1203 "child tasks do not inherit counters"),
1204 OPT_UINTEGER('m', "mmap-pages", &trace
.opts
.mmap_pages
,
1205 "number of mmap data pages"),
1206 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
1208 OPT_CALLBACK(0, "duration", &trace
, "float",
1209 "show only events with duration > N.M ms",
1210 trace__set_duration
),
1211 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
1212 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
1218 argc
= parse_options(argc
, argv
, trace_options
, trace_usage
, 0);
1220 if (output_name
!= NULL
) {
1221 err
= trace__open_output(&trace
, output_name
);
1223 perror("failed to create output file");
1228 if (ev_qualifier_str
!= NULL
) {
1229 const char *s
= ev_qualifier_str
;
1231 trace
.not_ev_qualifier
= *s
== '!';
1232 if (trace
.not_ev_qualifier
)
1234 trace
.ev_qualifier
= strlist__new(true, s
);
1235 if (trace
.ev_qualifier
== NULL
) {
1236 fputs("Not enough memory to parse event qualifier",
1243 err
= perf_target__validate(&trace
.opts
.target
);
1245 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
1246 fprintf(trace
.output
, "%s", bf
);
1250 err
= perf_target__parse_uid(&trace
.opts
.target
);
1252 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
1253 fprintf(trace
.output
, "%s", bf
);
1257 if (!argc
&& perf_target__none(&trace
.opts
.target
))
1258 trace
.opts
.target
.system_wide
= true;
1261 err
= trace__replay(&trace
);
1263 err
= trace__run(&trace
, argc
, argv
);
1265 if (trace
.sched
&& !err
)
1266 trace__fprintf_thread_summary(&trace
, trace
.output
);
1269 if (output_name
!= NULL
)
1270 fclose(trace
.output
);