1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
36 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
38 u8 arg_idx __maybe_unused
,
39 u8
*arg_mask __maybe_unused
)
41 return scnprintf(bf
, size
, "%#lx", arg
);
44 #define SCA_HEX syscall_arg__scnprintf_hex
46 static size_t syscall_arg__scnprintf_whence(char *bf
, size_t size
,
48 u8 arg_idx __maybe_unused
,
49 u8
*arg_mask __maybe_unused
)
54 #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
68 return scnprintf(bf
, size
, "%#x", whence
);
71 #define SCA_WHENCE syscall_arg__scnprintf_whence
73 static size_t syscall_arg__scnprintf_mmap_prot(char *bf
, size_t size
,
75 u8 arg_idx __maybe_unused
,
76 u8
*arg_mask __maybe_unused
)
78 int printed
= 0, prot
= arg
;
80 if (prot
== PROT_NONE
)
81 return scnprintf(bf
, size
, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN
);
99 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", prot
);
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf
, size_t size
,
107 unsigned long arg
, u8 arg_idx __maybe_unused
,
108 u8
*arg_mask __maybe_unused
)
110 int printed
= 0, flags
= arg
;
112 #define P_MMAP_FLAG(n) \
113 if (flags & MAP_##n) { \
114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
119 P_MMAP_FLAG(PRIVATE
);
123 P_MMAP_FLAG(ANONYMOUS
);
124 P_MMAP_FLAG(DENYWRITE
);
125 P_MMAP_FLAG(EXECUTABLE
);
128 P_MMAP_FLAG(GROWSDOWN
);
130 P_MMAP_FLAG(HUGETLB
);
133 P_MMAP_FLAG(NONBLOCK
);
134 P_MMAP_FLAG(NORESERVE
);
135 P_MMAP_FLAG(POPULATE
);
137 #ifdef MAP_UNINITIALIZED
138 P_MMAP_FLAG(UNINITIALIZED
);
143 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf
, size_t size
,
151 unsigned long arg
, u8 arg_idx __maybe_unused
,
152 u8
*arg_mask __maybe_unused
)
157 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
160 P_MADV_BHV(SEQUENTIAL
);
161 P_MADV_BHV(WILLNEED
);
162 P_MADV_BHV(DONTNEED
);
164 P_MADV_BHV(DONTFORK
);
166 P_MADV_BHV(HWPOISON
);
167 #ifdef MADV_SOFT_OFFLINE
168 P_MADV_BHV(SOFT_OFFLINE
);
170 P_MADV_BHV(MERGEABLE
);
171 P_MADV_BHV(UNMERGEABLE
);
173 P_MADV_BHV(HUGEPAGE
);
175 #ifdef MADV_NOHUGEPAGE
176 P_MADV_BHV(NOHUGEPAGE
);
179 P_MADV_BHV(DONTDUMP
);
188 return scnprintf(bf
, size
, "%#x", behavior
);
191 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
193 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, unsigned long arg
,
194 u8 arg_idx __maybe_unused
, u8
*arg_mask
)
196 enum syscall_futex_args
{
197 SCF_UADDR
= (1 << 0),
200 SCF_TIMEOUT
= (1 << 3),
201 SCF_UADDR2
= (1 << 4),
205 int cmd
= op
& FUTEX_CMD_MASK
;
209 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
210 P_FUTEX_OP(WAIT
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
; break;
211 P_FUTEX_OP(WAKE
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
212 P_FUTEX_OP(FD
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
213 P_FUTEX_OP(REQUEUE
); *arg_mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
214 P_FUTEX_OP(CMP_REQUEUE
); *arg_mask
|= SCF_TIMEOUT
; break;
215 P_FUTEX_OP(CMP_REQUEUE_PI
); *arg_mask
|= SCF_TIMEOUT
; break;
216 P_FUTEX_OP(WAKE_OP
); break;
217 P_FUTEX_OP(LOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
218 P_FUTEX_OP(UNLOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
219 P_FUTEX_OP(TRYLOCK_PI
); *arg_mask
|= SCF_VAL3
|SCF_UADDR2
; break;
220 P_FUTEX_OP(WAIT_BITSET
); *arg_mask
|= SCF_UADDR2
; break;
221 P_FUTEX_OP(WAKE_BITSET
); *arg_mask
|= SCF_UADDR2
; break;
222 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
223 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
226 if (op
& FUTEX_PRIVATE_FLAG
)
227 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
229 if (op
& FUTEX_CLOCK_REALTIME
)
230 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
235 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
237 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
239 u8 arg_idx
, u8
*arg_mask
)
241 int printed
= 0, flags
= arg
;
243 if (!(flags
& O_CREAT
))
244 *arg_mask
|= 1 << (arg_idx
+ 1); /* Mask the mode parm */
247 return scnprintf(bf
, size
, "RDONLY");
249 if (flags & O_##n) { \
250 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
274 if ((flags
& O_SYNC
) == O_SYNC
)
275 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
287 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
292 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
294 static struct syscall_fmt
{
297 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, unsigned long arg
, u8 arg_idx
, u8
*arg_mask
);
302 { .name
= "access", .errmsg
= true, },
303 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
304 { .name
= "brk", .hexret
= true,
305 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
306 { .name
= "mmap", .hexret
= true, },
307 { .name
= "connect", .errmsg
= true, },
308 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat", },
309 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat", },
310 { .name
= "futex", .errmsg
= true,
311 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
312 { .name
= "ioctl", .errmsg
= true,
313 .arg_scnprintf
= { [2] = SCA_HEX
, /* arg */ }, },
314 { .name
= "lseek", .errmsg
= true,
315 .arg_scnprintf
= { [2] = SCA_WHENCE
, /* whence */ }, },
316 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat", },
317 { .name
= "madvise", .errmsg
= true,
318 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
319 [2] = SCA_MADV_BHV
, /* behavior */ }, },
320 { .name
= "mmap", .hexret
= true,
321 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
322 [2] = SCA_MMAP_PROT
, /* prot */
323 [3] = SCA_MMAP_FLAGS
, /* flags */ }, },
324 { .name
= "mprotect", .errmsg
= true,
325 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
326 [2] = SCA_MMAP_PROT
, /* prot */ }, },
327 { .name
= "mremap", .hexret
= true,
328 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
329 [4] = SCA_HEX
, /* new_addr */ }, },
330 { .name
= "munmap", .errmsg
= true,
331 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
332 { .name
= "open", .errmsg
= true,
333 .arg_scnprintf
= { [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
334 { .name
= "open_by_handle_at", .errmsg
= true,
335 .arg_scnprintf
= { [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
336 { .name
= "openat", .errmsg
= true,
337 .arg_scnprintf
= { [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
338 { .name
= "poll", .errmsg
= true, .timeout
= true, },
339 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
340 { .name
= "pread", .errmsg
= true, .alias
= "pread64", },
341 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64", },
342 { .name
= "read", .errmsg
= true, },
343 { .name
= "recvfrom", .errmsg
= true, },
344 { .name
= "select", .errmsg
= true, .timeout
= true, },
345 { .name
= "socket", .errmsg
= true, },
346 { .name
= "stat", .errmsg
= true, .alias
= "newstat", },
347 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
350 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
352 const struct syscall_fmt
*fmt
= fmtp
;
353 return strcmp(name
, fmt
->name
);
356 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
358 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
359 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
363 struct event_format
*tp_format
;
366 struct syscall_fmt
*fmt
;
367 size_t (**arg_scnprintf
)(char *bf
, size_t size
,
368 unsigned long arg
, u8 arg_idx
, u8
*args_mask
);
371 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
373 double duration
= (double)t
/ NSEC_PER_MSEC
;
374 size_t printed
= fprintf(fp
, "(");
377 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
378 else if (duration
>= 0.01)
379 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
381 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
382 return printed
+ fprintf(fp
, "): ");
385 struct thread_trace
{
389 unsigned long nr_events
;
394 static struct thread_trace
*thread_trace__new(void)
396 return zalloc(sizeof(struct thread_trace
));
399 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
401 struct thread_trace
*ttrace
;
406 if (thread
->priv
== NULL
)
407 thread
->priv
= thread_trace__new();
409 if (thread
->priv
== NULL
)
412 ttrace
= thread
->priv
;
417 color_fprintf(fp
, PERF_COLOR_RED
,
418 "WARNING: not enough memory, dropping samples!\n");
423 struct perf_tool tool
;
427 struct syscall
*table
;
429 struct perf_record_opts opts
;
433 unsigned long nr_events
;
434 struct strlist
*ev_qualifier
;
435 bool not_ev_qualifier
;
436 struct intlist
*tid_list
;
437 struct intlist
*pid_list
;
439 bool multiple_threads
;
440 double duration_filter
;
444 static bool trace__filter_duration(struct trace
*trace
, double t
)
446 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
449 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
451 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
453 return fprintf(fp
, "%10.3f ", ts
);
456 static bool done
= false;
458 static void sig_handler(int sig __maybe_unused
)
463 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
464 u64 duration
, u64 tstamp
, FILE *fp
)
466 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
467 printed
+= fprintf_duration(duration
, fp
);
469 if (trace
->multiple_threads
)
470 printed
+= fprintf(fp
, "%d ", thread
->tid
);
475 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
476 union perf_event
*event
)
480 switch (event
->header
.type
) {
481 case PERF_RECORD_LOST
:
482 color_fprintf(trace
->output
, PERF_COLOR_RED
,
483 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
484 ret
= machine__process_lost_event(machine
, event
);
486 ret
= machine__process_event(machine
, event
);
493 static int trace__tool_process(struct perf_tool
*tool
,
494 union perf_event
*event
,
495 struct perf_sample
*sample __maybe_unused
,
496 struct machine
*machine
)
498 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
499 return trace__process_event(trace
, machine
, event
);
502 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
504 int err
= symbol__init();
509 machine__init(&trace
->host
, "", HOST_KERNEL_ID
);
510 machine__create_kernel_maps(&trace
->host
);
512 if (perf_target__has_task(&trace
->opts
.target
)) {
513 err
= perf_event__synthesize_thread_map(&trace
->tool
, evlist
->threads
,
517 err
= perf_event__synthesize_threads(&trace
->tool
, trace__tool_process
,
527 static int syscall__set_arg_fmts(struct syscall
*sc
)
529 struct format_field
*field
;
532 sc
->arg_scnprintf
= calloc(sc
->tp_format
->format
.nr_fields
- 1, sizeof(void *));
533 if (sc
->arg_scnprintf
== NULL
)
536 for (field
= sc
->tp_format
->format
.fields
->next
; field
; field
= field
->next
) {
537 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
538 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
539 else if (field
->flags
& FIELD_IS_POINTER
)
540 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
547 static int trace__read_syscall_info(struct trace
*trace
, int id
)
551 const char *name
= audit_syscall_to_name(id
, trace
->audit_machine
);
556 if (id
> trace
->syscalls
.max
) {
557 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
559 if (nsyscalls
== NULL
)
562 if (trace
->syscalls
.max
!= -1) {
563 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
564 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
566 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
569 trace
->syscalls
.table
= nsyscalls
;
570 trace
->syscalls
.max
= id
;
573 sc
= trace
->syscalls
.table
+ id
;
576 if (trace
->ev_qualifier
) {
577 bool in
= strlist__find(trace
->ev_qualifier
, name
) != NULL
;
579 if (!(in
^ trace
->not_ev_qualifier
)) {
582 * No need to do read tracepoint information since this will be
589 sc
->fmt
= syscall_fmt__find(sc
->name
);
591 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
592 sc
->tp_format
= event_format__new("syscalls", tp_name
);
594 if (sc
->tp_format
== NULL
&& sc
->fmt
&& sc
->fmt
->alias
) {
595 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
596 sc
->tp_format
= event_format__new("syscalls", tp_name
);
599 if (sc
->tp_format
== NULL
)
602 return syscall__set_arg_fmts(sc
);
605 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
611 if (sc
->tp_format
!= NULL
) {
612 struct format_field
*field
;
613 u8 mask
= 0, bit
= 1;
615 for (field
= sc
->tp_format
->format
.fields
->next
; field
;
616 field
= field
->next
, ++i
, bit
<<= 1) {
620 printed
+= scnprintf(bf
+ printed
, size
- printed
,
621 "%s%s: ", printed
? ", " : "", field
->name
);
623 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[i
]) {
624 printed
+= sc
->arg_scnprintf
[i
](bf
+ printed
, size
- printed
,
627 printed
+= scnprintf(bf
+ printed
, size
- printed
,
633 printed
+= scnprintf(bf
+ printed
, size
- printed
,
635 printed
? ", " : "", i
, args
[i
]);
643 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
644 struct perf_sample
*sample
);
646 static struct syscall
*trace__syscall_info(struct trace
*trace
,
647 struct perf_evsel
*evsel
,
648 struct perf_sample
*sample
)
650 int id
= perf_evsel__intval(evsel
, sample
, "id");
655 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
656 * before that, leaving at a higher verbosity level till that is
657 * explained. Reproduced with plain ftrace with:
659 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
660 * grep "NR -1 " /t/trace_pipe
662 * After generating some load on the machine.
666 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
667 id
, perf_evsel__name(evsel
), ++n
);
672 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
673 trace__read_syscall_info(trace
, id
))
676 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
679 return &trace
->syscalls
.table
[id
];
683 fprintf(trace
->output
, "Problems reading syscall %d", id
);
684 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
685 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
686 fputs(" information\n", trace
->output
);
691 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
692 struct perf_sample
*sample
)
697 struct thread
*thread
;
698 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
699 struct thread_trace
*ttrace
;
707 thread
= machine__findnew_thread(&trace
->host
, sample
->pid
,
709 ttrace
= thread__trace(thread
, trace
->output
);
713 args
= perf_evsel__rawptr(evsel
, sample
, "args");
715 fprintf(trace
->output
, "Problems reading syscall arguments\n");
719 ttrace
= thread
->priv
;
721 if (ttrace
->entry_str
== NULL
) {
722 ttrace
->entry_str
= malloc(1024);
723 if (!ttrace
->entry_str
)
727 ttrace
->entry_time
= sample
->time
;
728 msg
= ttrace
->entry_str
;
729 printed
+= scnprintf(msg
+ printed
, 1024 - printed
, "%s(", sc
->name
);
731 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, 1024 - printed
, args
);
733 if (!strcmp(sc
->name
, "exit_group") || !strcmp(sc
->name
, "exit")) {
734 if (!trace
->duration_filter
) {
735 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
736 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
739 ttrace
->entry_pending
= true;
744 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
745 struct perf_sample
*sample
)
749 struct thread
*thread
;
750 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
751 struct thread_trace
*ttrace
;
759 thread
= machine__findnew_thread(&trace
->host
, sample
->pid
,
761 ttrace
= thread__trace(thread
, trace
->output
);
765 ret
= perf_evsel__intval(evsel
, sample
, "ret");
767 ttrace
= thread
->priv
;
769 ttrace
->exit_time
= sample
->time
;
771 if (ttrace
->entry_time
) {
772 duration
= sample
->time
- ttrace
->entry_time
;
773 if (trace__filter_duration(trace
, duration
))
775 } else if (trace
->duration_filter
)
778 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
780 if (ttrace
->entry_pending
) {
781 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
783 fprintf(trace
->output
, " ... [");
784 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
785 fprintf(trace
->output
, "]: %s()", sc
->name
);
788 if (sc
->fmt
== NULL
) {
790 fprintf(trace
->output
, ") = %d", ret
);
791 } else if (ret
< 0 && sc
->fmt
->errmsg
) {
793 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
794 *e
= audit_errno_to_name(-ret
);
796 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
797 } else if (ret
== 0 && sc
->fmt
->timeout
)
798 fprintf(trace
->output
, ") = 0 Timeout");
799 else if (sc
->fmt
->hexret
)
800 fprintf(trace
->output
, ") = %#x", ret
);
804 fputc('\n', trace
->output
);
806 ttrace
->entry_pending
= false;
811 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
812 struct perf_sample
*sample
)
814 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
815 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
816 struct thread
*thread
= machine__findnew_thread(&trace
->host
,
819 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
824 ttrace
->runtime_ms
+= runtime_ms
;
825 trace
->runtime_ms
+= runtime_ms
;
829 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
831 perf_evsel__strval(evsel
, sample
, "comm"),
832 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
834 perf_evsel__intval(evsel
, sample
, "vruntime"));
838 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
840 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
841 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
844 if (trace
->pid_list
|| trace
->tid_list
)
850 static int trace__process_sample(struct perf_tool
*tool
,
851 union perf_event
*event __maybe_unused
,
852 struct perf_sample
*sample
,
853 struct perf_evsel
*evsel
,
854 struct machine
*machine __maybe_unused
)
856 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
859 tracepoint_handler handler
= evsel
->handler
.func
;
861 if (skip_sample(trace
, sample
))
864 if (trace
->base_time
== 0)
865 trace
->base_time
= sample
->time
;
868 handler(trace
, evsel
, sample
);
874 perf_session__has_tp(struct perf_session
*session
, const char *name
)
876 struct perf_evsel
*evsel
;
878 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
, name
);
880 return evsel
!= NULL
;
883 static int parse_target_str(struct trace
*trace
)
885 if (trace
->opts
.target
.pid
) {
886 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
887 if (trace
->pid_list
== NULL
) {
888 pr_err("Error parsing process id string\n");
893 if (trace
->opts
.target
.tid
) {
894 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
895 if (trace
->tid_list
== NULL
) {
896 pr_err("Error parsing thread id string\n");
904 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
906 struct perf_evlist
*evlist
= perf_evlist__new();
907 struct perf_evsel
*evsel
;
909 unsigned long before
;
910 const bool forks
= argc
> 0;
912 if (evlist
== NULL
) {
913 fprintf(trace
->output
, "Not enough memory to run!\n");
917 if (perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_enter", trace__sys_enter
) ||
918 perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_exit", trace__sys_exit
)) {
919 fprintf(trace
->output
, "Couldn't read the raw_syscalls tracepoints information!\n");
920 goto out_delete_evlist
;
924 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
925 trace__sched_stat_runtime
)) {
926 fprintf(trace
->output
, "Couldn't read the sched_stat_runtime tracepoint information!\n");
927 goto out_delete_evlist
;
930 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
932 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
933 goto out_delete_evlist
;
936 err
= trace__symbols_init(trace
, evlist
);
938 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
939 goto out_delete_maps
;
942 perf_evlist__config(evlist
, &trace
->opts
);
944 signal(SIGCHLD
, sig_handler
);
945 signal(SIGINT
, sig_handler
);
948 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
951 fprintf(trace
->output
, "Couldn't run the workload!\n");
952 goto out_delete_maps
;
956 err
= perf_evlist__open(evlist
);
958 fprintf(trace
->output
, "Couldn't create the events: %s\n", strerror(errno
));
959 goto out_delete_maps
;
962 err
= perf_evlist__mmap(evlist
, UINT_MAX
, false);
964 fprintf(trace
->output
, "Couldn't mmap the events: %s\n", strerror(errno
));
965 goto out_close_evlist
;
968 perf_evlist__enable(evlist
);
971 perf_evlist__start_workload(evlist
);
973 trace
->multiple_threads
= evlist
->threads
->map
[0] == -1 || evlist
->threads
->nr
> 1;
975 before
= trace
->nr_events
;
977 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
978 union perf_event
*event
;
980 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
981 const u32 type
= event
->header
.type
;
982 tracepoint_handler handler
;
983 struct perf_sample sample
;
987 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
989 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
993 if (trace
->base_time
== 0)
994 trace
->base_time
= sample
.time
;
996 if (type
!= PERF_RECORD_SAMPLE
) {
997 trace__process_event(trace
, &trace
->host
, event
);
1001 evsel
= perf_evlist__id2evsel(evlist
, sample
.id
);
1002 if (evsel
== NULL
) {
1003 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
.id
);
1007 if (sample
.raw_data
== NULL
) {
1008 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1009 perf_evsel__name(evsel
), sample
.tid
,
1010 sample
.cpu
, sample
.raw_size
);
1014 handler
= evsel
->handler
.func
;
1015 handler(trace
, evsel
, &sample
);
1017 perf_evlist__mmap_consume(evlist
, i
);
1020 goto out_unmap_evlist
;
1024 if (trace
->nr_events
== before
) {
1026 goto out_unmap_evlist
;
1028 poll(evlist
->pollfd
, evlist
->nr_fds
, -1);
1032 perf_evlist__disable(evlist
);
1037 perf_evlist__munmap(evlist
);
1039 perf_evlist__close(evlist
);
1041 perf_evlist__delete_maps(evlist
);
1043 perf_evlist__delete(evlist
);
1048 static int trace__replay(struct trace
*trace
)
1050 const struct perf_evsel_str_handler handlers
[] = {
1051 { "raw_syscalls:sys_enter", trace__sys_enter
, },
1052 { "raw_syscalls:sys_exit", trace__sys_exit
, },
1055 struct perf_session
*session
;
1058 trace
->tool
.sample
= trace__process_sample
;
1059 trace
->tool
.mmap
= perf_event__process_mmap
;
1060 trace
->tool
.mmap2
= perf_event__process_mmap2
;
1061 trace
->tool
.comm
= perf_event__process_comm
;
1062 trace
->tool
.exit
= perf_event__process_exit
;
1063 trace
->tool
.fork
= perf_event__process_fork
;
1064 trace
->tool
.attr
= perf_event__process_attr
;
1065 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
1066 trace
->tool
.build_id
= perf_event__process_build_id
;
1068 trace
->tool
.ordered_samples
= true;
1069 trace
->tool
.ordering_requires_timestamps
= true;
1071 /* add tid to output */
1072 trace
->multiple_threads
= true;
1074 if (symbol__init() < 0)
1077 session
= perf_session__new(input_name
, O_RDONLY
, 0, false,
1079 if (session
== NULL
)
1082 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
1086 if (!perf_session__has_tp(session
, "raw_syscalls:sys_enter")) {
1087 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1091 if (!perf_session__has_tp(session
, "raw_syscalls:sys_exit")) {
1092 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1096 err
= parse_target_str(trace
);
1102 err
= perf_session__process_events(session
, &trace
->tool
);
1104 pr_err("Failed to process events, error %d", err
);
1107 perf_session__delete(session
);
1112 static size_t trace__fprintf_threads_header(FILE *fp
)
1116 printed
= fprintf(fp
, "\n _____________________________________________________________________\n");
1117 printed
+= fprintf(fp
," __) Summary of events (__\n\n");
1118 printed
+= fprintf(fp
," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1119 printed
+= fprintf(fp
," _____________________________________________________________________\n\n");
1124 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
1126 size_t printed
= trace__fprintf_threads_header(fp
);
1129 for (nd
= rb_first(&trace
->host
.threads
); nd
; nd
= rb_next(nd
)) {
1130 struct thread
*thread
= rb_entry(nd
, struct thread
, rb_node
);
1131 struct thread_trace
*ttrace
= thread
->priv
;
1138 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
1140 color
= PERF_COLOR_NORMAL
;
1142 color
= PERF_COLOR_RED
;
1143 else if (ratio
> 25.0)
1144 color
= PERF_COLOR_GREEN
;
1145 else if (ratio
> 5.0)
1146 color
= PERF_COLOR_YELLOW
;
1148 printed
+= color_fprintf(fp
, color
, "%20s", thread
->comm
);
1149 printed
+= fprintf(fp
, " - %-5d :%11lu [", thread
->tid
, ttrace
->nr_events
);
1150 printed
+= color_fprintf(fp
, color
, "%5.1f%%", ratio
);
1151 printed
+= fprintf(fp
, " ] %10.3f ms\n", ttrace
->runtime_ms
);
1157 static int trace__set_duration(const struct option
*opt
, const char *str
,
1158 int unset __maybe_unused
)
1160 struct trace
*trace
= opt
->value
;
1162 trace
->duration_filter
= atof(str
);
1166 static int trace__open_output(struct trace
*trace
, const char *filename
)
1170 if (!stat(filename
, &st
) && st
.st_size
) {
1171 char oldname
[PATH_MAX
];
1173 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
1175 rename(filename
, oldname
);
1178 trace
->output
= fopen(filename
, "w");
1180 return trace
->output
== NULL
? -errno
: 0;
1183 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
1185 const char * const trace_usage
[] = {
1186 "perf trace [<options>] [<command>]",
1187 "perf trace [<options>] -- <command> [<options>]",
1190 struct trace trace
= {
1191 .audit_machine
= audit_detect_machine(),
1200 .user_freq
= UINT_MAX
,
1201 .user_interval
= ULLONG_MAX
,
1207 const char *output_name
= NULL
;
1208 const char *ev_qualifier_str
= NULL
;
1209 const struct option trace_options
[] = {
1210 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr",
1211 "list of events to trace"),
1212 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
1213 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
1214 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
1215 "trace events on existing process id"),
1216 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
1217 "trace events on existing thread id"),
1218 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
1219 "system-wide collection from all CPUs"),
1220 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
1221 "list of cpus to monitor"),
1222 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
1223 "child tasks do not inherit counters"),
1224 OPT_UINTEGER('m', "mmap-pages", &trace
.opts
.mmap_pages
,
1225 "number of mmap data pages"),
1226 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
1228 OPT_CALLBACK(0, "duration", &trace
, "float",
1229 "show only events with duration > N.M ms",
1230 trace__set_duration
),
1231 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
1232 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
1238 argc
= parse_options(argc
, argv
, trace_options
, trace_usage
, 0);
1240 if (output_name
!= NULL
) {
1241 err
= trace__open_output(&trace
, output_name
);
1243 perror("failed to create output file");
1248 if (ev_qualifier_str
!= NULL
) {
1249 const char *s
= ev_qualifier_str
;
1251 trace
.not_ev_qualifier
= *s
== '!';
1252 if (trace
.not_ev_qualifier
)
1254 trace
.ev_qualifier
= strlist__new(true, s
);
1255 if (trace
.ev_qualifier
== NULL
) {
1256 fputs("Not enough memory to parse event qualifier",
1263 err
= perf_target__validate(&trace
.opts
.target
);
1265 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
1266 fprintf(trace
.output
, "%s", bf
);
1270 err
= perf_target__parse_uid(&trace
.opts
.target
);
1272 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
1273 fprintf(trace
.output
, "%s", bf
);
1277 if (!argc
&& perf_target__none(&trace
.opts
.target
))
1278 trace
.opts
.target
.system_wide
= true;
1281 err
= trace__replay(&trace
);
1283 err
= trace__run(&trace
, argc
, argv
);
1285 if (trace
.sched
&& !err
)
1286 trace__fprintf_thread_summary(&trace
, trace
.output
);
1289 if (output_name
!= NULL
)
1290 fclose(trace
.output
);