power: supply: bq24190_charger: Add disable-reset device-property
[linux/fpc-iii.git] / tools / perf / builtin-trace.c
blob4c596ba310cbfa8d61909c50abd1c1e5cbfea142
1 /*
2 * builtin-trace.c
4 * Builtin 'trace' command:
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
16 * Released under the GPL v2. (and only v2, not any later version)
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <linux/random.h>
47 #include <linux/stringify.h>
48 #include <linux/time64.h>
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC 02000000
52 #endif
54 struct trace {
55 struct perf_tool tool;
56 struct syscalltbl *sctbl;
57 struct {
58 int max;
59 struct syscall *table;
60 struct {
61 struct perf_evsel *sys_enter,
62 *sys_exit;
63 } events;
64 } syscalls;
65 struct record_opts opts;
66 struct perf_evlist *evlist;
67 struct machine *host;
68 struct thread *current;
69 u64 base_time;
70 FILE *output;
71 unsigned long nr_events;
72 struct strlist *ev_qualifier;
73 struct {
74 size_t nr;
75 int *entries;
76 } ev_qualifier_ids;
77 struct intlist *tid_list;
78 struct intlist *pid_list;
79 struct {
80 size_t nr;
81 pid_t *entries;
82 } filter_pids;
83 double duration_filter;
84 double runtime_ms;
85 struct {
86 u64 vfs_getname,
87 proc_getname;
88 } stats;
89 unsigned int max_stack;
90 unsigned int min_stack;
91 bool not_ev_qualifier;
92 bool live;
93 bool full_time;
94 bool sched;
95 bool multiple_threads;
96 bool summary;
97 bool summary_only;
98 bool show_comm;
99 bool show_tool_stats;
100 bool trace_syscalls;
101 bool kernel_syscallchains;
102 bool force;
103 bool vfs_getname;
104 int trace_pgfaults;
105 int open_id;
108 struct tp_field {
109 int offset;
110 union {
111 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
116 #define TP_UINT_FIELD(bits) \
117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
119 u##bits value; \
120 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 return value; \
124 TP_UINT_FIELD(8);
125 TP_UINT_FIELD(16);
126 TP_UINT_FIELD(32);
127 TP_UINT_FIELD(64);
129 #define TP_UINT_FIELD__SWAPPED(bits) \
130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
132 u##bits value; \
133 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
134 return bswap_##bits(value);\
137 TP_UINT_FIELD__SWAPPED(16);
138 TP_UINT_FIELD__SWAPPED(32);
139 TP_UINT_FIELD__SWAPPED(64);
141 static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
145 field->offset = format_field->offset;
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
164 return 0;
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
169 return sample->raw_data + field->offset;
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
179 struct syscall_tp {
180 struct tp_field id;
181 union {
182 struct tp_field args, ret;
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
190 struct format_field *format_field = perf_evsel__field(evsel, name);
192 if (format_field == NULL)
193 return -1;
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialize member @name of the struct syscall_tp hanging off evsel->priv
 * from the tracepoint field carrying the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); \
	})
/*
 * Look up tracepoint field @name in @evsel and set @field up as a pointer
 * accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): initialize member
 * @name of evsel->priv from the tracepoint field carrying the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); \
	})
218 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
220 zfree(&evsel->priv);
221 perf_evsel__delete(evsel);
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
231 evsel->handler = handler;
232 return 0;
235 return -ENOMEM;
237 out_delete:
238 zfree(&evsel->priv);
239 return -ENOENT;
242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
244 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
246 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
247 if (IS_ERR(evsel))
248 evsel = perf_evsel__newtp("syscalls", direction);
250 if (IS_ERR(evsel))
251 return NULL;
253 if (perf_evsel__init_syscall_tp(evsel, handler))
254 goto out_delete;
256 return evsel;
258 out_delete:
259 perf_evsel__delete_priv(evsel);
260 return NULL;
/*
 * Read member @name of evsel->priv (a struct syscall_tp) out of @sample via
 * the accessor installed at init time: integer flavour first, pointer
 * flavour second.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.integer(&fields->name, sample); \
	})

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
271 struct syscall_arg {
272 unsigned long val;
273 struct thread *thread;
274 struct trace *trace;
275 void *parm;
276 u8 idx;
277 u8 mask;
/*
 * Table mapping integer values to names: entries[i] names the value
 * (i + offset).
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
/*
 * Define strarray__<array>, a struct strarray wrapping the string table
 * @array; the _OFFSET variant also records the value named by entry 0.
 */
#define DEFINE_STRARRAY(array) \
	struct strarray strarray__##array = { \
		.entries    = array, \
		.nr_entries = ARRAY_SIZE(array), \
	}

#define DEFINE_STRARRAY_OFFSET(array, off) \
	struct strarray strarray__##array = { \
		.entries    = array, \
		.nr_entries = ARRAY_SIZE(array), \
		.offset     = off, \
	}
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
301 struct strarray *sa = arg->parm;
302 int idx = arg->val - sa->offset;
304 if (idx < 0 || idx >= sa->nr_entries)
305 return scnprintf(bf, size, intfmt, arg->val);
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* strarray formatter that prints unknown values in decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints unknown values in hex ("%#x").
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 struct syscall_arg *arg);
335 #define SCA_FD syscall_arg__scnprintf_fd
337 #ifndef AT_FDCWD
338 #define AT_FDCWD -100
339 #endif
341 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
342 struct syscall_arg *arg)
344 int fd = arg->val;
346 if (fd == AT_FDCWD)
347 return scnprintf(bf, size, "CWD");
349 return syscall_arg__scnprintf_fd(bf, size, arg);
352 #define SCA_FDAT syscall_arg__scnprintf_fd_at
354 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
355 struct syscall_arg *arg);
357 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
359 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
360 struct syscall_arg *arg)
362 return scnprintf(bf, size, "%#lx", arg->val);
365 #define SCA_HEX syscall_arg__scnprintf_hex
367 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
368 struct syscall_arg *arg)
370 return scnprintf(bf, size, "%d", arg->val);
373 #define SCA_INT syscall_arg__scnprintf_int
375 static const char *bpf_cmd[] = {
376 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
377 "MAP_GET_NEXT_KEY", "PROG_LOAD",
379 static DEFINE_STRARRAY(bpf_cmd);
381 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
382 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
384 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
385 static DEFINE_STRARRAY(itimers);
387 static const char *keyctl_options[] = {
388 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
389 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
390 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
391 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
392 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
394 static DEFINE_STRARRAY(keyctl_options);
396 static const char *whences[] = { "SET", "CUR", "END",
397 #ifdef SEEK_DATA
398 "DATA",
399 #endif
400 #ifdef SEEK_HOLE
401 "HOLE",
402 #endif
404 static DEFINE_STRARRAY(whences);
406 static const char *fcntl_cmds[] = {
407 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
408 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
409 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
410 "F_GETOWNER_UIDS",
412 static DEFINE_STRARRAY(fcntl_cmds);
414 static const char *rlimit_resources[] = {
415 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
416 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
417 "RTTIME",
419 static DEFINE_STRARRAY(rlimit_resources);
421 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
422 static DEFINE_STRARRAY(sighow);
424 static const char *clockid[] = {
425 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
426 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
427 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
429 static DEFINE_STRARRAY(clockid);
431 static const char *socket_families[] = {
432 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
433 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
434 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
435 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
436 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
437 "ALG", "NFC", "VSOCK",
439 static DEFINE_STRARRAY(socket_families);
441 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
442 struct syscall_arg *arg)
444 size_t printed = 0;
445 int mode = arg->val;
447 if (mode == F_OK) /* 0 */
448 return scnprintf(bf, size, "F");
449 #define P_MODE(n) \
450 if (mode & n##_OK) { \
451 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
452 mode &= ~n##_OK; \
455 P_MODE(R);
456 P_MODE(W);
457 P_MODE(X);
458 #undef P_MODE
460 if (mode)
461 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
463 return printed;
466 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
468 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
469 struct syscall_arg *arg);
471 #define SCA_FILENAME syscall_arg__scnprintf_filename
473 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
474 struct syscall_arg *arg)
476 int printed = 0, flags = arg->val;
478 #define P_FLAG(n) \
479 if (flags & O_##n) { \
480 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
481 flags &= ~O_##n; \
484 P_FLAG(CLOEXEC);
485 P_FLAG(NONBLOCK);
486 #undef P_FLAG
488 if (flags)
489 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
491 return printed;
494 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
496 #if defined(__i386__) || defined(__x86_64__)
498 * FIXME: Make this available to all arches.
500 #define TCGETS 0x5401
502 static const char *tioctls[] = {
503 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
504 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
505 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
506 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
507 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
508 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
509 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
510 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
511 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
512 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
513 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
514 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
515 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
516 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
517 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
520 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
521 #endif /* defined(__i386__) || defined(__x86_64__) */
523 #ifndef GRND_NONBLOCK
524 #define GRND_NONBLOCK 0x0001
525 #endif
526 #ifndef GRND_RANDOM
527 #define GRND_RANDOM 0x0002
528 #endif
530 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
531 struct syscall_arg *arg)
533 int printed = 0, flags = arg->val;
535 #define P_FLAG(n) \
536 if (flags & GRND_##n) { \
537 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
538 flags &= ~GRND_##n; \
541 P_FLAG(RANDOM);
542 P_FLAG(NONBLOCK);
543 #undef P_FLAG
545 if (flags)
546 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
548 return printed;
551 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
553 #define STRARRAY(arg, name, array) \
554 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
555 .arg_parm = { [arg] = &strarray__##array, }
557 #include "trace/beauty/eventfd.c"
558 #include "trace/beauty/flock.c"
559 #include "trace/beauty/futex_op.c"
560 #include "trace/beauty/mmap.c"
561 #include "trace/beauty/mode_t.c"
562 #include "trace/beauty/msg_flags.c"
563 #include "trace/beauty/open_flags.c"
564 #include "trace/beauty/perf_event_open.c"
565 #include "trace/beauty/pid.c"
566 #include "trace/beauty/sched_policy.c"
567 #include "trace/beauty/seccomp.c"
568 #include "trace/beauty/signum.c"
569 #include "trace/beauty/socket_type.c"
570 #include "trace/beauty/waitid_options.c"
572 static struct syscall_fmt {
573 const char *name;
574 const char *alias;
575 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
576 void *arg_parm[6];
577 bool errmsg;
578 bool errpid;
579 bool timeout;
580 bool hexret;
581 } syscall_fmts[] = {
582 { .name = "access", .errmsg = true,
583 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
584 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
585 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
586 { .name = "brk", .hexret = true,
587 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
588 { .name = "chdir", .errmsg = true, },
589 { .name = "chmod", .errmsg = true, },
590 { .name = "chroot", .errmsg = true, },
591 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
592 { .name = "clone", .errpid = true, },
593 { .name = "close", .errmsg = true,
594 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
595 { .name = "connect", .errmsg = true, },
596 { .name = "creat", .errmsg = true, },
597 { .name = "dup", .errmsg = true, },
598 { .name = "dup2", .errmsg = true, },
599 { .name = "dup3", .errmsg = true, },
600 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
601 { .name = "eventfd2", .errmsg = true,
602 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
603 { .name = "faccessat", .errmsg = true, },
604 { .name = "fadvise64", .errmsg = true, },
605 { .name = "fallocate", .errmsg = true, },
606 { .name = "fchdir", .errmsg = true, },
607 { .name = "fchmod", .errmsg = true, },
608 { .name = "fchmodat", .errmsg = true,
609 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
610 { .name = "fchown", .errmsg = true, },
611 { .name = "fchownat", .errmsg = true,
612 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
613 { .name = "fcntl", .errmsg = true,
614 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
615 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
616 { .name = "fdatasync", .errmsg = true, },
617 { .name = "flock", .errmsg = true,
618 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
619 { .name = "fsetxattr", .errmsg = true, },
620 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
621 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
622 { .name = "fstatfs", .errmsg = true, },
623 { .name = "fsync", .errmsg = true, },
624 { .name = "ftruncate", .errmsg = true, },
625 { .name = "futex", .errmsg = true,
626 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
627 { .name = "futimesat", .errmsg = true,
628 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
629 { .name = "getdents", .errmsg = true, },
630 { .name = "getdents64", .errmsg = true, },
631 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
632 { .name = "getpid", .errpid = true, },
633 { .name = "getpgid", .errpid = true, },
634 { .name = "getppid", .errpid = true, },
635 { .name = "getrandom", .errmsg = true,
636 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
637 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
638 { .name = "getxattr", .errmsg = true, },
639 { .name = "inotify_add_watch", .errmsg = true, },
640 { .name = "ioctl", .errmsg = true,
641 .arg_scnprintf = {
642 #if defined(__i386__) || defined(__x86_64__)
644 * FIXME: Make this available to all arches.
646 [1] = SCA_STRHEXARRAY, /* cmd */
647 [2] = SCA_HEX, /* arg */ },
648 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
649 #else
650 [2] = SCA_HEX, /* arg */ }, },
651 #endif
652 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
653 { .name = "kill", .errmsg = true,
654 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
655 { .name = "lchown", .errmsg = true, },
656 { .name = "lgetxattr", .errmsg = true, },
657 { .name = "linkat", .errmsg = true,
658 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
659 { .name = "listxattr", .errmsg = true, },
660 { .name = "llistxattr", .errmsg = true, },
661 { .name = "lremovexattr", .errmsg = true, },
662 { .name = "lseek", .errmsg = true,
663 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
664 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
665 { .name = "lsetxattr", .errmsg = true, },
666 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
667 { .name = "lsxattr", .errmsg = true, },
668 { .name = "madvise", .errmsg = true,
669 .arg_scnprintf = { [0] = SCA_HEX, /* start */
670 [2] = SCA_MADV_BHV, /* behavior */ }, },
671 { .name = "mkdir", .errmsg = true, },
672 { .name = "mkdirat", .errmsg = true,
673 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
674 { .name = "mknod", .errmsg = true, },
675 { .name = "mknodat", .errmsg = true,
676 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
677 { .name = "mlock", .errmsg = true,
678 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
679 { .name = "mlockall", .errmsg = true,
680 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
681 { .name = "mmap", .hexret = true,
682 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
683 [2] = SCA_MMAP_PROT, /* prot */
684 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
685 { .name = "mprotect", .errmsg = true,
686 .arg_scnprintf = { [0] = SCA_HEX, /* start */
687 [2] = SCA_MMAP_PROT, /* prot */ }, },
688 { .name = "mq_unlink", .errmsg = true,
689 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
690 { .name = "mremap", .hexret = true,
691 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
692 [3] = SCA_MREMAP_FLAGS, /* flags */
693 [4] = SCA_HEX, /* new_addr */ }, },
694 { .name = "munlock", .errmsg = true,
695 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
696 { .name = "munmap", .errmsg = true,
697 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
698 { .name = "name_to_handle_at", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 { .name = "newfstatat", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
702 { .name = "open", .errmsg = true,
703 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
704 { .name = "open_by_handle_at", .errmsg = true,
705 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
706 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
707 { .name = "openat", .errmsg = true,
708 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
709 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
710 { .name = "perf_event_open", .errmsg = true,
711 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
712 [3] = SCA_FD, /* group_fd */
713 [4] = SCA_PERF_FLAGS, /* flags */ }, },
714 { .name = "pipe2", .errmsg = true,
715 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
716 { .name = "poll", .errmsg = true, .timeout = true, },
717 { .name = "ppoll", .errmsg = true, .timeout = true, },
718 { .name = "pread", .errmsg = true, .alias = "pread64", },
719 { .name = "preadv", .errmsg = true, .alias = "pread", },
720 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
721 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
722 { .name = "pwritev", .errmsg = true, },
723 { .name = "read", .errmsg = true, },
724 { .name = "readlink", .errmsg = true, },
725 { .name = "readlinkat", .errmsg = true,
726 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
727 { .name = "readv", .errmsg = true, },
728 { .name = "recvfrom", .errmsg = true,
729 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
730 { .name = "recvmmsg", .errmsg = true,
731 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
732 { .name = "recvmsg", .errmsg = true,
733 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
734 { .name = "removexattr", .errmsg = true, },
735 { .name = "renameat", .errmsg = true,
736 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
737 { .name = "rmdir", .errmsg = true, },
738 { .name = "rt_sigaction", .errmsg = true,
739 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
740 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
741 { .name = "rt_sigqueueinfo", .errmsg = true,
742 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
743 { .name = "rt_tgsigqueueinfo", .errmsg = true,
744 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
745 { .name = "sched_getattr", .errmsg = true, },
746 { .name = "sched_setattr", .errmsg = true, },
747 { .name = "sched_setscheduler", .errmsg = true,
748 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
749 { .name = "seccomp", .errmsg = true,
750 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
751 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
752 { .name = "select", .errmsg = true, .timeout = true, },
753 { .name = "sendmmsg", .errmsg = true,
754 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
755 { .name = "sendmsg", .errmsg = true,
756 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
757 { .name = "sendto", .errmsg = true,
758 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
759 { .name = "set_tid_address", .errpid = true, },
760 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
761 { .name = "setpgid", .errmsg = true, },
762 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
763 { .name = "setxattr", .errmsg = true, },
764 { .name = "shutdown", .errmsg = true, },
765 { .name = "socket", .errmsg = true,
766 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
767 [1] = SCA_SK_TYPE, /* type */ },
768 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
769 { .name = "socketpair", .errmsg = true,
770 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
771 [1] = SCA_SK_TYPE, /* type */ },
772 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
773 { .name = "stat", .errmsg = true, .alias = "newstat", },
774 { .name = "statfs", .errmsg = true, },
775 { .name = "swapoff", .errmsg = true,
776 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
777 { .name = "swapon", .errmsg = true,
778 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
779 { .name = "symlinkat", .errmsg = true,
780 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
781 { .name = "tgkill", .errmsg = true,
782 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
783 { .name = "tkill", .errmsg = true,
784 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
785 { .name = "truncate", .errmsg = true, },
786 { .name = "uname", .errmsg = true, .alias = "newuname", },
787 { .name = "unlinkat", .errmsg = true,
788 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
789 { .name = "utime", .errmsg = true, },
790 { .name = "utimensat", .errmsg = true,
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
792 { .name = "utimes", .errmsg = true, },
793 { .name = "vmsplice", .errmsg = true, },
794 { .name = "wait4", .errpid = true,
795 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
796 { .name = "waitid", .errpid = true,
797 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
798 { .name = "write", .errmsg = true, },
799 { .name = "writev", .errmsg = true, },
802 static int syscall_fmt__cmp(const void *name, const void *fmtp)
804 const struct syscall_fmt *fmt = fmtp;
805 return strcmp(name, fmt->name);
808 static struct syscall_fmt *syscall_fmt__find(const char *name)
810 const int nmemb = ARRAY_SIZE(syscall_fmts);
811 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Description of one syscall: its tracepoint format and argument fields, its
 * name, the optional entry from syscall_fmts[] and the per-argument
 * formatters and formatter parameters.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
826 * We need to have this 'calculated' boolean because in some cases we really
827 * don't know what is the duration of a syscall, for instance, when we start
828 * a session and some threads are waiting for a syscall to finish, say 'poll',
829 * in which case all we can do is to print "( ? ) for duration and for the
830 * start timestamp.
832 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
834 double duration = (double)t / NSEC_PER_MSEC;
835 size_t printed = fprintf(fp, "(");
837 if (!calculated)
838 printed += fprintf(fp, " ? ");
839 else if (duration >= 1.0)
840 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
841 else if (duration >= 0.01)
842 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
843 else
844 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
845 return printed + fprintf(fp, "): ");
849 * filename.ptr: The filename char pointer that will be vfs_getname'd
850 * filename.entry_str_pos: Where to insert the string translated from
851 * filename.ptr by the vfs_getname tracepoint/kprobe.
853 struct thread_trace {
854 u64 entry_time;
855 u64 exit_time;
856 bool entry_pending;
857 unsigned long nr_events;
858 unsigned long pfmaj, pfmin;
859 char *entry_str;
860 double runtime_ms;
861 struct {
862 unsigned long ptr;
863 short int entry_str_pos;
864 bool pending_open;
865 unsigned int namelen;
866 char *name;
867 } filename;
868 struct {
869 int max;
870 char **table;
871 } paths;
873 struct intlist *syscall_stats;
876 static struct thread_trace *thread_trace__new(void)
878 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
880 if (ttrace)
881 ttrace->paths.max = -1;
883 ttrace->syscall_stats = intlist__new(NULL);
885 return ttrace;
888 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
890 struct thread_trace *ttrace;
892 if (thread == NULL)
893 goto fail;
895 if (thread__priv(thread) == NULL)
896 thread__set_priv(thread, thread_trace__new());
898 if (thread__priv(thread) == NULL)
899 goto fail;
901 ttrace = thread__priv(thread);
902 ++ttrace->nr_events;
904 return ttrace;
905 fail:
906 color_fprintf(fp, PERF_COLOR_RED,
907 "WARNING: not enough memory, dropping samples!\n");
908 return NULL;
911 #define TRACE_PFMAJ (1 << 0)
912 #define TRACE_PFMIN (1 << 1)
914 static const size_t trace__entry_str_size = 2048;
916 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
918 struct thread_trace *ttrace = thread__priv(thread);
920 if (fd > ttrace->paths.max) {
921 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
923 if (npath == NULL)
924 return -1;
926 if (ttrace->paths.max != -1) {
927 memset(npath + ttrace->paths.max + 1, 0,
928 (fd - ttrace->paths.max) * sizeof(char *));
929 } else {
930 memset(npath, 0, (fd + 1) * sizeof(char *));
933 ttrace->paths.table = npath;
934 ttrace->paths.max = fd;
937 ttrace->paths.table[fd] = strdup(pathname);
939 return ttrace->paths.table[fd] != NULL ? 0 : -1;
942 static int thread__read_fd_path(struct thread *thread, int fd)
944 char linkname[PATH_MAX], pathname[PATH_MAX];
945 struct stat st;
946 int ret;
948 if (thread->pid_ == thread->tid) {
949 scnprintf(linkname, sizeof(linkname),
950 "/proc/%d/fd/%d", thread->pid_, fd);
951 } else {
952 scnprintf(linkname, sizeof(linkname),
953 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
956 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
957 return -1;
959 ret = readlink(linkname, pathname, sizeof(pathname));
961 if (ret < 0 || ret > st.st_size)
962 return -1;
964 pathname[ret] = '\0';
965 return trace__set_fd_pathname(thread, fd, pathname);
968 static const char *thread__fd_path(struct thread *thread, int fd,
969 struct trace *trace)
971 struct thread_trace *ttrace = thread__priv(thread);
973 if (ttrace == NULL)
974 return NULL;
976 if (fd < 0)
977 return NULL;
979 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
980 if (!trace->live)
981 return NULL;
982 ++trace->stats.proc_getname;
983 if (thread__read_fd_path(thread, fd))
984 return NULL;
987 return ttrace->paths.table[fd];
990 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
991 struct syscall_arg *arg)
993 int fd = arg->val;
994 size_t printed = scnprintf(bf, size, "%d", fd);
995 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
997 if (path)
998 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1000 return printed;
1003 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1004 struct syscall_arg *arg)
1006 int fd = arg->val;
1007 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1008 struct thread_trace *ttrace = thread__priv(arg->thread);
1010 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1011 zfree(&ttrace->paths.table[fd]);
1013 return printed;
1016 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1017 unsigned long ptr)
1019 struct thread_trace *ttrace = thread__priv(thread);
1021 ttrace->filename.ptr = ptr;
1022 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1025 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1026 struct syscall_arg *arg)
1028 unsigned long ptr = arg->val;
1030 if (!arg->trace->vfs_getname)
1031 return scnprintf(bf, size, "%#x", ptr);
1033 thread__set_filename_pos(arg->thread, bf, ptr);
1034 return 0;
1037 static bool trace__filter_duration(struct trace *trace, double t)
1039 return t < (trace->duration_filter * NSEC_PER_MSEC);
1042 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1044 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1046 return fprintf(fp, "%10.3f ", ts);
1050 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1051 * using ttrace->entry_time for a thread that receives a sys_exit without
1052 * first having received a sys_enter ("poll" issued before tracing session
1053 * starts, lost sys_enter exit due to ring buffer overflow).
1055 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1057 if (tstamp > 0)
1058 return __trace__fprintf_tstamp(trace, tstamp, fp);
1060 return fprintf(fp, " ? ");
/*
 * Flags written from the signal handler and read by the rest of the file.
 * They are volatile sig_atomic_t so that stores from the handler are done
 * atomically and reads elsewhere are not cached by the compiler.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1072 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1073 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1075 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1076 printed += fprintf_duration(duration, duration_calculated, fp);
1078 if (trace->multiple_threads) {
1079 if (trace->show_comm)
1080 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1081 printed += fprintf(fp, "%d ", thread->tid);
1084 return printed;
1087 static int trace__process_event(struct trace *trace, struct machine *machine,
1088 union perf_event *event, struct perf_sample *sample)
1090 int ret = 0;
1092 switch (event->header.type) {
1093 case PERF_RECORD_LOST:
1094 color_fprintf(trace->output, PERF_COLOR_RED,
1095 "LOST %" PRIu64 " events!\n", event->lost.lost);
1096 ret = machine__process_lost_event(machine, event, sample);
1097 break;
1098 default:
1099 ret = machine__process_event(machine, event, sample);
1100 break;
1103 return ret;
1106 static int trace__tool_process(struct perf_tool *tool,
1107 union perf_event *event,
1108 struct perf_sample *sample,
1109 struct machine *machine)
1111 struct trace *trace = container_of(tool, struct trace, tool);
1112 return trace__process_event(trace, machine, event, sample);
1115 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1117 struct machine *machine = vmachine;
1119 if (machine->kptr_restrict_warned)
1120 return NULL;
1122 if (symbol_conf.kptr_restrict) {
1123 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1124 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1125 "Kernel samples will not be resolved.\n");
1126 machine->kptr_restrict_warned = true;
1127 return NULL;
1130 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1133 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1135 int err = symbol__init(NULL);
1137 if (err)
1138 return err;
1140 trace->host = machine__new_host();
1141 if (trace->host == NULL)
1142 return -ENOMEM;
1144 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1145 return -errno;
1147 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1148 evlist->threads, trace__tool_process, false,
1149 trace->opts.proc_map_timeout);
1150 if (err)
1151 symbol__exit();
1153 return err;
1156 static int syscall__set_arg_fmts(struct syscall *sc)
1158 struct format_field *field;
1159 int idx = 0, len;
1161 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1162 if (sc->arg_scnprintf == NULL)
1163 return -1;
1165 if (sc->fmt)
1166 sc->arg_parm = sc->fmt->arg_parm;
1168 for (field = sc->args; field; field = field->next) {
1169 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1170 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1171 else if (strcmp(field->type, "const char *") == 0 &&
1172 (strcmp(field->name, "filename") == 0 ||
1173 strcmp(field->name, "path") == 0 ||
1174 strcmp(field->name, "pathname") == 0))
1175 sc->arg_scnprintf[idx] = SCA_FILENAME;
1176 else if (field->flags & FIELD_IS_POINTER)
1177 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1178 else if (strcmp(field->type, "pid_t") == 0)
1179 sc->arg_scnprintf[idx] = SCA_PID;
1180 else if (strcmp(field->type, "umode_t") == 0)
1181 sc->arg_scnprintf[idx] = SCA_MODE_T;
1182 else if ((strcmp(field->type, "int") == 0 ||
1183 strcmp(field->type, "unsigned int") == 0 ||
1184 strcmp(field->type, "long") == 0) &&
1185 (len = strlen(field->name)) >= 2 &&
1186 strcmp(field->name + len - 2, "fd") == 0) {
1188 * /sys/kernel/tracing/events/syscalls/sys_enter*
1189 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1190 * 65 int
1191 * 23 unsigned int
1192 * 7 unsigned long
1194 sc->arg_scnprintf[idx] = SCA_FD;
1196 ++idx;
1199 return 0;
1202 static int trace__read_syscall_info(struct trace *trace, int id)
1204 char tp_name[128];
1205 struct syscall *sc;
1206 const char *name = syscalltbl__name(trace->sctbl, id);
1208 if (name == NULL)
1209 return -1;
1211 if (id > trace->syscalls.max) {
1212 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1214 if (nsyscalls == NULL)
1215 return -1;
1217 if (trace->syscalls.max != -1) {
1218 memset(nsyscalls + trace->syscalls.max + 1, 0,
1219 (id - trace->syscalls.max) * sizeof(*sc));
1220 } else {
1221 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1224 trace->syscalls.table = nsyscalls;
1225 trace->syscalls.max = id;
1228 sc = trace->syscalls.table + id;
1229 sc->name = name;
1231 sc->fmt = syscall_fmt__find(sc->name);
1233 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1234 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1236 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1237 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1238 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1241 if (IS_ERR(sc->tp_format))
1242 return -1;
1244 sc->args = sc->tp_format->format.fields;
1245 sc->nr_args = sc->tp_format->format.nr_fields;
1247 * We need to check and discard the first variable '__syscall_nr'
1248 * or 'nr' that mean the syscall number. It is needless here.
1249 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1251 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1252 sc->args = sc->args->next;
1253 --sc->nr_args;
1256 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1258 return syscall__set_arg_fmts(sc);
1261 static int trace__validate_ev_qualifier(struct trace *trace)
1263 int err = 0, i;
1264 struct str_node *pos;
1266 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1267 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1268 sizeof(trace->ev_qualifier_ids.entries[0]));
1270 if (trace->ev_qualifier_ids.entries == NULL) {
1271 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1272 trace->output);
1273 err = -EINVAL;
1274 goto out;
1277 i = 0;
1279 strlist__for_each_entry(pos, trace->ev_qualifier) {
1280 const char *sc = pos->s;
1281 int id = syscalltbl__id(trace->sctbl, sc);
1283 if (id < 0) {
1284 if (err == 0) {
1285 fputs("Error:\tInvalid syscall ", trace->output);
1286 err = -EINVAL;
1287 } else {
1288 fputs(", ", trace->output);
1291 fputs(sc, trace->output);
1294 trace->ev_qualifier_ids.entries[i++] = id;
1297 if (err < 0) {
1298 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1299 "\nHint:\tand: 'man syscalls'\n", trace->output);
1300 zfree(&trace->ev_qualifier_ids.entries);
1301 trace->ev_qualifier_ids.nr = 0;
1303 out:
1304 return err;
1308 * args is to be interpreted as a series of longs but we need to handle
1309 * 8-byte unaligned accesses. args points to raw_data within the event
1310 * and raw_data is guaranteed to be 8-byte unaligned because it is
1311 * preceded by raw_size which is a u32. So we need to copy args to a temp
1312 * variable to read it. Most notably this avoids extended load instructions
1313 * on unaligned addresses
1316 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1317 unsigned char *args, struct trace *trace,
1318 struct thread *thread)
1320 size_t printed = 0;
1321 unsigned char *p;
1322 unsigned long val;
1324 if (sc->args != NULL) {
1325 struct format_field *field;
1326 u8 bit = 1;
1327 struct syscall_arg arg = {
1328 .idx = 0,
1329 .mask = 0,
1330 .trace = trace,
1331 .thread = thread,
1334 for (field = sc->args; field;
1335 field = field->next, ++arg.idx, bit <<= 1) {
1336 if (arg.mask & bit)
1337 continue;
1339 /* special care for unaligned accesses */
1340 p = args + sizeof(unsigned long) * arg.idx;
1341 memcpy(&val, p, sizeof(val));
1344 * Suppress this argument if its value is zero and
1345 * and we don't have a string associated in an
1346 * strarray for it.
1348 if (val == 0 &&
1349 !(sc->arg_scnprintf &&
1350 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1351 sc->arg_parm[arg.idx]))
1352 continue;
1354 printed += scnprintf(bf + printed, size - printed,
1355 "%s%s: ", printed ? ", " : "", field->name);
1356 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1357 arg.val = val;
1358 if (sc->arg_parm)
1359 arg.parm = sc->arg_parm[arg.idx];
1360 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1361 size - printed, &arg);
1362 } else {
1363 printed += scnprintf(bf + printed, size - printed,
1364 "%ld", val);
1367 } else if (IS_ERR(sc->tp_format)) {
1369 * If we managed to read the tracepoint /format file, then we
1370 * may end up not having any args, like with gettid(), so only
1371 * print the raw args when we didn't manage to read it.
1373 int i = 0;
1375 while (i < 6) {
1376 /* special care for unaligned accesses */
1377 p = args + sizeof(unsigned long) * i;
1378 memcpy(&val, p, sizeof(val));
1379 printed += scnprintf(bf + printed, size - printed,
1380 "%sarg%d: %ld",
1381 printed ? ", " : "", i, val);
1382 ++i;
1386 return printed;
/*
 * Signature of the per-evsel sample handlers in this file; a pointer of
 * this type is stored in evsel->handler.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1393 static struct syscall *trace__syscall_info(struct trace *trace,
1394 struct perf_evsel *evsel, int id)
1397 if (id < 0) {
1400 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1401 * before that, leaving at a higher verbosity level till that is
1402 * explained. Reproduced with plain ftrace with:
1404 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1405 * grep "NR -1 " /t/trace_pipe
1407 * After generating some load on the machine.
1409 if (verbose > 1) {
1410 static u64 n;
1411 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1412 id, perf_evsel__name(evsel), ++n);
1414 return NULL;
1417 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1418 trace__read_syscall_info(trace, id))
1419 goto out_cant_read;
1421 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1422 goto out_cant_read;
1424 return &trace->syscalls.table[id];
1426 out_cant_read:
1427 if (verbose) {
1428 fprintf(trace->output, "Problems reading syscall %d", id);
1429 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1430 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1431 fputs(" information\n", trace->output);
1433 return NULL;
1436 static void thread__update_stats(struct thread_trace *ttrace,
1437 int id, struct perf_sample *sample)
1439 struct int_node *inode;
1440 struct stats *stats;
1441 u64 duration = 0;
1443 inode = intlist__findnew(ttrace->syscall_stats, id);
1444 if (inode == NULL)
1445 return;
1447 stats = inode->priv;
1448 if (stats == NULL) {
1449 stats = malloc(sizeof(struct stats));
1450 if (stats == NULL)
1451 return;
1452 init_stats(stats);
1453 inode->priv = stats;
1456 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1457 duration = sample->time - ttrace->entry_time;
1459 update_stats(stats, duration);
1462 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1464 struct thread_trace *ttrace;
1465 u64 duration;
1466 size_t printed;
1468 if (trace->current == NULL)
1469 return 0;
1471 ttrace = thread__priv(trace->current);
1473 if (!ttrace->entry_pending)
1474 return 0;
1476 duration = sample->time - ttrace->entry_time;
1478 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1479 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1480 ttrace->entry_pending = false;
1482 return printed;
1485 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1486 union perf_event *event __maybe_unused,
1487 struct perf_sample *sample)
1489 char *msg;
1490 void *args;
1491 size_t printed = 0;
1492 struct thread *thread;
1493 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1494 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1495 struct thread_trace *ttrace;
1497 if (sc == NULL)
1498 return -1;
1500 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1501 ttrace = thread__trace(thread, trace->output);
1502 if (ttrace == NULL)
1503 goto out_put;
1505 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1507 if (ttrace->entry_str == NULL) {
1508 ttrace->entry_str = malloc(trace__entry_str_size);
1509 if (!ttrace->entry_str)
1510 goto out_put;
1513 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1514 trace__printf_interrupted_entry(trace, sample);
1516 ttrace->entry_time = sample->time;
1517 msg = ttrace->entry_str;
1518 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1520 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1521 args, trace, thread);
1523 if (sc->is_exit) {
1524 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1525 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1526 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1528 } else {
1529 ttrace->entry_pending = true;
1530 /* See trace__vfs_getname & trace__sys_exit */
1531 ttrace->filename.pending_open = false;
1534 if (trace->current != thread) {
1535 thread__put(trace->current);
1536 trace->current = thread__get(thread);
1538 err = 0;
1539 out_put:
1540 thread__put(thread);
1541 return err;
1544 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1545 struct perf_sample *sample,
1546 struct callchain_cursor *cursor)
1548 struct addr_location al;
1550 if (machine__resolve(trace->host, &al, sample) < 0 ||
1551 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1552 return -1;
1554 return 0;
1557 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1559 /* TODO: user-configurable print_opts */
1560 const unsigned int print_opts = EVSEL__PRINT_SYM |
1561 EVSEL__PRINT_DSO |
1562 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1564 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1567 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1568 union perf_event *event __maybe_unused,
1569 struct perf_sample *sample)
1571 long ret;
1572 u64 duration = 0;
1573 bool duration_calculated = false;
1574 struct thread *thread;
1575 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1576 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1577 struct thread_trace *ttrace;
1579 if (sc == NULL)
1580 return -1;
1582 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1583 ttrace = thread__trace(thread, trace->output);
1584 if (ttrace == NULL)
1585 goto out_put;
1587 if (trace->summary)
1588 thread__update_stats(ttrace, id, sample);
1590 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1592 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1593 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1594 ttrace->filename.pending_open = false;
1595 ++trace->stats.vfs_getname;
1598 ttrace->exit_time = sample->time;
1600 if (ttrace->entry_time) {
1601 duration = sample->time - ttrace->entry_time;
1602 if (trace__filter_duration(trace, duration))
1603 goto out;
1604 duration_calculated = true;
1605 } else if (trace->duration_filter)
1606 goto out;
1608 if (sample->callchain) {
1609 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1610 if (callchain_ret == 0) {
1611 if (callchain_cursor.nr < trace->min_stack)
1612 goto out;
1613 callchain_ret = 1;
1617 if (trace->summary_only)
1618 goto out;
1620 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1622 if (ttrace->entry_pending) {
1623 fprintf(trace->output, "%-70s", ttrace->entry_str);
1624 } else {
1625 fprintf(trace->output, " ... [");
1626 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1627 fprintf(trace->output, "]: %s()", sc->name);
1630 if (sc->fmt == NULL) {
1631 signed_print:
1632 fprintf(trace->output, ") = %ld", ret);
1633 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1634 char bf[STRERR_BUFSIZE];
1635 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1636 *e = audit_errno_to_name(-ret);
1638 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1639 } else if (ret == 0 && sc->fmt->timeout)
1640 fprintf(trace->output, ") = 0 Timeout");
1641 else if (sc->fmt->hexret)
1642 fprintf(trace->output, ") = %#lx", ret);
1643 else if (sc->fmt->errpid) {
1644 struct thread *child = machine__find_thread(trace->host, ret, ret);
1646 if (child != NULL) {
1647 fprintf(trace->output, ") = %ld", ret);
1648 if (child->comm_set)
1649 fprintf(trace->output, " (%s)", thread__comm_str(child));
1650 thread__put(child);
1652 } else
1653 goto signed_print;
1655 fputc('\n', trace->output);
1657 if (callchain_ret > 0)
1658 trace__fprintf_callchain(trace, sample);
1659 else if (callchain_ret < 0)
1660 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1661 out:
1662 ttrace->entry_pending = false;
1663 err = 0;
1664 out_put:
1665 thread__put(thread);
1666 return err;
1669 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1670 union perf_event *event __maybe_unused,
1671 struct perf_sample *sample)
1673 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1674 struct thread_trace *ttrace;
1675 size_t filename_len, entry_str_len, to_move;
1676 ssize_t remaining_space;
1677 char *pos;
1678 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1680 if (!thread)
1681 goto out;
1683 ttrace = thread__priv(thread);
1684 if (!ttrace)
1685 goto out;
1687 filename_len = strlen(filename);
1689 if (ttrace->filename.namelen < filename_len) {
1690 char *f = realloc(ttrace->filename.name, filename_len + 1);
1692 if (f == NULL)
1693 goto out;
1695 ttrace->filename.namelen = filename_len;
1696 ttrace->filename.name = f;
1699 strcpy(ttrace->filename.name, filename);
1700 ttrace->filename.pending_open = true;
1702 if (!ttrace->filename.ptr)
1703 goto out;
1705 entry_str_len = strlen(ttrace->entry_str);
1706 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1707 if (remaining_space <= 0)
1708 goto out;
1710 if (filename_len > (size_t)remaining_space) {
1711 filename += filename_len - remaining_space;
1712 filename_len = remaining_space;
1715 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1716 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1717 memmove(pos + filename_len, pos, to_move);
1718 memcpy(pos, filename, filename_len);
1720 ttrace->filename.ptr = 0;
1721 ttrace->filename.entry_str_pos = 0;
1722 out:
1723 return 0;
1726 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1727 union perf_event *event __maybe_unused,
1728 struct perf_sample *sample)
1730 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1731 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1732 struct thread *thread = machine__findnew_thread(trace->host,
1733 sample->pid,
1734 sample->tid);
1735 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1737 if (ttrace == NULL)
1738 goto out_dump;
1740 ttrace->runtime_ms += runtime_ms;
1741 trace->runtime_ms += runtime_ms;
1742 thread__put(thread);
1743 return 0;
1745 out_dump:
1746 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1747 evsel->name,
1748 perf_evsel__strval(evsel, sample, "comm"),
1749 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1750 runtime,
1751 perf_evsel__intval(evsel, sample, "vruntime"));
1752 thread__put(thread);
1753 return 0;
1756 static void bpf_output__printer(enum binary_printer_ops op,
1757 unsigned int val, void *extra)
1759 FILE *output = extra;
1760 unsigned char ch = (unsigned char)val;
1762 switch (op) {
1763 case BINARY_PRINT_CHAR_DATA:
1764 fprintf(output, "%c", isprint(ch) ? ch : '.');
1765 break;
1766 case BINARY_PRINT_DATA_BEGIN:
1767 case BINARY_PRINT_LINE_BEGIN:
1768 case BINARY_PRINT_ADDR:
1769 case BINARY_PRINT_NUM_DATA:
1770 case BINARY_PRINT_NUM_PAD:
1771 case BINARY_PRINT_SEP:
1772 case BINARY_PRINT_CHAR_PAD:
1773 case BINARY_PRINT_LINE_END:
1774 case BINARY_PRINT_DATA_END:
1775 default:
1776 break;
1780 static void bpf_output__fprintf(struct trace *trace,
1781 struct perf_sample *sample)
1783 print_binary(sample->raw_data, sample->raw_size, 8,
1784 bpf_output__printer, trace->output);
1787 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1788 union perf_event *event __maybe_unused,
1789 struct perf_sample *sample)
1791 int callchain_ret = 0;
1793 if (sample->callchain) {
1794 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1795 if (callchain_ret == 0) {
1796 if (callchain_cursor.nr < trace->min_stack)
1797 goto out;
1798 callchain_ret = 1;
1802 trace__printf_interrupted_entry(trace, sample);
1803 trace__fprintf_tstamp(trace, sample->time, trace->output);
1805 if (trace->trace_syscalls)
1806 fprintf(trace->output, "( ): ");
1808 fprintf(trace->output, "%s:", evsel->name);
1810 if (perf_evsel__is_bpf_output(evsel)) {
1811 bpf_output__fprintf(trace, sample);
1812 } else if (evsel->tp_format) {
1813 event_format__fprintf(evsel->tp_format, sample->cpu,
1814 sample->raw_data, sample->raw_size,
1815 trace->output);
1818 fprintf(trace->output, ")\n");
1820 if (callchain_ret > 0)
1821 trace__fprintf_callchain(trace, sample);
1822 else if (callchain_ret < 0)
1823 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1824 out:
1825 return 0;
1828 static void print_location(FILE *f, struct perf_sample *sample,
1829 struct addr_location *al,
1830 bool print_dso, bool print_sym)
1833 if ((verbose || print_dso) && al->map)
1834 fprintf(f, "%s@", al->map->dso->long_name);
1836 if ((verbose || print_sym) && al->sym)
1837 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1838 al->addr - al->sym->start);
1839 else if (al->map)
1840 fprintf(f, "0x%" PRIx64, al->addr);
1841 else
1842 fprintf(f, "0x%" PRIx64, sample->addr);
1845 static int trace__pgfault(struct trace *trace,
1846 struct perf_evsel *evsel,
1847 union perf_event *event __maybe_unused,
1848 struct perf_sample *sample)
1850 struct thread *thread;
1851 struct addr_location al;
1852 char map_type = 'd';
1853 struct thread_trace *ttrace;
1854 int err = -1;
1855 int callchain_ret = 0;
1857 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1859 if (sample->callchain) {
1860 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1861 if (callchain_ret == 0) {
1862 if (callchain_cursor.nr < trace->min_stack)
1863 goto out_put;
1864 callchain_ret = 1;
1868 ttrace = thread__trace(thread, trace->output);
1869 if (ttrace == NULL)
1870 goto out_put;
1872 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1873 ttrace->pfmaj++;
1874 else
1875 ttrace->pfmin++;
1877 if (trace->summary_only)
1878 goto out;
1880 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1881 sample->ip, &al);
1883 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1885 fprintf(trace->output, "%sfault [",
1886 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1887 "maj" : "min");
1889 print_location(trace->output, sample, &al, false, true);
1891 fprintf(trace->output, "] => ");
1893 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1894 sample->addr, &al);
1896 if (!al.map) {
1897 thread__find_addr_location(thread, sample->cpumode,
1898 MAP__FUNCTION, sample->addr, &al);
1900 if (al.map)
1901 map_type = 'x';
1902 else
1903 map_type = '?';
1906 print_location(trace->output, sample, &al, true, false);
1908 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1910 if (callchain_ret > 0)
1911 trace__fprintf_callchain(trace, sample);
1912 else if (callchain_ret < 0)
1913 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1914 out:
1915 err = 0;
1916 out_put:
1917 thread__put(thread);
1918 return err;
1921 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1923 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1924 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1925 return false;
1927 if (trace->pid_list || trace->tid_list)
1928 return true;
1930 return false;
1933 static void trace__set_base_time(struct trace *trace,
1934 struct perf_evsel *evsel,
1935 struct perf_sample *sample)
1938 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1939 * and don't use sample->time unconditionally, we may end up having
1940 * some other event in the future without PERF_SAMPLE_TIME for good
1941 * reason, i.e. we may not be interested in its timestamps, just in
1942 * it taking place, picking some piece of information when it
1943 * appears in our event stream (vfs_getname comes to mind).
1945 if (trace->base_time == 0 && !trace->full_time &&
1946 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1947 trace->base_time = sample->time;
1950 static int trace__process_sample(struct perf_tool *tool,
1951 union perf_event *event,
1952 struct perf_sample *sample,
1953 struct perf_evsel *evsel,
1954 struct machine *machine __maybe_unused)
1956 struct trace *trace = container_of(tool, struct trace, tool);
1957 int err = 0;
1959 tracepoint_handler handler = evsel->handler;
1961 if (skip_sample(trace, sample))
1962 return 0;
1964 trace__set_base_time(trace, evsel, sample);
1966 if (handler) {
1967 ++trace->nr_events;
1968 handler(trace, evsel, event, sample);
1971 return err;
1974 static int parse_target_str(struct trace *trace)
1976 if (trace->opts.target.pid) {
1977 trace->pid_list = intlist__new(trace->opts.target.pid);
1978 if (trace->pid_list == NULL) {
1979 pr_err("Error parsing process id string\n");
1980 return -EINVAL;
1984 if (trace->opts.target.tid) {
1985 trace->tid_list = intlist__new(trace->opts.target.tid);
1986 if (trace->tid_list == NULL) {
1987 pr_err("Error parsing thread id string\n");
1988 return -EINVAL;
1992 return 0;
1995 static int trace__record(struct trace *trace, int argc, const char **argv)
1997 unsigned int rec_argc, i, j;
1998 const char **rec_argv;
1999 const char * const record_args[] = {
2000 "record",
2001 "-R",
2002 "-m", "1024",
2003 "-c", "1",
2006 const char * const sc_args[] = { "-e", };
2007 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2008 const char * const majpf_args[] = { "-e", "major-faults" };
2009 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2010 const char * const minpf_args[] = { "-e", "minor-faults" };
2011 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2013 /* +1 is for the event string below */
2014 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2015 majpf_args_nr + minpf_args_nr + argc;
2016 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2018 if (rec_argv == NULL)
2019 return -ENOMEM;
2021 j = 0;
2022 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2023 rec_argv[j++] = record_args[i];
2025 if (trace->trace_syscalls) {
2026 for (i = 0; i < sc_args_nr; i++)
2027 rec_argv[j++] = sc_args[i];
2029 /* event string may be different for older kernels - e.g., RHEL6 */
2030 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2031 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2032 else if (is_valid_tracepoint("syscalls:sys_enter"))
2033 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2034 else {
2035 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2036 return -1;
2040 if (trace->trace_pgfaults & TRACE_PFMAJ)
2041 for (i = 0; i < majpf_args_nr; i++)
2042 rec_argv[j++] = majpf_args[i];
2044 if (trace->trace_pgfaults & TRACE_PFMIN)
2045 for (i = 0; i < minpf_args_nr; i++)
2046 rec_argv[j++] = minpf_args[i];
2048 for (i = 0; i < (unsigned int)argc; i++)
2049 rec_argv[j++] = argv[i];
2051 return cmd_record(j, rec_argv, NULL);
2054 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2056 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2058 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2060 if (IS_ERR(evsel))
2061 return false;
2063 if (perf_evsel__field(evsel, "pathname") == NULL) {
2064 perf_evsel__delete(evsel);
2065 return false;
2068 evsel->handler = trace__vfs_getname;
2069 perf_evlist__add(evlist, evsel);
2070 return true;
2073 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2075 struct perf_evsel *evsel;
2076 struct perf_event_attr attr = {
2077 .type = PERF_TYPE_SOFTWARE,
2078 .mmap_data = 1,
2081 attr.config = config;
2082 attr.sample_period = 1;
2084 event_attr_init(&attr);
2086 evsel = perf_evsel__new(&attr);
2087 if (evsel)
2088 evsel->handler = trace__pgfault;
2090 return evsel;
2093 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2095 const u32 type = event->header.type;
2096 struct perf_evsel *evsel;
2098 if (type != PERF_RECORD_SAMPLE) {
2099 trace__process_event(trace, trace->host, event, sample);
2100 return;
2103 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2104 if (evsel == NULL) {
2105 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2106 return;
2109 trace__set_base_time(trace, evsel, sample);
2111 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2112 sample->raw_data == NULL) {
2113 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2114 perf_evsel__name(evsel), sample->tid,
2115 sample->cpu, sample->raw_size);
2116 } else {
2117 tracepoint_handler handler = evsel->handler;
2118 handler(trace, evsel, event, sample);
2122 static int trace__add_syscall_newtp(struct trace *trace)
2124 int ret = -1;
2125 struct perf_evlist *evlist = trace->evlist;
2126 struct perf_evsel *sys_enter, *sys_exit;
2128 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2129 if (sys_enter == NULL)
2130 goto out;
2132 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2133 goto out_delete_sys_enter;
2135 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2136 if (sys_exit == NULL)
2137 goto out_delete_sys_enter;
2139 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2140 goto out_delete_sys_exit;
2142 perf_evlist__add(evlist, sys_enter);
2143 perf_evlist__add(evlist, sys_exit);
2145 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2147 * We're interested only in the user space callchain
2148 * leading to the syscall, allow overriding that for
2149 * debugging reasons using --kernel_syscall_callchains
2151 sys_exit->attr.exclude_callchain_kernel = 1;
2154 trace->syscalls.events.sys_enter = sys_enter;
2155 trace->syscalls.events.sys_exit = sys_exit;
2157 ret = 0;
2158 out:
2159 return ret;
2161 out_delete_sys_exit:
2162 perf_evsel__delete_priv(sys_exit);
2163 out_delete_sys_enter:
2164 perf_evsel__delete_priv(sys_enter);
2165 goto out;
2168 static int trace__set_ev_qualifier_filter(struct trace *trace)
2170 int err = -1;
2171 struct perf_evsel *sys_exit;
2172 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2173 trace->ev_qualifier_ids.nr,
2174 trace->ev_qualifier_ids.entries);
2176 if (filter == NULL)
2177 goto out_enomem;
2179 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2180 filter)) {
2181 sys_exit = trace->syscalls.events.sys_exit;
2182 err = perf_evsel__append_tp_filter(sys_exit, filter);
2185 free(filter);
2186 out:
2187 return err;
2188 out_enomem:
2189 errno = ENOMEM;
2190 goto out;
/*
 * trace__run - live tracing: set up the events, optionally fork a workload,
 * then consume the ring buffers until told to stop.
 *
 * @trace: tracer state; trace->evlist is consumed (deleted and set to NULL
 *         before returning) and trace->live is true only while this runs.
 * @argc/@argv: workload command line; a workload is forked when argc > 0.
 *
 * Phases, in order:
 *  1. Add the requested events to the evlist: syscall enter/exit
 *     tracepoints and vfs_getname (trace->trace_syscalls), major/minor page
 *     fault software events (trace->trace_pgfaults bits) and
 *     sched:sched_stat_runtime (trace->sched).
 *  2. Create the cpu/thread maps from trace->opts.target, initialize
 *     symbols, configure the evlist and, when callchains are enabled,
 *     configure them on sys_exit and the page fault events and switch from
 *     the ID to the IDENTIFIER sample bit.
 *  3. Install sig_handler for SIGCHLD and SIGINT, prepare the workload,
 *     open the events, apply the BPF object config, set pid filters and the
 *     syscall id filter, apply filters and mmap the ring buffers.
 *  4. Enable the events unless the target is "none", start the workload,
 *     then loop at the "again" label: read every mmap, parse each sample
 *     and hand it to trace__handle_event().  The loop leaves when
 *     "interrupted" is set; once "done" is set the events are disabled and
 *     the remaining buffered events are drained.
 *  5. At out_disable: drop trace->current, disable the events and, when err
 *     is zero, print the thread summary and tool statistics if requested.
 *
 * The labels after the return statement only format an error message into
 * trace->output and jump back to out_delete_evlist.
 *
 * Returns err: 0 on success, non-zero on failure.
 *
 * NOTE(review): failures of perf_evlist__set_filter_pid(s) are reported via
 * out_error_mem as "Not enough memory to run!" -- confirm that is the
 * intended message for that path.
 */
2193 static int trace__run(struct trace *trace, int argc, const char **argv)
2195 struct perf_evlist *evlist = trace->evlist;
2196 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2197 int err = -1, i;
2198 unsigned long before;
2199 const bool forks = argc > 0;
2200 bool draining = false;
2202 trace->live = true;
/* Phase 1: populate the evlist with the events selected on the command line. */
2204 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2205 goto out_error_raw_syscalls;
/* vfs_getname is optional: only whether it could be added is recorded. */
2207 if (trace->trace_syscalls)
2208 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2210 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2211 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2212 if (pgfault_maj == NULL)
2213 goto out_error_mem;
2214 perf_evlist__add(evlist, pgfault_maj);
2217 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2218 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2219 if (pgfault_min == NULL)
2220 goto out_error_mem;
2221 perf_evlist__add(evlist, pgfault_min);
2224 if (trace->sched &&
2225 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2226 trace__sched_stat_runtime))
2227 goto out_error_sched_stat_runtime;
/* Phase 2: maps, symbols and per-event configuration. */
2229 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2230 if (err < 0) {
2231 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2232 goto out_delete_evlist;
2235 err = trace__symbols_init(trace, evlist);
2236 if (err < 0) {
2237 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2238 goto out_delete_evlist;
2241 perf_evlist__config(evlist, &trace->opts, NULL);
/* Callchains are configured only on sys_exit and the page fault events. */
2243 if (callchain_param.enabled) {
2244 bool use_identifier = false;
2246 if (trace->syscalls.events.sys_exit) {
2247 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2248 &trace->opts, &callchain_param);
2249 use_identifier = true;
2252 if (pgfault_maj) {
2253 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2254 use_identifier = true;
2257 if (pgfault_min) {
2258 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2259 use_identifier = true;
2262 if (use_identifier) {
2264 * Now we have evsels with different sample_ids, use
2265 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2266 * from a fixed position in each ring buffer record.
2268 * As of this the changeset introducing this comment, this
2269 * isn't strictly needed, as the fields that can come before
2270 * PERF_SAMPLE_ID are all used, but we'll probably disable
2271 * some of those for things like copying the payload of
2272 * pointer syscall arguments, and for vfs_getname we don't
2273 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2274 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2276 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2277 perf_evlist__reset_sample_bit(evlist, ID);
2281 signal(SIGCHLD, sig_handler);
2282 signal(SIGINT, sig_handler);
2284 if (forks) {
2285 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2286 argv, false, NULL);
2287 if (err < 0) {
2288 fprintf(trace->output, "Couldn't run the workload!\n");
2289 goto out_delete_evlist;
2293 err = perf_evlist__open(evlist);
2294 if (err < 0)
2295 goto out_error_open;
2297 err = bpf__apply_obj_config();
2298 if (err) {
2299 char errbuf[BUFSIZ];
2301 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2302 pr_err("ERROR: Apply config to BPF failed: %s\n",
2303 errbuf);
2304 goto out_error_open;
2308 * Better not use !target__has_task() here because we need to cover the
2309 * case where no threads were specified in the command line, but a
2310 * workload was, and in that case we will fill in the thread_map when
2311 * we fork the workload in perf_evlist__prepare_workload.
2313 if (trace->filter_pids.nr > 0)
2314 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2315 else if (thread_map__pid(evlist->threads, 0) == -1)
2316 err = perf_evlist__set_filter_pid(evlist, getpid());
2318 if (err < 0)
2319 goto out_error_mem;
2321 if (trace->ev_qualifier_ids.nr > 0) {
2322 err = trace__set_ev_qualifier_filter(trace);
2323 if (err < 0)
2324 goto out_errno;
2326 pr_debug("event qualifier tracepoint filter: %s\n",
2327 trace->syscalls.events.sys_exit->filter);
2330 err = perf_evlist__apply_filters(evlist, &evsel);
2331 if (err < 0)
2332 goto out_error_apply_filters;
2334 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2335 if (err < 0)
2336 goto out_error_mmap;
2338 if (!target__none(&trace->opts.target))
2339 perf_evlist__enable(evlist);
2341 if (forks)
2342 perf_evlist__start_workload(evlist);
2344 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2345 evlist->threads->nr > 1 ||
2346 perf_evlist__first(evlist)->attr.inherit;
2347 again:
2348 before = trace->nr_events;
2350 for (i = 0; i < evlist->nr_mmaps; i++) {
2351 union perf_event *event;
2353 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2354 struct perf_sample sample;
2356 ++trace->nr_events;
2358 err = perf_evlist__parse_sample(evlist, event, &sample);
2359 if (err) {
2360 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2361 goto next_event;
2364 trace__handle_event(trace, event, &sample);
2365 next_event:
2366 perf_evlist__mmap_consume(evlist, i);
2368 if (interrupted)
2369 goto out_disable;
2371 if (done && !draining) {
2372 perf_evlist__disable(evlist);
2373 draining = true;
2378 if (trace->nr_events == before) {
2379 int timeout = done ? 100 : -1;
2381 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2382 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2383 draining = true;
2385 goto again;
2387 } else {
2388 goto again;
2391 out_disable:
2392 thread__zput(trace->current);
2394 perf_evlist__disable(evlist);
2396 if (!err) {
2397 if (trace->summary)
2398 trace__fprintf_thread_summary(trace, trace->output);
2400 if (trace->show_tool_stats) {
2401 fprintf(trace->output, "Stats:\n "
2402 " vfs_getname : %" PRIu64 "\n"
2403 " proc_getname: %" PRIu64 "\n",
2404 trace->stats.vfs_getname,
2405 trace->stats.proc_getname);
2409 out_delete_evlist:
2410 perf_evlist__delete(evlist);
2411 trace->evlist = NULL;
2412 trace->live = false;
2413 return err;
2415 char errbuf[BUFSIZ];
2417 out_error_sched_stat_runtime:
2418 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2419 goto out_error;
2421 out_error_raw_syscalls:
2422 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2423 goto out_error;
2425 out_error_mmap:
2426 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2427 goto out_error;
2429 out_error_open:
2430 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2432 out_error:
2433 fprintf(trace->output, "%s\n", errbuf);
2434 goto out_delete_evlist;
2436 out_error_apply_filters:
2437 fprintf(trace->output,
2438 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2439 evsel->filter, perf_evsel__name(evsel), errno,
2440 str_error_r(errno, errbuf, sizeof(errbuf)));
2441 goto out_delete_evlist;
2443 out_error_mem:
2444 fprintf(trace->output, "Not enough memory to run!\n");
2445 goto out_delete_evlist;
2447 out_errno:
2448 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2449 goto out_delete_evlist;
2452 static int trace__replay(struct trace *trace)
2454 const struct perf_evsel_str_handler handlers[] = {
2455 { "probe:vfs_getname", trace__vfs_getname, },
2457 struct perf_data_file file = {
2458 .path = input_name,
2459 .mode = PERF_DATA_MODE_READ,
2460 .force = trace->force,
2462 struct perf_session *session;
2463 struct perf_evsel *evsel;
2464 int err = -1;
2466 trace->tool.sample = trace__process_sample;
2467 trace->tool.mmap = perf_event__process_mmap;
2468 trace->tool.mmap2 = perf_event__process_mmap2;
2469 trace->tool.comm = perf_event__process_comm;
2470 trace->tool.exit = perf_event__process_exit;
2471 trace->tool.fork = perf_event__process_fork;
2472 trace->tool.attr = perf_event__process_attr;
2473 trace->tool.tracing_data = perf_event__process_tracing_data;
2474 trace->tool.build_id = perf_event__process_build_id;
2476 trace->tool.ordered_events = true;
2477 trace->tool.ordering_requires_timestamps = true;
2479 /* add tid to output */
2480 trace->multiple_threads = true;
2482 session = perf_session__new(&file, false, &trace->tool);
2483 if (session == NULL)
2484 return -1;
2486 if (symbol__init(&session->header.env) < 0)
2487 goto out;
2489 trace->host = &session->machines.host;
2491 err = perf_session__set_tracepoints_handlers(session, handlers);
2492 if (err)
2493 goto out;
2495 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2496 "raw_syscalls:sys_enter");
2497 /* older kernels have syscalls tp versus raw_syscalls */
2498 if (evsel == NULL)
2499 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2500 "syscalls:sys_enter");
2502 if (evsel &&
2503 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2504 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2505 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2506 goto out;
2509 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2510 "raw_syscalls:sys_exit");
2511 if (evsel == NULL)
2512 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2513 "syscalls:sys_exit");
2514 if (evsel &&
2515 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2516 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2517 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2518 goto out;
2521 evlist__for_each_entry(session->evlist, evsel) {
2522 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2523 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2524 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2525 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2526 evsel->handler = trace__pgfault;
2529 err = parse_target_str(trace);
2530 if (err != 0)
2531 goto out;
2533 setup_pager();
2535 err = perf_session__process_events(session);
2536 if (err)
2537 pr_err("Failed to process events, error %d", err);
2539 else if (trace->summary)
2540 trace__fprintf_thread_summary(trace, trace->output);
2542 out:
2543 perf_session__delete(session);
2545 return err;
/*
 * Print the banner that precedes the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2557 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2558 struct stats *stats;
2559 double msecs;
2560 int syscall;
2563 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2564 struct stats *stats = source->priv;
2566 entry->syscall = source->i;
2567 entry->stats = stats;
2568 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2571 static size_t thread__dump_stats(struct thread_trace *ttrace,
2572 struct trace *trace, FILE *fp)
2574 size_t printed = 0;
2575 struct syscall *sc;
2576 struct rb_node *nd;
2577 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2579 if (syscall_stats == NULL)
2580 return 0;
2582 printed += fprintf(fp, "\n");
2584 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2585 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2586 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2588 resort_rb__for_each_entry(nd, syscall_stats) {
2589 struct stats *stats = syscall_stats_entry->stats;
2590 if (stats) {
2591 double min = (double)(stats->min) / NSEC_PER_MSEC;
2592 double max = (double)(stats->max) / NSEC_PER_MSEC;
2593 double avg = avg_stats(stats);
2594 double pct;
2595 u64 n = (u64) stats->n;
2597 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2598 avg /= NSEC_PER_MSEC;
2600 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2601 printed += fprintf(fp, " %-15s", sc->name);
2602 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2603 n, syscall_stats_entry->msecs, min, avg);
2604 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2608 resort_rb__delete(syscall_stats);
2609 printed += fprintf(fp, "\n\n");
2611 return printed;
2614 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2616 size_t printed = 0;
2617 struct thread_trace *ttrace = thread__priv(thread);
2618 double ratio;
2620 if (ttrace == NULL)
2621 return 0;
2623 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2625 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2626 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2627 printed += fprintf(fp, "%.1f%%", ratio);
2628 if (ttrace->pfmaj)
2629 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2630 if (ttrace->pfmin)
2631 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2632 if (trace->sched)
2633 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2634 else if (fputc('\n', fp) != EOF)
2635 ++printed;
2637 printed += thread__dump_stats(ttrace, trace, fp);
2639 return printed;
2642 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2644 return ttrace ? ttrace->nr_events : 0;
2647 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2648 struct thread *thread;
2651 entry->thread = rb_entry(nd, struct thread, rb_node);
2654 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2656 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2657 size_t printed = trace__fprintf_threads_header(fp);
2658 struct rb_node *nd;
2660 if (threads == NULL) {
2661 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2662 return 0;
2665 resort_rb__for_each_entry(nd, threads)
2666 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2668 resort_rb__delete(threads);
2670 return printed;
2673 static int trace__set_duration(const struct option *opt, const char *str,
2674 int unset __maybe_unused)
2676 struct trace *trace = opt->value;
2678 trace->duration_filter = atof(str);
2679 return 0;
2682 static int trace__set_filter_pids(const struct option *opt, const char *str,
2683 int unset __maybe_unused)
2685 int ret = -1;
2686 size_t i;
2687 struct trace *trace = opt->value;
2689 * FIXME: introduce a intarray class, plain parse csv and create a
2690 * { int nr, int entries[] } struct...
2692 struct intlist *list = intlist__new(str);
2694 if (list == NULL)
2695 return -1;
2697 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2698 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2700 if (trace->filter_pids.entries == NULL)
2701 goto out;
2703 trace->filter_pids.entries[0] = getpid();
2705 for (i = 1; i < trace->filter_pids.nr; ++i)
2706 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2708 intlist__delete(list);
2709 ret = 0;
2710 out:
2711 return ret;
2714 static int trace__open_output(struct trace *trace, const char *filename)
2716 struct stat st;
2718 if (!stat(filename, &st) && st.st_size) {
2719 char oldname[PATH_MAX];
2721 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2722 unlink(oldname);
2723 rename(filename, oldname);
2726 trace->output = fopen(filename, "w");
2728 return trace->output == NULL ? -errno : 0;
2731 static int parse_pagefaults(const struct option *opt, const char *str,
2732 int unset __maybe_unused)
2734 int *trace_pgfaults = opt->value;
2736 if (strcmp(str, "all") == 0)
2737 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2738 else if (strcmp(str, "maj") == 0)
2739 *trace_pgfaults |= TRACE_PFMAJ;
2740 else if (strcmp(str, "min") == 0)
2741 *trace_pgfaults |= TRACE_PFMIN;
2742 else
2743 return -1;
2745 return 0;
2748 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2750 struct perf_evsel *evsel;
2752 evlist__for_each_entry(evlist, evsel)
2753 evsel->handler = handler;
/*
 * cmd_trace - entry point of the "perf trace" builtin.
 *
 * Builds the default struct trace, parses the command line (stopping at the
 * first non-option so that a workload command can follow), then either
 * hands over to trace__record() for the "record" subcommand, replays a file
 * through trace__replay() when input_name is set, or traces live through
 * trace__run().
 *
 * Returns the error code of the selected mode, -ENOMEM when the evlist or
 * syscall table cannot be allocated, and -1 or another non-zero value for
 * option and setup errors.
 *
 * NOTE(review): the two plain "return" statements (record subcommand and
 * "Please specify something to trace.") bypass the out_close/out labels; as
 * far as the visible code goes "out" only returns err, so nothing is
 * skipped today, but confirm before adding cleanup there.
 * NOTE(review): trace.sctbl is not released on any path visible here, and
 * trace.evlist is only released inside trace__run().
 */
2756 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2758 const char *trace_usage[] = {
2759 "perf trace [<options>] [<command>]",
2760 "perf trace [<options>] -- <command> [<options>]",
2761 "perf trace record [<options>] [<command>]",
2762 "perf trace record [<options>] -- <command> [<options>]",
2763 NULL
/* Defaults; the UINT_MAX/ULLONG_MAX values act as "not set by the user" markers checked further down. */
2765 struct trace trace = {
2766 .syscalls = {
2767 . max = -1,
2769 .opts = {
2770 .target = {
2771 .uid = UINT_MAX,
2772 .uses_mmap = true,
2774 .user_freq = UINT_MAX,
2775 .user_interval = ULLONG_MAX,
2776 .no_buffering = true,
2777 .mmap_pages = UINT_MAX,
2778 .proc_map_timeout = 500,
2780 .output = stderr,
2781 .show_comm = true,
2782 .trace_syscalls = true,
2783 .kernel_syscallchains = false,
2784 .max_stack = UINT_MAX,
2786 const char *output_name = NULL;
2787 const char *ev_qualifier_str = NULL;
/* Command line options; most of them write straight into fields of the trace struct above. */
2788 const struct option trace_options[] = {
2789 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2790 "event selector. use 'perf list' to list available events",
2791 parse_events_option),
2792 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2793 "show the thread COMM next to its id"),
2794 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2795 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2796 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2797 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2798 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2799 "trace events on existing process id"),
2800 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2801 "trace events on existing thread id"),
2802 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2803 "pids to filter (by the kernel)", trace__set_filter_pids),
2804 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2805 "system-wide collection from all CPUs"),
2806 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2807 "list of cpus to monitor"),
2808 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2809 "child tasks do not inherit counters"),
2810 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2811 "number of mmap data pages",
2812 perf_evlist__parse_mmap_pages),
2813 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2814 "user to profile"),
2815 OPT_CALLBACK(0, "duration", &trace, "float",
2816 "show only events with duration > N.M ms",
2817 trace__set_duration),
2818 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2819 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2820 OPT_BOOLEAN('T', "time", &trace.full_time,
2821 "Show full timestamp, not time relative to first start"),
2822 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2823 "Show only syscall summary with statistics"),
2824 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2825 "Show all syscalls and summary with statistics"),
2826 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2827 "Trace pagefaults", parse_pagefaults, "maj"),
2828 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2829 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2830 OPT_CALLBACK(0, "call-graph", &trace.opts,
2831 "record_mode[,record_size]", record_callchain_help,
2832 &record_parse_callchain_opt),
2833 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2834 "Show the kernel callchains on the syscall exit path"),
2835 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2836 "Set the minimum stack depth when parsing the callchain, "
2837 "anything below the specified depth will be ignored."),
2838 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2839 "Set the maximum stack depth when parsing the callchain, "
2840 "anything beyond the specified depth will be ignored. "
2841 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2842 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2843 "per thread proc mmap processing timeout in ms"),
2844 OPT_END()
/* Both flags start as "set" and are cleared below when the UINT_MAX default is still in place. */
2846 bool __maybe_unused max_stack_user_set = true;
2847 bool mmap_pages_user_set = true;
2848 const char * const trace_subcommands[] = { "record", NULL };
2849 int err;
2850 char bf[BUFSIZ];
2852 signal(SIGSEGV, sighandler_dump_stack);
2853 signal(SIGFPE, sighandler_dump_stack);
2855 trace.evlist = perf_evlist__new();
2856 trace.sctbl = syscalltbl__new();
2858 if (trace.evlist == NULL || trace.sctbl == NULL) {
2859 pr_err("Not enough memory to run!\n");
2860 err = -ENOMEM;
2861 goto out;
/* After this call argc/argv describe what is left: a subcommand or the workload command line. */
2864 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2865 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2867 err = bpf__setup_stdout(trace.evlist);
2868 if (err) {
2869 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2870 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2871 goto out;
2874 err = -1;
2876 if (trace.trace_pgfaults) {
2877 trace.opts.sample_address = true;
2878 trace.opts.sample_time = true;
2881 if (trace.opts.mmap_pages == UINT_MAX)
2882 mmap_pages_user_set = false;
2884 if (trace.max_stack == UINT_MAX) {
2885 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2886 max_stack_user_set = false;
/* Asking for a stack depth limit implies callchains: enable the "dwarf" mode when none was chosen. */
2889 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2890 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2891 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2892 #endif
2894 if (callchain_param.enabled) {
2895 if (!mmap_pages_user_set && geteuid() == 0)
2896 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2898 symbol_conf.use_callchain = true;
/* Events that are already in the evlist at this point get the generic event handler. */
2901 if (trace.evlist->nr_entries > 0)
2902 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2904 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2905 return trace__record(&trace, argc-1, &argv[1]);
2907 /* summary_only implies summary option, but don't overwrite summary if set */
2908 if (trace.summary_only)
2909 trace.summary = trace.summary_only;
2911 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2912 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2913 pr_err("Please specify something to trace.\n");
2914 return -1;
2917 if (!trace.trace_syscalls && ev_qualifier_str) {
2918 pr_err("The -e option can't be used with --no-syscalls.\n");
2919 goto out;
2922 if (output_name != NULL) {
2923 err = trace__open_output(&trace, output_name);
2924 if (err < 0) {
2925 perror("failed to create output file");
2926 goto out;
2930 trace.open_id = syscalltbl__id(trace.sctbl, "open");
/* A leading '!' in the -e expression sets trace.not_ev_qualifier and is skipped before the list is parsed. */
2932 if (ev_qualifier_str != NULL) {
2933 const char *s = ev_qualifier_str;
2934 struct strlist_config slist_config = {
2935 .dirname = system_path(STRACE_GROUPS_DIR),
2938 trace.not_ev_qualifier = *s == '!';
2939 if (trace.not_ev_qualifier)
2940 ++s;
2941 trace.ev_qualifier = strlist__new(s, &slist_config);
2942 if (trace.ev_qualifier == NULL) {
/* NOTE(review): this message has no trailing newline, unlike the other diagnostics here. */
2943 fputs("Not enough memory to parse event qualifier",
2944 trace.output);
2945 err = -ENOMEM;
2946 goto out_close;
2949 err = trace__validate_ev_qualifier(&trace);
2950 if (err)
2951 goto out_close;
2954 err = target__validate(&trace.opts.target);
2955 if (err) {
2956 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2957 fprintf(trace.output, "%s", bf);
2958 goto out_close;
2961 err = target__parse_uid(&trace.opts.target);
2962 if (err) {
2963 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2964 fprintf(trace.output, "%s", bf);
2965 goto out_close;
/* No workload and no explicit target: fall back to system wide tracing. */
2968 if (!argc && target__none(&trace.opts.target))
2969 trace.opts.target.system_wide = true;
2971 if (input_name)
2972 err = trace__replay(&trace);
2973 else
2974 err = trace__run(&trace, argc, argv);
/* trace.output is closed only when -o replaced the default stderr stream. */
2976 out_close:
2977 if (output_name != NULL)
2978 fclose(trace.output);
2979 out:
2980 return err;