of: MSI: Simplify irqdomain lookup
[linux/fpc-iii.git] / tools / perf / builtin-trace.c
blob20916dd77aac24847bffbaa5d1c0fb56580d6e39
/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
/*
 * For older distros: fallback definitions for constants that the system
 * headers may not provide.  Each one is only defined when the headers
 * included above did not already define it.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC		02000000
#endif

#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

#ifndef SOCK_DCCP
# define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
110 struct tp_field {
111 int offset;
112 union {
113 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
121 u##bits value; \
122 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123 return value; \
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return bswap_##bits(value);\
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
143 static int tp_field__init_uint(struct tp_field *field,
144 struct format_field *format_field,
145 bool needs_swap)
147 field->offset = format_field->offset;
149 switch (format_field->size) {
150 case 1:
151 field->integer = tp_field__u8;
152 break;
153 case 2:
154 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155 break;
156 case 4:
157 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158 break;
159 case 8:
160 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161 break;
162 default:
163 return -1;
166 return 0;
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
171 return sample->raw_data + field->offset;
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
176 field->offset = format_field->offset;
177 field->pointer = tp_field__ptr;
178 return 0;
181 struct syscall_tp {
182 struct tp_field id;
183 union {
184 struct tp_field args, ret;
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 struct tp_field *field,
190 const char *name)
192 struct format_field *format_field = perf_evsel__field(evsel, name);
194 if (format_field == NULL)
195 return -1;
197 return tp_field__init_uint(field, format_field, evsel->needs_swap);
200 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
201 ({ struct syscall_tp *sc = evsel->priv;\
202 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name on @evsel and initialise @field
 * as a pointer reader for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/*
 * Convenience wrapper: @name is both the member of the struct syscall_tp
 * hanging off evsel->priv and (stringified) the tracepoint field name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
222 zfree(&evsel->priv);
223 perf_evsel__delete(evsel);
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
228 evsel->priv = malloc(sizeof(struct syscall_tp));
229 if (evsel->priv != NULL) {
230 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231 goto out_delete;
233 evsel->handler = handler;
234 return 0;
237 return -ENOMEM;
239 out_delete:
240 zfree(&evsel->priv);
241 return -ENOENT;
/*
 * Create an evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, then "syscalls" as a fallback.
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created
 * or the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}
/*
 * Read the integer field @name of the struct syscall_tp stored in
 * evsel->priv from @sample, through the accessor installed for it.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same as above, but yields the address of the field inside @sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
273 struct syscall_arg {
274 unsigned long val;
275 struct thread *thread;
276 struct trace *trace;
277 void *parm;
278 u8 idx;
279 u8 mask;
/*
 * Table mapping small integer values to names.
 *
 * @offset:     value corresponding to entries[0].
 * @nr_entries: number of elements in @entries.
 * @entries:    the names.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing @array, with an offset of zero. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Define strarray__<array> describing @array, whose first entry names @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 const char *intfmt,
301 struct syscall_arg *arg)
303 struct strarray *sa = arg->parm;
304 int idx = arg->val - sa->offset;
306 if (idx < 0 || idx >= sa->nr_entries)
307 return scnprintf(bf, size, intfmt, arg->val);
309 return scnprintf(bf, size, "%s", sa->entries[idx]);
312 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
313 struct syscall_arg *arg)
315 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
318 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* strarray formatter that prints out-of-table values in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Forward declaration of the file descriptor formatter; it is used by the
 * formatters below and its definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 struct syscall_arg *arg)
342 int fd = arg->val;
344 if (fd == AT_FDCWD)
345 return scnprintf(bf, size, "CWD");
347 return syscall_arg__scnprintf_fd(bf, size, arg);
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration of the formatter used for the fd argument of close();
 * its definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358 struct syscall_arg *arg)
360 return scnprintf(bf, size, "%#lx", arg->val);
363 #define SCA_HEX syscall_arg__scnprintf_hex
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 struct syscall_arg *arg)
368 return scnprintf(bf, size, "%d", arg->val);
371 #define SCA_INT syscall_arg__scnprintf_int
373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374 struct syscall_arg *arg)
376 int printed = 0, prot = arg->val;
378 if (prot == PROT_NONE)
379 return scnprintf(bf, size, "NONE");
380 #define P_MMAP_PROT(n) \
381 if (prot & PROT_##n) { \
382 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383 prot &= ~PROT_##n; \
386 P_MMAP_PROT(EXEC);
387 P_MMAP_PROT(READ);
388 P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390 P_MMAP_PROT(SEM);
391 #endif
392 P_MMAP_PROT(GROWSDOWN);
393 P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
396 if (prot)
397 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
399 return printed;
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405 struct syscall_arg *arg)
407 int printed = 0, flags = arg->val;
409 #define P_MMAP_FLAG(n) \
410 if (flags & MAP_##n) { \
411 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412 flags &= ~MAP_##n; \
415 P_MMAP_FLAG(SHARED);
416 P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418 P_MMAP_FLAG(32BIT);
419 #endif
420 P_MMAP_FLAG(ANONYMOUS);
421 P_MMAP_FLAG(DENYWRITE);
422 P_MMAP_FLAG(EXECUTABLE);
423 P_MMAP_FLAG(FILE);
424 P_MMAP_FLAG(FIXED);
425 P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427 P_MMAP_FLAG(HUGETLB);
428 #endif
429 P_MMAP_FLAG(LOCKED);
430 P_MMAP_FLAG(NONBLOCK);
431 P_MMAP_FLAG(NORESERVE);
432 P_MMAP_FLAG(POPULATE);
433 P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435 P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
439 if (flags)
440 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
442 return printed;
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448 struct syscall_arg *arg)
450 int printed = 0, flags = arg->val;
452 #define P_MREMAP_FLAG(n) \
453 if (flags & MREMAP_##n) { \
454 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455 flags &= ~MREMAP_##n; \
458 P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460 P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
464 if (flags)
465 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
467 return printed;
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473 struct syscall_arg *arg)
475 int behavior = arg->val;
477 switch (behavior) {
478 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479 P_MADV_BHV(NORMAL);
480 P_MADV_BHV(RANDOM);
481 P_MADV_BHV(SEQUENTIAL);
482 P_MADV_BHV(WILLNEED);
483 P_MADV_BHV(DONTNEED);
484 P_MADV_BHV(REMOVE);
485 P_MADV_BHV(DONTFORK);
486 P_MADV_BHV(DOFORK);
487 P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489 P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491 P_MADV_BHV(MERGEABLE);
492 P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494 P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497 P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500 P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503 P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506 default: break;
509 return scnprintf(bf, size, "%#x", behavior);
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515 struct syscall_arg *arg)
517 int printed = 0, op = arg->val;
519 if (op == 0)
520 return scnprintf(bf, size, "NONE");
521 #define P_CMD(cmd) \
522 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524 op &= ~LOCK_##cmd; \
527 P_CMD(SH);
528 P_CMD(EX);
529 P_CMD(NB);
530 P_CMD(UN);
531 P_CMD(MAND);
532 P_CMD(RW);
533 P_CMD(READ);
534 P_CMD(WRITE);
535 #undef P_OP
537 if (op)
538 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
540 return printed;
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
547 enum syscall_futex_args {
548 SCF_UADDR = (1 << 0),
549 SCF_OP = (1 << 1),
550 SCF_VAL = (1 << 2),
551 SCF_TIMEOUT = (1 << 3),
552 SCF_UADDR2 = (1 << 4),
553 SCF_VAL3 = (1 << 5),
555 int op = arg->val;
556 int cmd = op & FUTEX_CMD_MASK;
557 size_t printed = 0;
559 switch (cmd) {
560 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
562 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
565 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
566 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
567 P_FUTEX_OP(WAKE_OP); break;
568 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
571 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
572 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
573 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
574 default: printed = scnprintf(bf, size, "%#x", cmd); break;
577 if (op & FUTEX_PRIVATE_FLAG)
578 printed += scnprintf(bf + printed, size - printed, "|PRIV");
580 if (op & FUTEX_CLOCK_REALTIME)
581 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
583 return printed;
586 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/* Names for the cmd argument of bpf(), indexed by value starting at 0. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
594 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the which argument of getitimer(), indexed from 0. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
/* Names for the option argument of keyctl(), indexed from 0. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
/*
 * Names for the whence argument of lseek(), indexed from 0.  "DATA" and
 * "HOLE" are only present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
/* Names for the cmd argument of fcntl(), indexed from 0. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
/* Names for the resource argument of getrlimit(), indexed from 0. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
/* Names for a signal mask "how" argument, indexed from 0. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
/* Names for the clk_id argument of clock_gettime(), indexed from 0. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
/* Names of socket address families, indexed by family number from 0. */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
654 #ifndef SOCK_TYPE_MASK
655 #define SOCK_TYPE_MASK 0xf
656 #endif
658 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659 struct syscall_arg *arg)
661 size_t printed;
662 int type = arg->val,
663 flags = type & ~SOCK_TYPE_MASK;
665 type &= SOCK_TYPE_MASK;
667 * Can't use a strarray, MIPS may override for ABI reasons.
669 switch (type) {
670 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671 P_SK_TYPE(STREAM);
672 P_SK_TYPE(DGRAM);
673 P_SK_TYPE(RAW);
674 P_SK_TYPE(RDM);
675 P_SK_TYPE(SEQPACKET);
676 P_SK_TYPE(DCCP);
677 P_SK_TYPE(PACKET);
678 #undef P_SK_TYPE
679 default:
680 printed = scnprintf(bf, size, "%#x", type);
683 #define P_SK_FLAG(n) \
684 if (flags & SOCK_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686 flags &= ~SOCK_##n; \
689 P_SK_FLAG(CLOEXEC);
690 P_SK_FLAG(NONBLOCK);
691 #undef P_SK_FLAG
693 if (flags)
694 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
696 return printed;
699 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
701 #ifndef MSG_PROBE
702 #define MSG_PROBE 0x10
703 #endif
704 #ifndef MSG_WAITFORONE
705 #define MSG_WAITFORONE 0x10000
706 #endif
707 #ifndef MSG_SENDPAGE_NOTLAST
708 #define MSG_SENDPAGE_NOTLAST 0x20000
709 #endif
710 #ifndef MSG_FASTOPEN
711 #define MSG_FASTOPEN 0x20000000
712 #endif
714 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715 struct syscall_arg *arg)
717 int printed = 0, flags = arg->val;
719 if (flags == 0)
720 return scnprintf(bf, size, "NONE");
721 #define P_MSG_FLAG(n) \
722 if (flags & MSG_##n) { \
723 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724 flags &= ~MSG_##n; \
727 P_MSG_FLAG(OOB);
728 P_MSG_FLAG(PEEK);
729 P_MSG_FLAG(DONTROUTE);
730 P_MSG_FLAG(TRYHARD);
731 P_MSG_FLAG(CTRUNC);
732 P_MSG_FLAG(PROBE);
733 P_MSG_FLAG(TRUNC);
734 P_MSG_FLAG(DONTWAIT);
735 P_MSG_FLAG(EOR);
736 P_MSG_FLAG(WAITALL);
737 P_MSG_FLAG(FIN);
738 P_MSG_FLAG(SYN);
739 P_MSG_FLAG(CONFIRM);
740 P_MSG_FLAG(RST);
741 P_MSG_FLAG(ERRQUEUE);
742 P_MSG_FLAG(NOSIGNAL);
743 P_MSG_FLAG(MORE);
744 P_MSG_FLAG(WAITFORONE);
745 P_MSG_FLAG(SENDPAGE_NOTLAST);
746 P_MSG_FLAG(FASTOPEN);
747 P_MSG_FLAG(CMSG_CLOEXEC);
748 #undef P_MSG_FLAG
750 if (flags)
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
753 return printed;
756 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
758 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759 struct syscall_arg *arg)
761 size_t printed = 0;
762 int mode = arg->val;
764 if (mode == F_OK) /* 0 */
765 return scnprintf(bf, size, "F");
766 #define P_MODE(n) \
767 if (mode & n##_OK) { \
768 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769 mode &= ~n##_OK; \
772 P_MODE(R);
773 P_MODE(W);
774 P_MODE(X);
775 #undef P_MODE
777 if (mode)
778 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
780 return printed;
783 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Forward declaration of the filename formatter; its definition is not in
 * this part of the file.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
790 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791 struct syscall_arg *arg)
793 int printed = 0, flags = arg->val;
795 if (!(flags & O_CREAT))
796 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
798 if (flags == 0)
799 return scnprintf(bf, size, "RDONLY");
800 #define P_FLAG(n) \
801 if (flags & O_##n) { \
802 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803 flags &= ~O_##n; \
806 P_FLAG(APPEND);
807 P_FLAG(ASYNC);
808 P_FLAG(CLOEXEC);
809 P_FLAG(CREAT);
810 P_FLAG(DIRECT);
811 P_FLAG(DIRECTORY);
812 P_FLAG(EXCL);
813 P_FLAG(LARGEFILE);
814 P_FLAG(NOATIME);
815 P_FLAG(NOCTTY);
816 #ifdef O_NONBLOCK
817 P_FLAG(NONBLOCK);
818 #elif O_NDELAY
819 P_FLAG(NDELAY);
820 #endif
821 #ifdef O_PATH
822 P_FLAG(PATH);
823 #endif
824 P_FLAG(RDWR);
825 #ifdef O_DSYNC
826 if ((flags & O_SYNC) == O_SYNC)
827 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828 else {
829 P_FLAG(DSYNC);
831 #else
832 P_FLAG(SYNC);
833 #endif
834 P_FLAG(TRUNC);
835 P_FLAG(WRONLY);
836 #undef P_FLAG
838 if (flags)
839 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
841 return printed;
844 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
846 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847 struct syscall_arg *arg)
849 int printed = 0, flags = arg->val;
851 if (flags == 0)
852 return 0;
854 #define P_FLAG(n) \
855 if (flags & PERF_FLAG_##n) { \
856 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857 flags &= ~PERF_FLAG_##n; \
860 P_FLAG(FD_NO_GROUP);
861 P_FLAG(FD_OUTPUT);
862 P_FLAG(PID_CGROUP);
863 P_FLAG(FD_CLOEXEC);
864 #undef P_FLAG
866 if (flags)
867 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
869 return printed;
872 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
874 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875 struct syscall_arg *arg)
877 int printed = 0, flags = arg->val;
879 if (flags == 0)
880 return scnprintf(bf, size, "NONE");
881 #define P_FLAG(n) \
882 if (flags & EFD_##n) { \
883 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884 flags &= ~EFD_##n; \
887 P_FLAG(SEMAPHORE);
888 P_FLAG(CLOEXEC);
889 P_FLAG(NONBLOCK);
890 #undef P_FLAG
892 if (flags)
893 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
895 return printed;
898 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
900 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901 struct syscall_arg *arg)
903 int printed = 0, flags = arg->val;
905 #define P_FLAG(n) \
906 if (flags & O_##n) { \
907 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908 flags &= ~O_##n; \
911 P_FLAG(CLOEXEC);
912 P_FLAG(NONBLOCK);
913 #undef P_FLAG
915 if (flags)
916 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
918 return printed;
921 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
923 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
925 int sig = arg->val;
927 switch (sig) {
928 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929 P_SIGNUM(HUP);
930 P_SIGNUM(INT);
931 P_SIGNUM(QUIT);
932 P_SIGNUM(ILL);
933 P_SIGNUM(TRAP);
934 P_SIGNUM(ABRT);
935 P_SIGNUM(BUS);
936 P_SIGNUM(FPE);
937 P_SIGNUM(KILL);
938 P_SIGNUM(USR1);
939 P_SIGNUM(SEGV);
940 P_SIGNUM(USR2);
941 P_SIGNUM(PIPE);
942 P_SIGNUM(ALRM);
943 P_SIGNUM(TERM);
944 P_SIGNUM(CHLD);
945 P_SIGNUM(CONT);
946 P_SIGNUM(STOP);
947 P_SIGNUM(TSTP);
948 P_SIGNUM(TTIN);
949 P_SIGNUM(TTOU);
950 P_SIGNUM(URG);
951 P_SIGNUM(XCPU);
952 P_SIGNUM(XFSZ);
953 P_SIGNUM(VTALRM);
954 P_SIGNUM(PROF);
955 P_SIGNUM(WINCH);
956 P_SIGNUM(IO);
957 P_SIGNUM(PWR);
958 P_SIGNUM(SYS);
959 #ifdef SIGEMT
960 P_SIGNUM(EMT);
961 #endif
962 #ifdef SIGSTKFLT
963 P_SIGNUM(STKFLT);
964 #endif
965 #ifdef SIGSWI
966 P_SIGNUM(SWI);
967 #endif
968 default: break;
971 return scnprintf(bf, size, "%#x", sig);
974 #define SCA_SIGNUM syscall_arg__scnprintf_signum
976 #if defined(__i386__) || defined(__x86_64__)
978 * FIXME: Make this available to all arches.
980 #define TCGETS 0x5401
982 static const char *tioctls[] = {
983 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
984 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
985 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
986 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
987 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
988 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
989 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
990 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
991 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
992 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
993 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
994 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
995 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
996 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
997 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
1000 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1001 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Initialiser fragment for a syscall_fmts[] entry: format argument number
 * @arg with SCA_STRARRAY, using strarray__<array> as its table.  @name does
 * not appear in the expansion; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1007 static struct syscall_fmt {
1008 const char *name;
1009 const char *alias;
1010 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011 void *arg_parm[6];
1012 bool errmsg;
1013 bool timeout;
1014 bool hexret;
1015 } syscall_fmts[] = {
1016 { .name = "access", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1018 [1] = SCA_ACCMODE, /* mode */ }, },
1019 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021 { .name = "brk", .hexret = true,
1022 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1023 { .name = "chdir", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1025 { .name = "chmod", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1027 { .name = "chroot", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1029 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030 { .name = "close", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1032 { .name = "connect", .errmsg = true, },
1033 { .name = "creat", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1035 { .name = "dup", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 { .name = "dup2", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "dup3", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042 { .name = "eventfd2", .errmsg = true,
1043 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1044 { .name = "faccessat", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 [1] = SCA_FILENAME, /* filename */ }, },
1047 { .name = "fadvise64", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1049 { .name = "fallocate", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051 { .name = "fchdir", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 { .name = "fchmod", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 { .name = "fchmodat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1057 [1] = SCA_FILENAME, /* filename */ }, },
1058 { .name = "fchown", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060 { .name = "fchownat", .errmsg = true,
1061 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1062 [1] = SCA_FILENAME, /* filename */ }, },
1063 { .name = "fcntl", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1065 [1] = SCA_STRARRAY, /* cmd */ },
1066 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1067 { .name = "fdatasync", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "flock", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1071 [1] = SCA_FLOCK, /* cmd */ }, },
1072 { .name = "fsetxattr", .errmsg = true,
1073 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1075 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1077 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1078 [1] = SCA_FILENAME, /* filename */ }, },
1079 { .name = "fstatfs", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1081 { .name = "fsync", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083 { .name = "ftruncate", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085 { .name = "futex", .errmsg = true,
1086 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1087 { .name = "futimesat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1089 [1] = SCA_FILENAME, /* filename */ }, },
1090 { .name = "getdents", .errmsg = true,
1091 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1092 { .name = "getdents64", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1094 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1095 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 { .name = "getxattr", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098 { .name = "inotify_add_watch", .errmsg = true,
1099 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1100 { .name = "ioctl", .errmsg = true,
1101 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1102 #if defined(__i386__) || defined(__x86_64__)
1104 * FIXME: Make this available to all arches.
1106 [1] = SCA_STRHEXARRAY, /* cmd */
1107 [2] = SCA_HEX, /* arg */ },
1108 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1109 #else
1110 [2] = SCA_HEX, /* arg */ }, },
1111 #endif
1112 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113 { .name = "kill", .errmsg = true,
1114 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1115 { .name = "lchown", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1117 { .name = "lgetxattr", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1119 { .name = "linkat", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1121 { .name = "listxattr", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 { .name = "llistxattr", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1125 { .name = "lremovexattr", .errmsg = true,
1126 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127 { .name = "lseek", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1129 [2] = SCA_STRARRAY, /* whence */ },
1130 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1131 { .name = "lsetxattr", .errmsg = true,
1132 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1134 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1135 { .name = "lsxattr", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1137 { .name = "madvise", .errmsg = true,
1138 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1139 [2] = SCA_MADV_BHV, /* behavior */ }, },
1140 { .name = "mkdir", .errmsg = true,
1141 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1142 { .name = "mkdirat", .errmsg = true,
1143 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1144 [1] = SCA_FILENAME, /* pathname */ }, },
1145 { .name = "mknod", .errmsg = true,
1146 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1147 { .name = "mknodat", .errmsg = true,
1148 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1149 [1] = SCA_FILENAME, /* filename */ }, },
1150 { .name = "mlock", .errmsg = true,
1151 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1152 { .name = "mlockall", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1154 { .name = "mmap", .hexret = true,
1155 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1156 [2] = SCA_MMAP_PROT, /* prot */
1157 [3] = SCA_MMAP_FLAGS, /* flags */
1158 [4] = SCA_FD, /* fd */ }, },
1159 { .name = "mprotect", .errmsg = true,
1160 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1161 [2] = SCA_MMAP_PROT, /* prot */ }, },
1162 { .name = "mq_unlink", .errmsg = true,
1163 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1164 { .name = "mremap", .hexret = true,
1165 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1166 [3] = SCA_MREMAP_FLAGS, /* flags */
1167 [4] = SCA_HEX, /* new_addr */ }, },
1168 { .name = "munlock", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1170 { .name = "munmap", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1172 { .name = "name_to_handle_at", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1174 { .name = "newfstatat", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1176 [1] = SCA_FILENAME, /* filename */ }, },
1177 { .name = "open", .errmsg = true,
1178 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1179 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1180 { .name = "open_by_handle_at", .errmsg = true,
1181 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1182 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1183 { .name = "openat", .errmsg = true,
1184 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1185 [1] = SCA_FILENAME, /* filename */
1186 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1187 { .name = "perf_event_open", .errmsg = true,
1188 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1189 [2] = SCA_INT, /* cpu */
1190 [3] = SCA_FD, /* group_fd */
1191 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1192 { .name = "pipe2", .errmsg = true,
1193 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1194 { .name = "poll", .errmsg = true, .timeout = true, },
1195 { .name = "ppoll", .errmsg = true, .timeout = true, },
1196 { .name = "pread", .errmsg = true, .alias = "pread64",
1197 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198 { .name = "preadv", .errmsg = true, .alias = "pread",
1199 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1200 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1201 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1202 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1203 { .name = "pwritev", .errmsg = true,
1204 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205 { .name = "read", .errmsg = true,
1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 { .name = "readlink", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1209 { .name = "readlinkat", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1211 [1] = SCA_FILENAME, /* pathname */ }, },
1212 { .name = "readv", .errmsg = true,
1213 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1214 { .name = "recvfrom", .errmsg = true,
1215 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1216 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1217 { .name = "recvmmsg", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1219 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1220 { .name = "recvmsg", .errmsg = true,
1221 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1222 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1223 { .name = "removexattr", .errmsg = true,
1224 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1225 { .name = "renameat", .errmsg = true,
1226 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1227 { .name = "rmdir", .errmsg = true,
1228 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1229 { .name = "rt_sigaction", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1231 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1232 { .name = "rt_sigqueueinfo", .errmsg = true,
1233 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1234 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1235 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1236 { .name = "select", .errmsg = true, .timeout = true, },
1237 { .name = "sendmmsg", .errmsg = true,
1238 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1239 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1240 { .name = "sendmsg", .errmsg = true,
1241 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1242 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1243 { .name = "sendto", .errmsg = true,
1244 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1245 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1246 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1247 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1248 { .name = "setxattr", .errmsg = true,
1249 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1250 { .name = "shutdown", .errmsg = true,
1251 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1252 { .name = "socket", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1254 [1] = SCA_SK_TYPE, /* type */ },
1255 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1256 { .name = "socketpair", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1258 [1] = SCA_SK_TYPE, /* type */ },
1259 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1260 { .name = "stat", .errmsg = true, .alias = "newstat",
1261 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1262 { .name = "statfs", .errmsg = true,
1263 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1264 { .name = "swapoff", .errmsg = true,
1265 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1266 { .name = "swapon", .errmsg = true,
1267 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1268 { .name = "symlinkat", .errmsg = true,
1269 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1270 { .name = "tgkill", .errmsg = true,
1271 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1272 { .name = "tkill", .errmsg = true,
1273 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1274 { .name = "truncate", .errmsg = true,
1275 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1276 { .name = "uname", .errmsg = true, .alias = "newuname", },
1277 { .name = "unlinkat", .errmsg = true,
1278 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1279 [1] = SCA_FILENAME, /* pathname */ }, },
1280 { .name = "utime", .errmsg = true,
1281 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1282 { .name = "utimensat", .errmsg = true,
1283 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1284 [1] = SCA_FILENAME, /* filename */ }, },
1285 { .name = "utimes", .errmsg = true,
1286 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1287 { .name = "vmsplice", .errmsg = true,
1288 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1289 { .name = "write", .errmsg = true,
1290 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1291 { .name = "writev", .errmsg = true,
1292 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1295 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1297 const struct syscall_fmt *fmt = fmtp;
1298 return strcmp(name, fmt->name);
1301 static struct syscall_fmt *syscall_fmt__find(const char *name)
1303 const int nmemb = ARRAY_SIZE(syscall_fmts);
1304 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Decoding state for one syscall, filled in by trace__read_syscall_info()
 * and stored in trace->syscalls.table[id].
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" (or alias) tracepoint format */
	struct event_format *tp_format;
	/* number of argument fields, not counting a leading "nr" field */
	int nr_args;
	/* first argument field of ->tp_format */
	struct format_field *args;
	/* name returned by audit_syscall_to_name() */
	const char *name;
	/* true for "exit" and "exit_group" */
	bool is_exit;
	/* matching syscall_fmts entry, NULL when there is none */
	struct syscall_fmt *fmt;
	/* per argument pretty printers, allocated by syscall__set_arg_fmts() */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument printer parameters, borrowed from ->fmt */
	void **arg_parm;
};
1318 static size_t fprintf_duration(unsigned long t, FILE *fp)
1320 double duration = (double)t / NSEC_PER_MSEC;
1321 size_t printed = fprintf(fp, "(");
1323 if (duration >= 1.0)
1324 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1325 else if (duration >= 0.01)
1326 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1327 else
1328 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1329 return printed + fprintf(fp, "): ");
1333 * filename.ptr: The filename char pointer that will be vfs_getname'd
1334 * filename.entry_str_pos: Where to insert the string translated from
1335 * filename.ptr by the vfs_getname tracepoint/kprobe.
1337 struct thread_trace {
1338 u64 entry_time;
1339 u64 exit_time;
1340 bool entry_pending;
1341 unsigned long nr_events;
1342 unsigned long pfmaj, pfmin;
1343 char *entry_str;
1344 double runtime_ms;
1345 struct {
1346 unsigned long ptr;
1347 short int entry_str_pos;
1348 bool pending_open;
1349 unsigned int namelen;
1350 char *name;
1351 } filename;
1352 struct {
1353 int max;
1354 char **table;
1355 } paths;
1357 struct intlist *syscall_stats;
1360 static struct thread_trace *thread_trace__new(void)
1362 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1364 if (ttrace)
1365 ttrace->paths.max = -1;
1367 ttrace->syscall_stats = intlist__new(NULL);
1369 return ttrace;
1372 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1374 struct thread_trace *ttrace;
1376 if (thread == NULL)
1377 goto fail;
1379 if (thread__priv(thread) == NULL)
1380 thread__set_priv(thread, thread_trace__new());
1382 if (thread__priv(thread) == NULL)
1383 goto fail;
1385 ttrace = thread__priv(thread);
1386 ++ttrace->nr_events;
1388 return ttrace;
1389 fail:
1390 color_fprintf(fp, PERF_COLOR_RED,
1391 "WARNING: not enough memory, dropping samples!\n");
1392 return NULL;
/* Page fault kind bits; presumably or'ed into trace->trace_pgfaults - TODO confirm */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per thread sys_enter line buffer (thread_trace->entry_str) */
static const size_t trace__entry_str_size = 2048;
1400 struct trace {
1401 struct perf_tool tool;
1402 struct {
1403 int machine;
1404 int open_id;
1405 } audit;
1406 struct {
1407 int max;
1408 struct syscall *table;
1409 struct {
1410 struct perf_evsel *sys_enter,
1411 *sys_exit;
1412 } events;
1413 } syscalls;
1414 struct record_opts opts;
1415 struct perf_evlist *evlist;
1416 struct machine *host;
1417 struct thread *current;
1418 u64 base_time;
1419 FILE *output;
1420 unsigned long nr_events;
1421 struct strlist *ev_qualifier;
1422 struct {
1423 size_t nr;
1424 int *entries;
1425 } ev_qualifier_ids;
1426 struct intlist *tid_list;
1427 struct intlist *pid_list;
1428 struct {
1429 size_t nr;
1430 pid_t *entries;
1431 } filter_pids;
1432 double duration_filter;
1433 double runtime_ms;
1434 struct {
1435 u64 vfs_getname,
1436 proc_getname;
1437 } stats;
1438 bool not_ev_qualifier;
1439 bool live;
1440 bool full_time;
1441 bool sched;
1442 bool multiple_threads;
1443 bool summary;
1444 bool summary_only;
1445 bool show_comm;
1446 bool show_tool_stats;
1447 bool trace_syscalls;
1448 bool force;
1449 bool vfs_getname;
1450 int trace_pgfaults;
1453 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1455 struct thread_trace *ttrace = thread__priv(thread);
1457 if (fd > ttrace->paths.max) {
1458 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1460 if (npath == NULL)
1461 return -1;
1463 if (ttrace->paths.max != -1) {
1464 memset(npath + ttrace->paths.max + 1, 0,
1465 (fd - ttrace->paths.max) * sizeof(char *));
1466 } else {
1467 memset(npath, 0, (fd + 1) * sizeof(char *));
1470 ttrace->paths.table = npath;
1471 ttrace->paths.max = fd;
1474 ttrace->paths.table[fd] = strdup(pathname);
1476 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1479 static int thread__read_fd_path(struct thread *thread, int fd)
1481 char linkname[PATH_MAX], pathname[PATH_MAX];
1482 struct stat st;
1483 int ret;
1485 if (thread->pid_ == thread->tid) {
1486 scnprintf(linkname, sizeof(linkname),
1487 "/proc/%d/fd/%d", thread->pid_, fd);
1488 } else {
1489 scnprintf(linkname, sizeof(linkname),
1490 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1493 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1494 return -1;
1496 ret = readlink(linkname, pathname, sizeof(pathname));
1498 if (ret < 0 || ret > st.st_size)
1499 return -1;
1501 pathname[ret] = '\0';
1502 return trace__set_fd_pathname(thread, fd, pathname);
1505 static const char *thread__fd_path(struct thread *thread, int fd,
1506 struct trace *trace)
1508 struct thread_trace *ttrace = thread__priv(thread);
1510 if (ttrace == NULL)
1511 return NULL;
1513 if (fd < 0)
1514 return NULL;
1516 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1517 if (!trace->live)
1518 return NULL;
1519 ++trace->stats.proc_getname;
1520 if (thread__read_fd_path(thread, fd))
1521 return NULL;
1524 return ttrace->paths.table[fd];
1527 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1528 struct syscall_arg *arg)
1530 int fd = arg->val;
1531 size_t printed = scnprintf(bf, size, "%d", fd);
1532 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1534 if (path)
1535 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1537 return printed;
1540 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1541 struct syscall_arg *arg)
1543 int fd = arg->val;
1544 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1545 struct thread_trace *ttrace = thread__priv(arg->thread);
1547 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1548 zfree(&ttrace->paths.table[fd]);
1550 return printed;
1553 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1554 unsigned long ptr)
1556 struct thread_trace *ttrace = thread__priv(thread);
1558 ttrace->filename.ptr = ptr;
1559 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1562 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1563 struct syscall_arg *arg)
1565 unsigned long ptr = arg->val;
1567 if (!arg->trace->vfs_getname)
1568 return scnprintf(bf, size, "%#x", ptr);
1570 thread__set_filename_pos(arg->thread, bf, ptr);
1571 return 0;
1574 static bool trace__filter_duration(struct trace *trace, double t)
1576 return t < (trace->duration_filter * NSEC_PER_MSEC);
1579 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1581 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1583 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags set from sig_handler(). They are written asynchronously, so they
 * are volatile sig_atomic_t rather than plain bool: that is the only
 * object type a signal handler may portably store to, and volatile keeps
 * the compiler from caching them in loops that poll them.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for the session to finish and remember whether the
 * request came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1595 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1596 u64 duration, u64 tstamp, FILE *fp)
1598 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1599 printed += fprintf_duration(duration, fp);
1601 if (trace->multiple_threads) {
1602 if (trace->show_comm)
1603 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1604 printed += fprintf(fp, "%d ", thread->tid);
1607 return printed;
1610 static int trace__process_event(struct trace *trace, struct machine *machine,
1611 union perf_event *event, struct perf_sample *sample)
1613 int ret = 0;
1615 switch (event->header.type) {
1616 case PERF_RECORD_LOST:
1617 color_fprintf(trace->output, PERF_COLOR_RED,
1618 "LOST %" PRIu64 " events!\n", event->lost.lost);
1619 ret = machine__process_lost_event(machine, event, sample);
1620 default:
1621 ret = machine__process_event(machine, event, sample);
1622 break;
1625 return ret;
1628 static int trace__tool_process(struct perf_tool *tool,
1629 union perf_event *event,
1630 struct perf_sample *sample,
1631 struct machine *machine)
1633 struct trace *trace = container_of(tool, struct trace, tool);
1634 return trace__process_event(trace, machine, event, sample);
1637 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1639 int err = symbol__init(NULL);
1641 if (err)
1642 return err;
1644 trace->host = machine__new_host();
1645 if (trace->host == NULL)
1646 return -ENOMEM;
1648 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1649 return -errno;
1651 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1652 evlist->threads, trace__tool_process, false,
1653 trace->opts.proc_map_timeout);
1654 if (err)
1655 symbol__exit();
1657 return err;
1660 static int syscall__set_arg_fmts(struct syscall *sc)
1662 struct format_field *field;
1663 int idx = 0;
1665 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1666 if (sc->arg_scnprintf == NULL)
1667 return -1;
1669 if (sc->fmt)
1670 sc->arg_parm = sc->fmt->arg_parm;
1672 for (field = sc->args; field; field = field->next) {
1673 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1674 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1675 else if (field->flags & FIELD_IS_POINTER)
1676 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1677 ++idx;
1680 return 0;
1683 static int trace__read_syscall_info(struct trace *trace, int id)
1685 char tp_name[128];
1686 struct syscall *sc;
1687 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1689 if (name == NULL)
1690 return -1;
1692 if (id > trace->syscalls.max) {
1693 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1695 if (nsyscalls == NULL)
1696 return -1;
1698 if (trace->syscalls.max != -1) {
1699 memset(nsyscalls + trace->syscalls.max + 1, 0,
1700 (id - trace->syscalls.max) * sizeof(*sc));
1701 } else {
1702 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1705 trace->syscalls.table = nsyscalls;
1706 trace->syscalls.max = id;
1709 sc = trace->syscalls.table + id;
1710 sc->name = name;
1712 sc->fmt = syscall_fmt__find(sc->name);
1714 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1715 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1717 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1718 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1719 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1722 if (IS_ERR(sc->tp_format))
1723 return -1;
1725 sc->args = sc->tp_format->format.fields;
1726 sc->nr_args = sc->tp_format->format.nr_fields;
1727 /* drop nr field - not relevant here; does not exist on older kernels */
1728 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1729 sc->args = sc->args->next;
1730 --sc->nr_args;
1733 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1735 return syscall__set_arg_fmts(sc);
1738 static int trace__validate_ev_qualifier(struct trace *trace)
1740 int err = 0, i;
1741 struct str_node *pos;
1743 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1744 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1745 sizeof(trace->ev_qualifier_ids.entries[0]));
1747 if (trace->ev_qualifier_ids.entries == NULL) {
1748 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1749 trace->output);
1750 err = -EINVAL;
1751 goto out;
1754 i = 0;
1756 strlist__for_each(pos, trace->ev_qualifier) {
1757 const char *sc = pos->s;
1758 int id = audit_name_to_syscall(sc, trace->audit.machine);
1760 if (id < 0) {
1761 if (err == 0) {
1762 fputs("Error:\tInvalid syscall ", trace->output);
1763 err = -EINVAL;
1764 } else {
1765 fputs(", ", trace->output);
1768 fputs(sc, trace->output);
1771 trace->ev_qualifier_ids.entries[i++] = id;
1774 if (err < 0) {
1775 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1776 "\nHint:\tand: 'man syscalls'\n", trace->output);
1777 zfree(&trace->ev_qualifier_ids.entries);
1778 trace->ev_qualifier_ids.nr = 0;
1780 out:
1781 return err;
1785 * args is to be interpreted as a series of longs but we need to handle
1786 * 8-byte unaligned accesses. args points to raw_data within the event
1787 * and raw_data is guaranteed to be 8-byte unaligned because it is
1788 * preceded by raw_size which is a u32. So we need to copy args to a temp
1789 * variable to read it. Most notably this avoids extended load instructions
1790 * on unaligned addresses
1793 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1794 unsigned char *args, struct trace *trace,
1795 struct thread *thread)
1797 size_t printed = 0;
1798 unsigned char *p;
1799 unsigned long val;
1801 if (sc->args != NULL) {
1802 struct format_field *field;
1803 u8 bit = 1;
1804 struct syscall_arg arg = {
1805 .idx = 0,
1806 .mask = 0,
1807 .trace = trace,
1808 .thread = thread,
1811 for (field = sc->args; field;
1812 field = field->next, ++arg.idx, bit <<= 1) {
1813 if (arg.mask & bit)
1814 continue;
1816 /* special care for unaligned accesses */
1817 p = args + sizeof(unsigned long) * arg.idx;
1818 memcpy(&val, p, sizeof(val));
1821 * Suppress this argument if its value is zero and
1822 * and we don't have a string associated in an
1823 * strarray for it.
1825 if (val == 0 &&
1826 !(sc->arg_scnprintf &&
1827 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1828 sc->arg_parm[arg.idx]))
1829 continue;
1831 printed += scnprintf(bf + printed, size - printed,
1832 "%s%s: ", printed ? ", " : "", field->name);
1833 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1834 arg.val = val;
1835 if (sc->arg_parm)
1836 arg.parm = sc->arg_parm[arg.idx];
1837 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1838 size - printed, &arg);
1839 } else {
1840 printed += scnprintf(bf + printed, size - printed,
1841 "%ld", val);
1844 } else {
1845 int i = 0;
1847 while (i < 6) {
1848 /* special care for unaligned accesses */
1849 p = args + sizeof(unsigned long) * i;
1850 memcpy(&val, p, sizeof(val));
1851 printed += scnprintf(bf + printed, size - printed,
1852 "%sarg%d: %ld",
1853 printed ? ", " : "", i, val);
1854 ++i;
1858 return printed;
/* Signature shared by the per tracepoint sample handlers (trace__sys_enter() and friends) */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1865 static struct syscall *trace__syscall_info(struct trace *trace,
1866 struct perf_evsel *evsel, int id)
1869 if (id < 0) {
1872 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1873 * before that, leaving at a higher verbosity level till that is
1874 * explained. Reproduced with plain ftrace with:
1876 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1877 * grep "NR -1 " /t/trace_pipe
1879 * After generating some load on the machine.
1881 if (verbose > 1) {
1882 static u64 n;
1883 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1884 id, perf_evsel__name(evsel), ++n);
1886 return NULL;
1889 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1890 trace__read_syscall_info(trace, id))
1891 goto out_cant_read;
1893 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1894 goto out_cant_read;
1896 return &trace->syscalls.table[id];
1898 out_cant_read:
1899 if (verbose) {
1900 fprintf(trace->output, "Problems reading syscall %d", id);
1901 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1902 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1903 fputs(" information\n", trace->output);
1905 return NULL;
1908 static void thread__update_stats(struct thread_trace *ttrace,
1909 int id, struct perf_sample *sample)
1911 struct int_node *inode;
1912 struct stats *stats;
1913 u64 duration = 0;
1915 inode = intlist__findnew(ttrace->syscall_stats, id);
1916 if (inode == NULL)
1917 return;
1919 stats = inode->priv;
1920 if (stats == NULL) {
1921 stats = malloc(sizeof(struct stats));
1922 if (stats == NULL)
1923 return;
1924 init_stats(stats);
1925 inode->priv = stats;
1928 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1929 duration = sample->time - ttrace->entry_time;
1931 update_stats(stats, duration);
1934 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1936 struct thread_trace *ttrace;
1937 u64 duration;
1938 size_t printed;
1940 if (trace->current == NULL)
1941 return 0;
1943 ttrace = thread__priv(trace->current);
1945 if (!ttrace->entry_pending)
1946 return 0;
1948 duration = sample->time - ttrace->entry_time;
1950 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1951 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1952 ttrace->entry_pending = false;
1954 return printed;
1957 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1958 union perf_event *event __maybe_unused,
1959 struct perf_sample *sample)
1961 char *msg;
1962 void *args;
1963 size_t printed = 0;
1964 struct thread *thread;
1965 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1966 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1967 struct thread_trace *ttrace;
1969 if (sc == NULL)
1970 return -1;
1972 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1973 ttrace = thread__trace(thread, trace->output);
1974 if (ttrace == NULL)
1975 goto out_put;
1977 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1979 if (ttrace->entry_str == NULL) {
1980 ttrace->entry_str = malloc(trace__entry_str_size);
1981 if (!ttrace->entry_str)
1982 goto out_put;
1985 if (!trace->summary_only)
1986 trace__printf_interrupted_entry(trace, sample);
1988 ttrace->entry_time = sample->time;
1989 msg = ttrace->entry_str;
1990 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1992 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1993 args, trace, thread);
1995 if (sc->is_exit) {
1996 if (!trace->duration_filter && !trace->summary_only) {
1997 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1998 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2000 } else {
2001 ttrace->entry_pending = true;
2002 /* See trace__vfs_getname & trace__sys_exit */
2003 ttrace->filename.pending_open = false;
2006 if (trace->current != thread) {
2007 thread__put(trace->current);
2008 trace->current = thread__get(thread);
2010 err = 0;
2011 out_put:
2012 thread__put(thread);
2013 return err;
2016 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2017 union perf_event *event __maybe_unused,
2018 struct perf_sample *sample)
2020 long ret;
2021 u64 duration = 0;
2022 struct thread *thread;
2023 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2024 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2025 struct thread_trace *ttrace;
2027 if (sc == NULL)
2028 return -1;
2030 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2031 ttrace = thread__trace(thread, trace->output);
2032 if (ttrace == NULL)
2033 goto out_put;
2035 if (trace->summary)
2036 thread__update_stats(ttrace, id, sample);
2038 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2040 if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2041 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2042 ttrace->filename.pending_open = false;
2043 ++trace->stats.vfs_getname;
2046 ttrace->exit_time = sample->time;
2048 if (ttrace->entry_time) {
2049 duration = sample->time - ttrace->entry_time;
2050 if (trace__filter_duration(trace, duration))
2051 goto out;
2052 } else if (trace->duration_filter)
2053 goto out;
2055 if (trace->summary_only)
2056 goto out;
2058 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2060 if (ttrace->entry_pending) {
2061 fprintf(trace->output, "%-70s", ttrace->entry_str);
2062 } else {
2063 fprintf(trace->output, " ... [");
2064 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2065 fprintf(trace->output, "]: %s()", sc->name);
2068 if (sc->fmt == NULL) {
2069 signed_print:
2070 fprintf(trace->output, ") = %ld", ret);
2071 } else if (ret < 0 && sc->fmt->errmsg) {
2072 char bf[STRERR_BUFSIZE];
2073 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2074 *e = audit_errno_to_name(-ret);
2076 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2077 } else if (ret == 0 && sc->fmt->timeout)
2078 fprintf(trace->output, ") = 0 Timeout");
2079 else if (sc->fmt->hexret)
2080 fprintf(trace->output, ") = %#lx", ret);
2081 else
2082 goto signed_print;
2084 fputc('\n', trace->output);
2085 out:
2086 ttrace->entry_pending = false;
2087 err = 0;
2088 out_put:
2089 thread__put(thread);
2090 return err;
2093 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2094 union perf_event *event __maybe_unused,
2095 struct perf_sample *sample)
2097 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2098 struct thread_trace *ttrace;
2099 size_t filename_len, entry_str_len, to_move;
2100 ssize_t remaining_space;
2101 char *pos;
2102 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2104 if (!thread)
2105 goto out;
2107 ttrace = thread__priv(thread);
2108 if (!ttrace)
2109 goto out;
2111 filename_len = strlen(filename);
2113 if (ttrace->filename.namelen < filename_len) {
2114 char *f = realloc(ttrace->filename.name, filename_len + 1);
2116 if (f == NULL)
2117 goto out;
2119 ttrace->filename.namelen = filename_len;
2120 ttrace->filename.name = f;
2123 strcpy(ttrace->filename.name, filename);
2124 ttrace->filename.pending_open = true;
2126 if (!ttrace->filename.ptr)
2127 goto out;
2129 entry_str_len = strlen(ttrace->entry_str);
2130 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2131 if (remaining_space <= 0)
2132 goto out;
2134 if (filename_len > (size_t)remaining_space) {
2135 filename += filename_len - remaining_space;
2136 filename_len = remaining_space;
2139 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2140 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2141 memmove(pos + filename_len, pos, to_move);
2142 memcpy(pos, filename, filename_len);
2144 ttrace->filename.ptr = 0;
2145 ttrace->filename.entry_str_pos = 0;
2146 out:
2147 return 0;
2150 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2151 union perf_event *event __maybe_unused,
2152 struct perf_sample *sample)
2154 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2155 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2156 struct thread *thread = machine__findnew_thread(trace->host,
2157 sample->pid,
2158 sample->tid);
2159 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2161 if (ttrace == NULL)
2162 goto out_dump;
2164 ttrace->runtime_ms += runtime_ms;
2165 trace->runtime_ms += runtime_ms;
2166 thread__put(thread);
2167 return 0;
2169 out_dump:
2170 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2171 evsel->name,
2172 perf_evsel__strval(evsel, sample, "comm"),
2173 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2174 runtime,
2175 perf_evsel__intval(evsel, sample, "vruntime"));
2176 thread__put(thread);
2177 return 0;
2180 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2181 union perf_event *event __maybe_unused,
2182 struct perf_sample *sample)
2184 trace__printf_interrupted_entry(trace, sample);
2185 trace__fprintf_tstamp(trace, sample->time, trace->output);
2187 if (trace->trace_syscalls)
2188 fprintf(trace->output, "( ): ");
2190 fprintf(trace->output, "%s:", evsel->name);
2192 if (evsel->tp_format) {
2193 event_format__fprintf(evsel->tp_format, sample->cpu,
2194 sample->raw_data, sample->raw_size,
2195 trace->output);
2198 fprintf(trace->output, ")\n");
2199 return 0;
2202 static void print_location(FILE *f, struct perf_sample *sample,
2203 struct addr_location *al,
2204 bool print_dso, bool print_sym)
2207 if ((verbose || print_dso) && al->map)
2208 fprintf(f, "%s@", al->map->dso->long_name);
2210 if ((verbose || print_sym) && al->sym)
2211 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2212 al->addr - al->sym->start);
2213 else if (al->map)
2214 fprintf(f, "0x%" PRIx64, al->addr);
2215 else
2216 fprintf(f, "0x%" PRIx64, sample->addr);
2219 static int trace__pgfault(struct trace *trace,
2220 struct perf_evsel *evsel,
2221 union perf_event *event,
2222 struct perf_sample *sample)
2224 struct thread *thread;
2225 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2226 struct addr_location al;
2227 char map_type = 'd';
2228 struct thread_trace *ttrace;
2229 int err = -1;
2231 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2232 ttrace = thread__trace(thread, trace->output);
2233 if (ttrace == NULL)
2234 goto out_put;
2236 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2237 ttrace->pfmaj++;
2238 else
2239 ttrace->pfmin++;
2241 if (trace->summary_only)
2242 goto out;
2244 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2245 sample->ip, &al);
2247 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2249 fprintf(trace->output, "%sfault [",
2250 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2251 "maj" : "min");
2253 print_location(trace->output, sample, &al, false, true);
2255 fprintf(trace->output, "] => ");
2257 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2258 sample->addr, &al);
2260 if (!al.map) {
2261 thread__find_addr_location(thread, cpumode,
2262 MAP__FUNCTION, sample->addr, &al);
2264 if (al.map)
2265 map_type = 'x';
2266 else
2267 map_type = '?';
2270 print_location(trace->output, sample, &al, true, false);
2272 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2273 out:
2274 err = 0;
2275 out_put:
2276 thread__put(thread);
2277 return err;
2280 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2282 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2283 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2284 return false;
2286 if (trace->pid_list || trace->tid_list)
2287 return true;
2289 return false;
2292 static int trace__process_sample(struct perf_tool *tool,
2293 union perf_event *event,
2294 struct perf_sample *sample,
2295 struct perf_evsel *evsel,
2296 struct machine *machine __maybe_unused)
2298 struct trace *trace = container_of(tool, struct trace, tool);
2299 int err = 0;
2301 tracepoint_handler handler = evsel->handler;
2303 if (skip_sample(trace, sample))
2304 return 0;
2306 if (!trace->full_time && trace->base_time == 0)
2307 trace->base_time = sample->time;
2309 if (handler) {
2310 ++trace->nr_events;
2311 handler(trace, evsel, event, sample);
2314 return err;
2317 static int parse_target_str(struct trace *trace)
2319 if (trace->opts.target.pid) {
2320 trace->pid_list = intlist__new(trace->opts.target.pid);
2321 if (trace->pid_list == NULL) {
2322 pr_err("Error parsing process id string\n");
2323 return -EINVAL;
2327 if (trace->opts.target.tid) {
2328 trace->tid_list = intlist__new(trace->opts.target.tid);
2329 if (trace->tid_list == NULL) {
2330 pr_err("Error parsing thread id string\n");
2331 return -EINVAL;
2335 return 0;
2338 static int trace__record(struct trace *trace, int argc, const char **argv)
2340 unsigned int rec_argc, i, j;
2341 const char **rec_argv;
2342 const char * const record_args[] = {
2343 "record",
2344 "-R",
2345 "-m", "1024",
2346 "-c", "1",
2349 const char * const sc_args[] = { "-e", };
2350 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2351 const char * const majpf_args[] = { "-e", "major-faults" };
2352 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2353 const char * const minpf_args[] = { "-e", "minor-faults" };
2354 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2356 /* +1 is for the event string below */
2357 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2358 majpf_args_nr + minpf_args_nr + argc;
2359 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2361 if (rec_argv == NULL)
2362 return -ENOMEM;
2364 j = 0;
2365 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2366 rec_argv[j++] = record_args[i];
2368 if (trace->trace_syscalls) {
2369 for (i = 0; i < sc_args_nr; i++)
2370 rec_argv[j++] = sc_args[i];
2372 /* event string may be different for older kernels - e.g., RHEL6 */
2373 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2374 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2375 else if (is_valid_tracepoint("syscalls:sys_enter"))
2376 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2377 else {
2378 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2379 return -1;
2383 if (trace->trace_pgfaults & TRACE_PFMAJ)
2384 for (i = 0; i < majpf_args_nr; i++)
2385 rec_argv[j++] = majpf_args[i];
2387 if (trace->trace_pgfaults & TRACE_PFMIN)
2388 for (i = 0; i < minpf_args_nr; i++)
2389 rec_argv[j++] = minpf_args[i];
2391 for (i = 0; i < (unsigned int)argc; i++)
2392 rec_argv[j++] = argv[i];
2394 return cmd_record(j, rec_argv, NULL);
2397 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2399 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2401 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2403 if (IS_ERR(evsel))
2404 return false;
2406 if (perf_evsel__field(evsel, "pathname") == NULL) {
2407 perf_evsel__delete(evsel);
2408 return false;
2411 evsel->handler = trace__vfs_getname;
2412 perf_evlist__add(evlist, evsel);
2413 return true;
2416 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2417 u64 config)
2419 struct perf_evsel *evsel;
2420 struct perf_event_attr attr = {
2421 .type = PERF_TYPE_SOFTWARE,
2422 .mmap_data = 1,
2425 attr.config = config;
2426 attr.sample_period = 1;
2428 event_attr_init(&attr);
2430 evsel = perf_evsel__new(&attr);
2431 if (!evsel)
2432 return -ENOMEM;
2434 evsel->handler = trace__pgfault;
2435 perf_evlist__add(evlist, evsel);
2437 return 0;
2440 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2442 const u32 type = event->header.type;
2443 struct perf_evsel *evsel;
2445 if (!trace->full_time && trace->base_time == 0)
2446 trace->base_time = sample->time;
2448 if (type != PERF_RECORD_SAMPLE) {
2449 trace__process_event(trace, trace->host, event, sample);
2450 return;
2453 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2454 if (evsel == NULL) {
2455 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2456 return;
2459 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2460 sample->raw_data == NULL) {
2461 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2462 perf_evsel__name(evsel), sample->tid,
2463 sample->cpu, sample->raw_size);
2464 } else {
2465 tracepoint_handler handler = evsel->handler;
2466 handler(trace, evsel, event, sample);
2470 static int trace__add_syscall_newtp(struct trace *trace)
2472 int ret = -1;
2473 struct perf_evlist *evlist = trace->evlist;
2474 struct perf_evsel *sys_enter, *sys_exit;
2476 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2477 if (sys_enter == NULL)
2478 goto out;
2480 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2481 goto out_delete_sys_enter;
2483 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2484 if (sys_exit == NULL)
2485 goto out_delete_sys_enter;
2487 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2488 goto out_delete_sys_exit;
2490 perf_evlist__add(evlist, sys_enter);
2491 perf_evlist__add(evlist, sys_exit);
2493 trace->syscalls.events.sys_enter = sys_enter;
2494 trace->syscalls.events.sys_exit = sys_exit;
2496 ret = 0;
2497 out:
2498 return ret;
2500 out_delete_sys_exit:
2501 perf_evsel__delete_priv(sys_exit);
2502 out_delete_sys_enter:
2503 perf_evsel__delete_priv(sys_enter);
2504 goto out;
2507 static int trace__set_ev_qualifier_filter(struct trace *trace)
2509 int err = -1;
2510 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2511 trace->ev_qualifier_ids.nr,
2512 trace->ev_qualifier_ids.entries);
2514 if (filter == NULL)
2515 goto out_enomem;
2517 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2518 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2520 free(filter);
2521 out:
2522 return err;
2523 out_enomem:
2524 errno = ENOMEM;
2525 goto out;
/*
 * Live tracing mode: add the events selected by the trace options to the
 * evlist, create the target maps, optionally prepare the workload given in
 * argv, open and mmap the events, then loop consuming the ring buffers.
 * On exit the evlist is deleted and trace->evlist reset to NULL.
 * Returns err: 0 when the loop finished without error, non-zero otherwise.
 */
2528 static int trace__run(struct trace *trace, int argc, const char **argv)
2530 struct perf_evlist *evlist = trace->evlist;
2531 struct perf_evsel *evsel;
2532 int err = -1, i;
2533 unsigned long before;
/* any remaining argument means there is a workload to start */
2534 const bool forks = argc > 0;
2535 bool draining = false;
2537 trace->live = true;
/* event setup: syscall tracepoints, vfs_getname probe, page faults, sched */
2539 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2540 goto out_error_raw_syscalls;
2542 if (trace->trace_syscalls)
2543 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2545 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2546 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2547 goto out_error_mem;
2550 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2551 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2552 goto out_error_mem;
2554 if (trace->sched &&
2555 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2556 trace__sched_stat_runtime))
2557 goto out_error_sched_stat_runtime;
2559 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2560 if (err < 0) {
2561 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2562 goto out_delete_evlist;
2565 err = trace__symbols_init(trace, evlist);
2566 if (err < 0) {
2567 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2568 goto out_delete_evlist;
2571 perf_evlist__config(evlist, &trace->opts);
2573 signal(SIGCHLD, sig_handler);
2574 signal(SIGINT, sig_handler);
2576 if (forks) {
2577 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2578 argv, false, NULL);
2579 if (err < 0) {
2580 fprintf(trace->output, "Couldn't run the workload!\n");
2581 goto out_delete_evlist;
2585 err = perf_evlist__open(evlist);
2586 if (err < 0)
2587 goto out_error_open;
2590 * Better not use !target__has_task() here because we need to cover the
2591 * case where no threads were specified in the command line, but a
2592 * workload was, and in that case we will fill in the thread_map when
2593 * we fork the workload in perf_evlist__prepare_workload.
/* explicit --filter-pids win; otherwise, when not tracing specific threads, filter out our own pid */
2595 if (trace->filter_pids.nr > 0)
2596 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2597 else if (thread_map__pid(evlist->threads, 0) == -1)
2598 err = perf_evlist__set_filter_pid(evlist, getpid());
2600 if (err < 0)
2601 goto out_error_mem;
2603 if (trace->ev_qualifier_ids.nr > 0) {
2604 err = trace__set_ev_qualifier_filter(trace);
2605 if (err < 0)
2606 goto out_errno;
2608 pr_debug("event qualifier tracepoint filter: %s\n",
2609 trace->syscalls.events.sys_exit->filter);
/* evsel is filled in by perf_evlist__apply_filters(); it is read at out_error_apply_filters */
2612 err = perf_evlist__apply_filters(evlist, &evsel);
2613 if (err < 0)
2614 goto out_error_apply_filters;
2616 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2617 if (err < 0)
2618 goto out_error_mmap;
2620 if (!target__none(&trace->opts.target))
2621 perf_evlist__enable(evlist);
2623 if (forks)
2624 perf_evlist__start_workload(evlist);
/* used elsewhere to decide whether the thread id is shown (see the "add tid to output" note in trace__replay) */
2626 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2627 evlist->threads->nr > 1 ||
2628 perf_evlist__first(evlist)->attr.inherit;
/* main loop: one pass over every mmap; 'before' lets us detect a pass that read nothing */
2629 again:
2630 before = trace->nr_events;
2632 for (i = 0; i < evlist->nr_mmaps; i++) {
2633 union perf_event *event;
2635 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2636 struct perf_sample sample;
2638 ++trace->nr_events;
2640 err = perf_evlist__parse_sample(evlist, event, &sample);
2641 if (err) {
2642 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2643 goto next_event;
2646 trace__handle_event(trace, event, &sample);
2647 next_event:
2648 perf_evlist__mmap_consume(evlist, i);
/* 'interrupted' and 'done' are defined outside this block, presumably set by sig_handler -- TODO confirm */
2650 if (interrupted)
2651 goto out_disable;
/* first time 'done' is seen: disable the events but keep consuming what is already buffered */
2653 if (done && !draining) {
2654 perf_evlist__disable(evlist);
2655 draining = true;
/* nothing was read in this pass: poll (unless draining), otherwise fall through to the exit path */
2660 if (trace->nr_events == before) {
2661 int timeout = done ? 100 : -1;
2663 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2664 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2665 draining = true;
2667 goto again;
2669 } else {
2670 goto again;
2673 out_disable:
2674 thread__zput(trace->current);
2676 perf_evlist__disable(evlist);
2678 if (!err) {
2679 if (trace->summary)
2680 trace__fprintf_thread_summary(trace, trace->output);
2682 if (trace->show_tool_stats) {
2683 fprintf(trace->output, "Stats:\n "
2684 " vfs_getname : %" PRIu64 "\n"
2685 " proc_getname: %" PRIu64 "\n",
2686 trace->stats.vfs_getname,
2687 trace->stats.proc_getname);
/* common exit: every path, success or failure, releases the evlist here */
2691 out_delete_evlist:
2692 perf_evlist__delete(evlist);
2693 trace->evlist = NULL;
2694 trace->live = false;
2695 return err;
/* error exits: each label formats or prints a message, then jumps to out_delete_evlist */
2697 char errbuf[BUFSIZ];
2699 out_error_sched_stat_runtime:
2700 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2701 goto out_error;
2703 out_error_raw_syscalls:
2704 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2705 goto out_error;
2707 out_error_mmap:
2708 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2709 goto out_error;
2711 out_error_open:
2712 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2714 out_error:
2715 fprintf(trace->output, "%s\n", errbuf);
2716 goto out_delete_evlist;
2718 out_error_apply_filters:
2719 fprintf(trace->output,
2720 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2721 evsel->filter, perf_evsel__name(evsel), errno,
2722 strerror_r(errno, errbuf, sizeof(errbuf)));
2723 goto out_delete_evlist;
2725 out_error_mem:
2726 fprintf(trace->output, "Not enough memory to run!\n");
2727 goto out_delete_evlist;
2729 out_errno:
2730 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2731 goto out_delete_evlist;
2734 static int trace__replay(struct trace *trace)
2736 const struct perf_evsel_str_handler handlers[] = {
2737 { "probe:vfs_getname", trace__vfs_getname, },
2739 struct perf_data_file file = {
2740 .path = input_name,
2741 .mode = PERF_DATA_MODE_READ,
2742 .force = trace->force,
2744 struct perf_session *session;
2745 struct perf_evsel *evsel;
2746 int err = -1;
2748 trace->tool.sample = trace__process_sample;
2749 trace->tool.mmap = perf_event__process_mmap;
2750 trace->tool.mmap2 = perf_event__process_mmap2;
2751 trace->tool.comm = perf_event__process_comm;
2752 trace->tool.exit = perf_event__process_exit;
2753 trace->tool.fork = perf_event__process_fork;
2754 trace->tool.attr = perf_event__process_attr;
2755 trace->tool.tracing_data = perf_event__process_tracing_data;
2756 trace->tool.build_id = perf_event__process_build_id;
2758 trace->tool.ordered_events = true;
2759 trace->tool.ordering_requires_timestamps = true;
2761 /* add tid to output */
2762 trace->multiple_threads = true;
2764 session = perf_session__new(&file, false, &trace->tool);
2765 if (session == NULL)
2766 return -1;
2768 if (symbol__init(&session->header.env) < 0)
2769 goto out;
2771 trace->host = &session->machines.host;
2773 err = perf_session__set_tracepoints_handlers(session, handlers);
2774 if (err)
2775 goto out;
2777 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2778 "raw_syscalls:sys_enter");
2779 /* older kernels have syscalls tp versus raw_syscalls */
2780 if (evsel == NULL)
2781 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2782 "syscalls:sys_enter");
2784 if (evsel &&
2785 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2786 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2787 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2788 goto out;
2791 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2792 "raw_syscalls:sys_exit");
2793 if (evsel == NULL)
2794 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2795 "syscalls:sys_exit");
2796 if (evsel &&
2797 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2798 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2799 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2800 goto out;
2803 evlist__for_each(session->evlist, evsel) {
2804 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2805 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2806 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2807 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2808 evsel->handler = trace__pgfault;
2811 err = parse_target_str(trace);
2812 if (err != 0)
2813 goto out;
2815 setup_pager();
2817 err = perf_session__process_events(session);
2818 if (err)
2819 pr_err("Failed to process events, error %d", err);
2821 else if (trace->summary)
2822 trace__fprintf_thread_summary(trace, trace->output);
2824 out:
2825 perf_session__delete(session);
2827 return err;
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value fprintf() reported, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2839 static size_t thread__dump_stats(struct thread_trace *ttrace,
2840 struct trace *trace, FILE *fp)
2842 struct stats *stats;
2843 size_t printed = 0;
2844 struct syscall *sc;
2845 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2847 if (inode == NULL)
2848 return 0;
2850 printed += fprintf(fp, "\n");
2852 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2853 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2854 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2856 /* each int_node is a syscall */
2857 while (inode) {
2858 stats = inode->priv;
2859 if (stats) {
2860 double min = (double)(stats->min) / NSEC_PER_MSEC;
2861 double max = (double)(stats->max) / NSEC_PER_MSEC;
2862 double avg = avg_stats(stats);
2863 double pct;
2864 u64 n = (u64) stats->n;
2866 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2867 avg /= NSEC_PER_MSEC;
2869 sc = &trace->syscalls.table[inode->i];
2870 printed += fprintf(fp, " %-15s", sc->name);
2871 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2872 n, avg * n, min, avg);
2873 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2876 inode = intlist__next(inode);
2879 printed += fprintf(fp, "\n\n");
2881 return printed;
2884 /* struct used to pass data to per-thread function (trace__fprintf_one_thread, driven by trace__fprintf_thread_summary) */
2885 struct summary_data {
/* stream the summary lines are written to */
2886 FILE *fp;
/* trace session; trace__fprintf_one_thread reads its nr_events and passes it on to thread__dump_stats */
2887 struct trace *trace;
/* running total of the fprintf() return values */
2888 size_t printed;
2891 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2893 struct summary_data *data = priv;
2894 FILE *fp = data->fp;
2895 size_t printed = data->printed;
2896 struct trace *trace = data->trace;
2897 struct thread_trace *ttrace = thread__priv(thread);
2898 double ratio;
2900 if (ttrace == NULL)
2901 return 0;
2903 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2905 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2906 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2907 printed += fprintf(fp, "%.1f%%", ratio);
2908 if (ttrace->pfmaj)
2909 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2910 if (ttrace->pfmin)
2911 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2912 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2913 printed += thread__dump_stats(ttrace, trace, fp);
2915 data->printed += printed;
2917 return 0;
2920 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2922 struct summary_data data = {
2923 .fp = fp,
2924 .trace = trace
2926 data.printed = trace__fprintf_threads_header(fp);
2928 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2930 return data.printed;
2933 static int trace__set_duration(const struct option *opt, const char *str,
2934 int unset __maybe_unused)
2936 struct trace *trace = opt->value;
2938 trace->duration_filter = atof(str);
2939 return 0;
2942 static int trace__set_filter_pids(const struct option *opt, const char *str,
2943 int unset __maybe_unused)
2945 int ret = -1;
2946 size_t i;
2947 struct trace *trace = opt->value;
2949 * FIXME: introduce a intarray class, plain parse csv and create a
2950 * { int nr, int entries[] } struct...
2952 struct intlist *list = intlist__new(str);
2954 if (list == NULL)
2955 return -1;
2957 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2958 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2960 if (trace->filter_pids.entries == NULL)
2961 goto out;
2963 trace->filter_pids.entries[0] = getpid();
2965 for (i = 1; i < trace->filter_pids.nr; ++i)
2966 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2968 intlist__delete(list);
2969 ret = 0;
2970 out:
2971 return ret;
2974 static int trace__open_output(struct trace *trace, const char *filename)
2976 struct stat st;
2978 if (!stat(filename, &st) && st.st_size) {
2979 char oldname[PATH_MAX];
2981 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2982 unlink(oldname);
2983 rename(filename, oldname);
2986 trace->output = fopen(filename, "w");
2988 return trace->output == NULL ? -errno : 0;
2991 static int parse_pagefaults(const struct option *opt, const char *str,
2992 int unset __maybe_unused)
2994 int *trace_pgfaults = opt->value;
2996 if (strcmp(str, "all") == 0)
2997 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2998 else if (strcmp(str, "maj") == 0)
2999 *trace_pgfaults |= TRACE_PFMAJ;
3000 else if (strcmp(str, "min") == 0)
3001 *trace_pgfaults |= TRACE_PFMIN;
3002 else
3003 return -1;
3005 return 0;
3008 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3010 struct perf_evsel *evsel;
3012 evlist__for_each(evlist, evsel)
3013 evsel->handler = handler;
/*
 * Entry point of the 'trace' builtin: sets up the default struct trace,
 * parses the command line and then either hands over to trace__record()
 * (the "record" subcommand), replays a recorded file when input_name is set,
 * or traces live via trace__run().  Returns the resulting error code.
 */
3016 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3018 const char *trace_usage[] = {
3019 "perf trace [<options>] [<command>]",
3020 "perf trace [<options>] -- <command> [<options>]",
3021 "perf trace record [<options>] [<command>]",
3022 "perf trace record [<options>] -- <command> [<options>]",
3023 NULL
/* defaults: output on stderr, comm shown, syscalls traced */
3025 struct trace trace = {
3026 .audit = {
3027 .machine = audit_detect_machine(),
3028 .open_id = audit_name_to_syscall("open", trace.audit.machine),
3030 .syscalls = {
3031 . max = -1,
3033 .opts = {
3034 .target = {
3035 .uid = UINT_MAX,
3036 .uses_mmap = true,
3038 .user_freq = UINT_MAX,
3039 .user_interval = ULLONG_MAX,
3040 .no_buffering = true,
3041 .mmap_pages = UINT_MAX,
3042 .proc_map_timeout = 500,
3044 .output = stderr,
3045 .show_comm = true,
3046 .trace_syscalls = true,
3048 const char *output_name = NULL;
3049 const char *ev_qualifier_str = NULL;
3050 const struct option trace_options[] = {
3051 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3052 "event selector. use 'perf list' to list available events",
3053 parse_events_option),
3054 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3055 "show the thread COMM next to its id"),
3056 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3057 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3058 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3059 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3060 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3061 "trace events on existing process id"),
3062 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3063 "trace events on existing thread id"),
3064 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3065 "pids to filter (by the kernel)", trace__set_filter_pids),
3066 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3067 "system-wide collection from all CPUs"),
3068 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3069 "list of cpus to monitor"),
3070 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3071 "child tasks do not inherit counters"),
3072 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3073 "number of mmap data pages",
3074 perf_evlist__parse_mmap_pages),
3075 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3076 "user to profile"),
3077 OPT_CALLBACK(0, "duration", &trace, "float",
3078 "show only events with duration > N.M ms",
3079 trace__set_duration),
3080 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3081 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3082 OPT_BOOLEAN('T', "time", &trace.full_time,
3083 "Show full timestamp, not time relative to first start"),
3084 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3085 "Show only syscall summary with statistics"),
3086 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3087 "Show all syscalls and summary with statistics"),
3088 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3089 "Trace pagefaults", parse_pagefaults, "maj"),
3090 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3091 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3092 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3093 "per thread proc mmap processing timeout in ms"),
3094 OPT_END()
3096 const char * const trace_subcommands[] = { "record", NULL };
3097 int err;
3098 char bf[BUFSIZ];
3100 signal(SIGSEGV, sighandler_dump_stack);
3101 signal(SIGFPE, sighandler_dump_stack);
/* the evlist must exist before option parsing: the --event option above is given &trace.evlist */
3103 trace.evlist = perf_evlist__new();
3105 if (trace.evlist == NULL) {
3106 pr_err("Not enough memory to run!\n");
3107 err = -ENOMEM;
3108 goto out;
3111 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3112 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
/* when page faults are traced, also request the sample address and time */
3114 if (trace.trace_pgfaults) {
3115 trace.opts.sample_address = true;
3116 trace.opts.sample_time = true;
/* whatever is in the evlist at this point gets the generic event handler */
3119 if (trace.evlist->nr_entries > 0)
3120 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
/* NOTE(review): this return, and the "return -1" below, bypass the out/out_close labels */
3122 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3123 return trace__record(&trace, argc-1, &argv[1]);
3125 /* summary_only implies summary option, but don't overwrite summary if set */
3126 if (trace.summary_only)
3127 trace.summary = trace.summary_only;
3129 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3130 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3131 pr_err("Please specify something to trace.\n");
3132 return -1;
3135 if (output_name != NULL) {
3136 err = trace__open_output(&trace, output_name);
3137 if (err < 0) {
3138 perror("failed to create output file");
3139 goto out;
/* -e/--expr: a leading '!' negates the qualifier list */
3143 if (ev_qualifier_str != NULL) {
3144 const char *s = ev_qualifier_str;
3145 struct strlist_config slist_config = {
3146 .dirname = system_path(STRACE_GROUPS_DIR),
3149 trace.not_ev_qualifier = *s == '!';
3150 if (trace.not_ev_qualifier)
3151 ++s;
3152 trace.ev_qualifier = strlist__new(s, &slist_config);
3153 if (trace.ev_qualifier == NULL) {
3154 fputs("Not enough memory to parse event qualifier",
3155 trace.output);
3156 err = -ENOMEM;
3157 goto out_close;
3160 err = trace__validate_ev_qualifier(&trace);
3161 if (err)
3162 goto out_close;
3165 err = target__validate(&trace.opts.target);
3166 if (err) {
3167 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3168 fprintf(trace.output, "%s", bf);
3169 goto out_close;
3172 err = target__parse_uid(&trace.opts.target);
3173 if (err) {
3174 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3175 fprintf(trace.output, "%s", bf);
3176 goto out_close;
/* no workload and no explicit target: trace the whole system */
3179 if (!argc && target__none(&trace.opts.target))
3180 trace.opts.target.system_wide = true;
3182 if (input_name)
3183 err = trace__replay(&trace);
3184 else
3185 err = trace__run(&trace, argc, argv);
/* only close the stream if it was opened here; otherwise it is still stderr */
3187 out_close:
3188 if (output_name != NULL)
3189 fclose(trace.output);
3190 out:
3191 return err;