HID: hiddev: Fix slab-out-of-bounds write in hiddev_ioctl_usage()
[linux/fpc-iii.git] / tools / perf / builtin-trace.c
blobda8afc12111822e083cfff1df9273c378b71021c
/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
/*
 * For older distros: provide fallback values for constants that the
 * system headers may not define.  Each one is only defined when missing.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC		02000000
#endif

#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

#ifndef SOCK_DCCP
# define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
110 struct tp_field {
111 int offset;
112 union {
113 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
121 u##bits value; \
122 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123 return value; \
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return bswap_##bits(value);\
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
143 static int tp_field__init_uint(struct tp_field *field,
144 struct format_field *format_field,
145 bool needs_swap)
147 field->offset = format_field->offset;
149 switch (format_field->size) {
150 case 1:
151 field->integer = tp_field__u8;
152 break;
153 case 2:
154 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155 break;
156 case 4:
157 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158 break;
159 case 8:
160 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161 break;
162 default:
163 return -1;
166 return 0;
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
171 return sample->raw_data + field->offset;
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
176 field->offset = format_field->offset;
177 field->pointer = tp_field__ptr;
178 return 0;
181 struct syscall_tp {
182 struct tp_field id;
183 union {
184 struct tp_field args, ret;
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 struct tp_field *field,
190 const char *name)
192 struct format_field *format_field = perf_evsel__field(evsel, name);
194 if (format_field == NULL)
195 return -1;
197 return tp_field__init_uint(field, format_field, evsel->needs_swap);
200 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
201 ({ struct syscall_tp *sc = evsel->priv;\
202 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name in @evsel and initialise
 * @field as a pointer accessor for it.  Returns -1 when the field does not
 * exist, otherwise the result of tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/*
 * Convenience wrapper: initialise the struct syscall_tp member @name stored
 * in evsel->priv from the tracepoint field with the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
222 zfree(&evsel->priv);
223 perf_evsel__delete(evsel);
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
228 evsel->priv = malloc(sizeof(struct syscall_tp));
229 if (evsel->priv != NULL) {
230 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231 goto out_delete;
233 evsel->handler = handler;
234 return 0;
237 return -ENOMEM;
239 out_delete:
240 zfree(&evsel->priv);
241 return -ENOENT;
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint and prepare
 * it with perf_evsel__init_syscall_tp().  Falls back to
 * syscalls:<direction> when the first tracepoint cannot be created.
 *
 * Returns the new evsel, or NULL on any failure.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}
/* Read the integer syscall_tp field @name of @evsel from @sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Get a pointer to the syscall_tp field @name of @evsel inside @sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
273 struct syscall_arg {
274 unsigned long val;
275 struct thread *thread;
276 struct trace *trace;
277 void *parm;
278 u8 idx;
279 u8 mask;
/*
 * Table mapping consecutive integer values to names: entries[i] is the name
 * for the value (offset + i).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing the string table @array, offset 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same as DEFINE_STRARRAY(), for tables whose first entry has value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 const char *intfmt,
301 struct syscall_arg *arg)
303 struct strarray *sa = arg->parm;
304 int idx = arg->val - sa->offset;
306 if (idx < 0 || idx >= sa->nr_entries)
307 return scnprintf(bf, size, intfmt, arg->val);
309 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* strarray formatter that prints unknown values in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* strarray formatter that prints unknown values in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335 struct syscall_arg *arg);
337 #define SCA_FD syscall_arg__scnprintf_fd
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 struct syscall_arg *arg)
342 int fd = arg->val;
344 if (fd == AT_FDCWD)
345 return scnprintf(bf, size, "CWD");
347 return syscall_arg__scnprintf_fd(bf, size, arg);
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Defined elsewhere in this file; declared here so the table macros can use it. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358 struct syscall_arg *arg)
360 return scnprintf(bf, size, "%#lx", arg->val);
363 #define SCA_HEX syscall_arg__scnprintf_hex
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 struct syscall_arg *arg)
368 return scnprintf(bf, size, "%d", arg->val);
371 #define SCA_INT syscall_arg__scnprintf_int
373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374 struct syscall_arg *arg)
376 int printed = 0, prot = arg->val;
378 if (prot == PROT_NONE)
379 return scnprintf(bf, size, "NONE");
380 #define P_MMAP_PROT(n) \
381 if (prot & PROT_##n) { \
382 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383 prot &= ~PROT_##n; \
386 P_MMAP_PROT(EXEC);
387 P_MMAP_PROT(READ);
388 P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390 P_MMAP_PROT(SEM);
391 #endif
392 P_MMAP_PROT(GROWSDOWN);
393 P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
396 if (prot)
397 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
399 return printed;
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405 struct syscall_arg *arg)
407 int printed = 0, flags = arg->val;
409 #define P_MMAP_FLAG(n) \
410 if (flags & MAP_##n) { \
411 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412 flags &= ~MAP_##n; \
415 P_MMAP_FLAG(SHARED);
416 P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418 P_MMAP_FLAG(32BIT);
419 #endif
420 P_MMAP_FLAG(ANONYMOUS);
421 P_MMAP_FLAG(DENYWRITE);
422 P_MMAP_FLAG(EXECUTABLE);
423 P_MMAP_FLAG(FILE);
424 P_MMAP_FLAG(FIXED);
425 P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427 P_MMAP_FLAG(HUGETLB);
428 #endif
429 P_MMAP_FLAG(LOCKED);
430 P_MMAP_FLAG(NONBLOCK);
431 P_MMAP_FLAG(NORESERVE);
432 P_MMAP_FLAG(POPULATE);
433 P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435 P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
439 if (flags)
440 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
442 return printed;
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448 struct syscall_arg *arg)
450 int printed = 0, flags = arg->val;
452 #define P_MREMAP_FLAG(n) \
453 if (flags & MREMAP_##n) { \
454 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455 flags &= ~MREMAP_##n; \
458 P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460 P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
464 if (flags)
465 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
467 return printed;
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473 struct syscall_arg *arg)
475 int behavior = arg->val;
477 switch (behavior) {
478 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479 P_MADV_BHV(NORMAL);
480 P_MADV_BHV(RANDOM);
481 P_MADV_BHV(SEQUENTIAL);
482 P_MADV_BHV(WILLNEED);
483 P_MADV_BHV(DONTNEED);
484 P_MADV_BHV(REMOVE);
485 P_MADV_BHV(DONTFORK);
486 P_MADV_BHV(DOFORK);
487 P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489 P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491 P_MADV_BHV(MERGEABLE);
492 P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494 P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497 P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500 P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503 P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506 default: break;
509 return scnprintf(bf, size, "%#x", behavior);
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515 struct syscall_arg *arg)
517 int printed = 0, op = arg->val;
519 if (op == 0)
520 return scnprintf(bf, size, "NONE");
521 #define P_CMD(cmd) \
522 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524 op &= ~LOCK_##cmd; \
527 P_CMD(SH);
528 P_CMD(EX);
529 P_CMD(NB);
530 P_CMD(UN);
531 P_CMD(MAND);
532 P_CMD(RW);
533 P_CMD(READ);
534 P_CMD(WRITE);
535 #undef P_OP
537 if (op)
538 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
540 return printed;
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
547 enum syscall_futex_args {
548 SCF_UADDR = (1 << 0),
549 SCF_OP = (1 << 1),
550 SCF_VAL = (1 << 2),
551 SCF_TIMEOUT = (1 << 3),
552 SCF_UADDR2 = (1 << 4),
553 SCF_VAL3 = (1 << 5),
555 int op = arg->val;
556 int cmd = op & FUTEX_CMD_MASK;
557 size_t printed = 0;
559 switch (cmd) {
560 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
562 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
565 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
566 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
567 P_FUTEX_OP(WAKE_OP); break;
568 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
571 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
572 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
573 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
574 default: printed = scnprintf(bf, size, "%#x", cmd); break;
577 if (op & FUTEX_PRIVATE_FLAG)
578 printed += scnprintf(bf + printed, size - printed, "|PRIV");
580 if (op & FUTEX_CLOCK_REALTIME)
581 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
583 return printed;
586 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
588 static const char *bpf_cmd[] = {
589 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
590 "MAP_GET_NEXT_KEY", "PROG_LOAD",
592 static DEFINE_STRARRAY(bpf_cmd);
594 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
597 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
598 static DEFINE_STRARRAY(itimers);
600 static const char *keyctl_options[] = {
601 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
602 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
603 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
604 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
605 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
607 static DEFINE_STRARRAY(keyctl_options);
609 static const char *whences[] = { "SET", "CUR", "END",
610 #ifdef SEEK_DATA
611 "DATA",
612 #endif
613 #ifdef SEEK_HOLE
614 "HOLE",
615 #endif
617 static DEFINE_STRARRAY(whences);
619 static const char *fcntl_cmds[] = {
620 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
621 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
622 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
623 "F_GETOWNER_UIDS",
625 static DEFINE_STRARRAY(fcntl_cmds);
627 static const char *rlimit_resources[] = {
628 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
629 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
630 "RTTIME",
632 static DEFINE_STRARRAY(rlimit_resources);
634 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
635 static DEFINE_STRARRAY(sighow);
637 static const char *clockid[] = {
638 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
639 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
640 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
642 static DEFINE_STRARRAY(clockid);
/* Socket family names, indexed by the family value starting at 0. */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
654 #ifndef SOCK_TYPE_MASK
655 #define SOCK_TYPE_MASK 0xf
656 #endif
658 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659 struct syscall_arg *arg)
661 size_t printed;
662 int type = arg->val,
663 flags = type & ~SOCK_TYPE_MASK;
665 type &= SOCK_TYPE_MASK;
667 * Can't use a strarray, MIPS may override for ABI reasons.
669 switch (type) {
670 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671 P_SK_TYPE(STREAM);
672 P_SK_TYPE(DGRAM);
673 P_SK_TYPE(RAW);
674 P_SK_TYPE(RDM);
675 P_SK_TYPE(SEQPACKET);
676 P_SK_TYPE(DCCP);
677 P_SK_TYPE(PACKET);
678 #undef P_SK_TYPE
679 default:
680 printed = scnprintf(bf, size, "%#x", type);
683 #define P_SK_FLAG(n) \
684 if (flags & SOCK_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686 flags &= ~SOCK_##n; \
689 P_SK_FLAG(CLOEXEC);
690 P_SK_FLAG(NONBLOCK);
691 #undef P_SK_FLAG
693 if (flags)
694 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
696 return printed;
699 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
701 #ifndef MSG_PROBE
702 #define MSG_PROBE 0x10
703 #endif
704 #ifndef MSG_WAITFORONE
705 #define MSG_WAITFORONE 0x10000
706 #endif
707 #ifndef MSG_SENDPAGE_NOTLAST
708 #define MSG_SENDPAGE_NOTLAST 0x20000
709 #endif
710 #ifndef MSG_FASTOPEN
711 #define MSG_FASTOPEN 0x20000000
712 #endif
714 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715 struct syscall_arg *arg)
717 int printed = 0, flags = arg->val;
719 if (flags == 0)
720 return scnprintf(bf, size, "NONE");
721 #define P_MSG_FLAG(n) \
722 if (flags & MSG_##n) { \
723 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724 flags &= ~MSG_##n; \
727 P_MSG_FLAG(OOB);
728 P_MSG_FLAG(PEEK);
729 P_MSG_FLAG(DONTROUTE);
730 P_MSG_FLAG(TRYHARD);
731 P_MSG_FLAG(CTRUNC);
732 P_MSG_FLAG(PROBE);
733 P_MSG_FLAG(TRUNC);
734 P_MSG_FLAG(DONTWAIT);
735 P_MSG_FLAG(EOR);
736 P_MSG_FLAG(WAITALL);
737 P_MSG_FLAG(FIN);
738 P_MSG_FLAG(SYN);
739 P_MSG_FLAG(CONFIRM);
740 P_MSG_FLAG(RST);
741 P_MSG_FLAG(ERRQUEUE);
742 P_MSG_FLAG(NOSIGNAL);
743 P_MSG_FLAG(MORE);
744 P_MSG_FLAG(WAITFORONE);
745 P_MSG_FLAG(SENDPAGE_NOTLAST);
746 P_MSG_FLAG(FASTOPEN);
747 P_MSG_FLAG(CMSG_CLOEXEC);
748 #undef P_MSG_FLAG
750 if (flags)
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
753 return printed;
756 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
758 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759 struct syscall_arg *arg)
761 size_t printed = 0;
762 int mode = arg->val;
764 if (mode == F_OK) /* 0 */
765 return scnprintf(bf, size, "F");
766 #define P_MODE(n) \
767 if (mode & n##_OK) { \
768 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769 mode &= ~n##_OK; \
772 P_MODE(R);
773 P_MODE(W);
774 P_MODE(X);
775 #undef P_MODE
777 if (mode)
778 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
780 return printed;
783 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/* Defined elsewhere in this file; declared here so the table macros can use it. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
790 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791 struct syscall_arg *arg)
793 int printed = 0, flags = arg->val;
795 if (!(flags & O_CREAT))
796 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
798 if (flags == 0)
799 return scnprintf(bf, size, "RDONLY");
800 #define P_FLAG(n) \
801 if (flags & O_##n) { \
802 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803 flags &= ~O_##n; \
806 P_FLAG(APPEND);
807 P_FLAG(ASYNC);
808 P_FLAG(CLOEXEC);
809 P_FLAG(CREAT);
810 P_FLAG(DIRECT);
811 P_FLAG(DIRECTORY);
812 P_FLAG(EXCL);
813 P_FLAG(LARGEFILE);
814 P_FLAG(NOATIME);
815 P_FLAG(NOCTTY);
816 #ifdef O_NONBLOCK
817 P_FLAG(NONBLOCK);
818 #elif O_NDELAY
819 P_FLAG(NDELAY);
820 #endif
821 #ifdef O_PATH
822 P_FLAG(PATH);
823 #endif
824 P_FLAG(RDWR);
825 #ifdef O_DSYNC
826 if ((flags & O_SYNC) == O_SYNC)
827 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828 else {
829 P_FLAG(DSYNC);
831 #else
832 P_FLAG(SYNC);
833 #endif
834 P_FLAG(TRUNC);
835 P_FLAG(WRONLY);
836 #undef P_FLAG
838 if (flags)
839 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
841 return printed;
844 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
846 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847 struct syscall_arg *arg)
849 int printed = 0, flags = arg->val;
851 if (flags == 0)
852 return 0;
854 #define P_FLAG(n) \
855 if (flags & PERF_FLAG_##n) { \
856 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857 flags &= ~PERF_FLAG_##n; \
860 P_FLAG(FD_NO_GROUP);
861 P_FLAG(FD_OUTPUT);
862 P_FLAG(PID_CGROUP);
863 P_FLAG(FD_CLOEXEC);
864 #undef P_FLAG
866 if (flags)
867 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
869 return printed;
872 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
874 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875 struct syscall_arg *arg)
877 int printed = 0, flags = arg->val;
879 if (flags == 0)
880 return scnprintf(bf, size, "NONE");
881 #define P_FLAG(n) \
882 if (flags & EFD_##n) { \
883 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884 flags &= ~EFD_##n; \
887 P_FLAG(SEMAPHORE);
888 P_FLAG(CLOEXEC);
889 P_FLAG(NONBLOCK);
890 #undef P_FLAG
892 if (flags)
893 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
895 return printed;
898 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
900 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901 struct syscall_arg *arg)
903 int printed = 0, flags = arg->val;
905 #define P_FLAG(n) \
906 if (flags & O_##n) { \
907 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908 flags &= ~O_##n; \
911 P_FLAG(CLOEXEC);
912 P_FLAG(NONBLOCK);
913 #undef P_FLAG
915 if (flags)
916 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
918 return printed;
921 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
923 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
925 int sig = arg->val;
927 switch (sig) {
928 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929 P_SIGNUM(HUP);
930 P_SIGNUM(INT);
931 P_SIGNUM(QUIT);
932 P_SIGNUM(ILL);
933 P_SIGNUM(TRAP);
934 P_SIGNUM(ABRT);
935 P_SIGNUM(BUS);
936 P_SIGNUM(FPE);
937 P_SIGNUM(KILL);
938 P_SIGNUM(USR1);
939 P_SIGNUM(SEGV);
940 P_SIGNUM(USR2);
941 P_SIGNUM(PIPE);
942 P_SIGNUM(ALRM);
943 P_SIGNUM(TERM);
944 P_SIGNUM(CHLD);
945 P_SIGNUM(CONT);
946 P_SIGNUM(STOP);
947 P_SIGNUM(TSTP);
948 P_SIGNUM(TTIN);
949 P_SIGNUM(TTOU);
950 P_SIGNUM(URG);
951 P_SIGNUM(XCPU);
952 P_SIGNUM(XFSZ);
953 P_SIGNUM(VTALRM);
954 P_SIGNUM(PROF);
955 P_SIGNUM(WINCH);
956 P_SIGNUM(IO);
957 P_SIGNUM(PWR);
958 P_SIGNUM(SYS);
959 #ifdef SIGEMT
960 P_SIGNUM(EMT);
961 #endif
962 #ifdef SIGSTKFLT
963 P_SIGNUM(STKFLT);
964 #endif
965 #ifdef SIGSWI
966 P_SIGNUM(SWI);
967 #endif
968 default: break;
971 return scnprintf(bf, size, "%#x", sig);
974 #define SCA_SIGNUM syscall_arg__scnprintf_signum
976 #if defined(__i386__) || defined(__x86_64__)
978 * FIXME: Make this available to all arches.
980 #define TCGETS 0x5401
982 static const char *tioctls[] = {
983 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
984 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
985 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
986 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
987 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
988 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
989 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
990 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
991 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
992 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
993 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
994 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
995 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
996 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
997 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
1000 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1001 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Initializer fragment for a syscall_fmts[] entry: format argument number
 * @arg with SCA_STRARRAY using the table strarray__<array>.  @name is not
 * used by the expansion; call sites pass the argument's name there.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1007 static struct syscall_fmt {
1008 const char *name;
1009 const char *alias;
1010 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011 void *arg_parm[6];
1012 bool errmsg;
1013 bool timeout;
1014 bool hexret;
1015 } syscall_fmts[] = {
1016 { .name = "access", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1018 [1] = SCA_ACCMODE, /* mode */ }, },
1019 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021 { .name = "brk", .hexret = true,
1022 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1023 { .name = "chdir", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1025 { .name = "chmod", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1027 { .name = "chroot", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1029 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030 { .name = "close", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1032 { .name = "connect", .errmsg = true, },
1033 { .name = "creat", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1035 { .name = "dup", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 { .name = "dup2", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "dup3", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042 { .name = "eventfd2", .errmsg = true,
1043 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1044 { .name = "faccessat", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 [1] = SCA_FILENAME, /* filename */ }, },
1047 { .name = "fadvise64", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1049 { .name = "fallocate", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051 { .name = "fchdir", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 { .name = "fchmod", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 { .name = "fchmodat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1057 [1] = SCA_FILENAME, /* filename */ }, },
1058 { .name = "fchown", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060 { .name = "fchownat", .errmsg = true,
1061 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1062 [1] = SCA_FILENAME, /* filename */ }, },
1063 { .name = "fcntl", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1065 [1] = SCA_STRARRAY, /* cmd */ },
1066 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1067 { .name = "fdatasync", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "flock", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1071 [1] = SCA_FLOCK, /* cmd */ }, },
1072 { .name = "fsetxattr", .errmsg = true,
1073 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1075 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1077 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1078 [1] = SCA_FILENAME, /* filename */ }, },
1079 { .name = "fstatfs", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1081 { .name = "fsync", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083 { .name = "ftruncate", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085 { .name = "futex", .errmsg = true,
1086 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1087 { .name = "futimesat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1089 [1] = SCA_FILENAME, /* filename */ }, },
1090 { .name = "getdents", .errmsg = true,
1091 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1092 { .name = "getdents64", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1094 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1095 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 { .name = "getxattr", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098 { .name = "inotify_add_watch", .errmsg = true,
1099 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1100 { .name = "ioctl", .errmsg = true,
1101 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1102 #if defined(__i386__) || defined(__x86_64__)
1104 * FIXME: Make this available to all arches.
1106 [1] = SCA_STRHEXARRAY, /* cmd */
1107 [2] = SCA_HEX, /* arg */ },
1108 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1109 #else
1110 [2] = SCA_HEX, /* arg */ }, },
1111 #endif
1112 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113 { .name = "kill", .errmsg = true,
1114 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1115 { .name = "lchown", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1117 { .name = "lgetxattr", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1119 { .name = "linkat", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1121 { .name = "listxattr", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 { .name = "llistxattr", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1125 { .name = "lremovexattr", .errmsg = true,
1126 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127 { .name = "lseek", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1129 [2] = SCA_STRARRAY, /* whence */ },
1130 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1131 { .name = "lsetxattr", .errmsg = true,
1132 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1134 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1135 { .name = "lsxattr", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1137 { .name = "madvise", .errmsg = true,
1138 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1139 [2] = SCA_MADV_BHV, /* behavior */ }, },
1140 { .name = "mkdir", .errmsg = true,
1141 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1142 { .name = "mkdirat", .errmsg = true,
1143 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1144 [1] = SCA_FILENAME, /* pathname */ }, },
1145 { .name = "mknod", .errmsg = true,
1146 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1147 { .name = "mknodat", .errmsg = true,
1148 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1149 [1] = SCA_FILENAME, /* filename */ }, },
1150 { .name = "mlock", .errmsg = true,
1151 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1152 { .name = "mlockall", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1154 { .name = "mmap", .hexret = true,
1155 /* The standard mmap maps to old_mmap on s390x */
1156 #if defined(__s390x__)
1157 .alias = "old_mmap",
1158 #endif
1159 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1160 [2] = SCA_MMAP_PROT, /* prot */
1161 [3] = SCA_MMAP_FLAGS, /* flags */
1162 [4] = SCA_FD, /* fd */ }, },
1163 { .name = "mprotect", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1165 [2] = SCA_MMAP_PROT, /* prot */ }, },
1166 { .name = "mq_unlink", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1168 { .name = "mremap", .hexret = true,
1169 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1170 [3] = SCA_MREMAP_FLAGS, /* flags */
1171 [4] = SCA_HEX, /* new_addr */ }, },
1172 { .name = "munlock", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1174 { .name = "munmap", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1176 { .name = "name_to_handle_at", .errmsg = true,
1177 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1178 { .name = "newfstatat", .errmsg = true,
1179 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1180 [1] = SCA_FILENAME, /* filename */ }, },
1181 { .name = "open", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1183 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1184 { .name = "open_by_handle_at", .errmsg = true,
1185 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1186 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1187 { .name = "openat", .errmsg = true,
1188 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1189 [1] = SCA_FILENAME, /* filename */
1190 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1191 { .name = "perf_event_open", .errmsg = true,
1192 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1193 [2] = SCA_INT, /* cpu */
1194 [3] = SCA_FD, /* group_fd */
1195 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1196 { .name = "pipe2", .errmsg = true,
1197 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1198 { .name = "poll", .errmsg = true, .timeout = true, },
1199 { .name = "ppoll", .errmsg = true, .timeout = true, },
1200 { .name = "pread", .errmsg = true, .alias = "pread64",
1201 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1202 { .name = "preadv", .errmsg = true, .alias = "pread",
1203 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1205 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 { .name = "pwritev", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1209 { .name = "read", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1211 { .name = "readlink", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1213 { .name = "readlinkat", .errmsg = true,
1214 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1215 [1] = SCA_FILENAME, /* pathname */ }, },
1216 { .name = "readv", .errmsg = true,
1217 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1218 { .name = "recvfrom", .errmsg = true,
1219 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1220 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1221 { .name = "recvmmsg", .errmsg = true,
1222 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1223 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1224 { .name = "recvmsg", .errmsg = true,
1225 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1226 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1227 { .name = "removexattr", .errmsg = true,
1228 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1229 { .name = "renameat", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1231 { .name = "rmdir", .errmsg = true,
1232 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1233 { .name = "rt_sigaction", .errmsg = true,
1234 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1235 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1236 { .name = "rt_sigqueueinfo", .errmsg = true,
1237 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1238 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1239 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1240 { .name = "select", .errmsg = true, .timeout = true, },
1241 { .name = "sendmmsg", .errmsg = true,
1242 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1243 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1244 { .name = "sendmsg", .errmsg = true,
1245 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1246 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1247 { .name = "sendto", .errmsg = true,
1248 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1249 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1250 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1251 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1252 { .name = "setxattr", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1254 { .name = "shutdown", .errmsg = true,
1255 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1256 { .name = "socket", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1258 [1] = SCA_SK_TYPE, /* type */ },
1259 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1260 { .name = "socketpair", .errmsg = true,
1261 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1262 [1] = SCA_SK_TYPE, /* type */ },
1263 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1264 { .name = "stat", .errmsg = true, .alias = "newstat",
1265 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1266 { .name = "statfs", .errmsg = true,
1267 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1268 { .name = "swapoff", .errmsg = true,
1269 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1270 { .name = "swapon", .errmsg = true,
1271 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1272 { .name = "symlinkat", .errmsg = true,
1273 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1274 { .name = "tgkill", .errmsg = true,
1275 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1276 { .name = "tkill", .errmsg = true,
1277 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1278 { .name = "truncate", .errmsg = true,
1279 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1280 { .name = "uname", .errmsg = true, .alias = "newuname", },
1281 { .name = "unlinkat", .errmsg = true,
1282 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1283 [1] = SCA_FILENAME, /* pathname */ }, },
1284 { .name = "utime", .errmsg = true,
1285 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1286 { .name = "utimensat", .errmsg = true,
1287 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1288 [1] = SCA_FILENAME, /* filename */ }, },
1289 { .name = "utimes", .errmsg = true,
1290 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1291 { .name = "vmsplice", .errmsg = true,
1292 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1293 { .name = "write", .errmsg = true,
1294 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1295 { .name = "writev", .errmsg = true,
1296 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1299 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1301 const struct syscall_fmt *fmt = fmtp;
1302 return strcmp(name, fmt->name);
1305 static struct syscall_fmt *syscall_fmt__find(const char *name)
1307 const int nmemb = ARRAY_SIZE(syscall_fmts);
1308 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Per syscall id descriptor, filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of argument fields, not counting a leading "nr" field */
	int		    nr_args;
	/* first argument field, the "nr" field is skipped when present */
	struct format_field *args;
	/* name obtained from audit_syscall_to_name() */
	const char	    *name;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* entry found by syscall_fmt__find(), NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per argument pretty printers, a NULL slot means "print as %ld" */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument printer parameters, taken from fmt->arg_parm */
	void		    **arg_parm;
};
1322 static size_t fprintf_duration(unsigned long t, FILE *fp)
1324 double duration = (double)t / NSEC_PER_MSEC;
1325 size_t printed = fprintf(fp, "(");
1327 if (duration >= 1.0)
1328 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1329 else if (duration >= 0.01)
1330 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1331 else
1332 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1333 return printed + fprintf(fp, "): ");
1337 * filename.ptr: The filename char pointer that will be vfs_getname'd
1338 * filename.entry_str_pos: Where to insert the string translated from
1339 * filename.ptr by the vfs_getname tracepoint/kprobe.
1341 struct thread_trace {
1342 u64 entry_time;
1343 u64 exit_time;
1344 bool entry_pending;
1345 unsigned long nr_events;
1346 unsigned long pfmaj, pfmin;
1347 char *entry_str;
1348 double runtime_ms;
1349 struct {
1350 unsigned long ptr;
1351 short int entry_str_pos;
1352 bool pending_open;
1353 unsigned int namelen;
1354 char *name;
1355 } filename;
1356 struct {
1357 int max;
1358 char **table;
1359 } paths;
1361 struct intlist *syscall_stats;
1364 static struct thread_trace *thread_trace__new(void)
1366 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1368 if (ttrace)
1369 ttrace->paths.max = -1;
1371 ttrace->syscall_stats = intlist__new(NULL);
1373 return ttrace;
1376 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1378 struct thread_trace *ttrace;
1380 if (thread == NULL)
1381 goto fail;
1383 if (thread__priv(thread) == NULL)
1384 thread__set_priv(thread, thread_trace__new());
1386 if (thread__priv(thread) == NULL)
1387 goto fail;
1389 ttrace = thread__priv(thread);
1390 ++ttrace->nr_events;
1392 return ttrace;
1393 fail:
1394 color_fprintf(fp, PERF_COLOR_RED,
1395 "WARNING: not enough memory, dropping samples!\n");
1396 return NULL;
/*
 * Page fault class selection bits.
 * NOTE(review): presumably OR'ed into trace->trace_pgfaults - confirm at
 * the option parsing site.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per thread entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
1404 struct trace {
1405 struct perf_tool tool;
1406 struct {
1407 int machine;
1408 int open_id;
1409 } audit;
1410 struct {
1411 int max;
1412 struct syscall *table;
1413 struct {
1414 struct perf_evsel *sys_enter,
1415 *sys_exit;
1416 } events;
1417 } syscalls;
1418 struct record_opts opts;
1419 struct perf_evlist *evlist;
1420 struct machine *host;
1421 struct thread *current;
1422 u64 base_time;
1423 FILE *output;
1424 unsigned long nr_events;
1425 struct strlist *ev_qualifier;
1426 struct {
1427 size_t nr;
1428 int *entries;
1429 } ev_qualifier_ids;
1430 struct intlist *tid_list;
1431 struct intlist *pid_list;
1432 struct {
1433 size_t nr;
1434 pid_t *entries;
1435 } filter_pids;
1436 double duration_filter;
1437 double runtime_ms;
1438 struct {
1439 u64 vfs_getname,
1440 proc_getname;
1441 } stats;
1442 bool not_ev_qualifier;
1443 bool live;
1444 bool full_time;
1445 bool sched;
1446 bool multiple_threads;
1447 bool summary;
1448 bool summary_only;
1449 bool show_comm;
1450 bool show_tool_stats;
1451 bool trace_syscalls;
1452 bool force;
1453 bool vfs_getname;
1454 int trace_pgfaults;
1457 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1459 struct thread_trace *ttrace = thread__priv(thread);
1461 if (fd > ttrace->paths.max) {
1462 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1464 if (npath == NULL)
1465 return -1;
1467 if (ttrace->paths.max != -1) {
1468 memset(npath + ttrace->paths.max + 1, 0,
1469 (fd - ttrace->paths.max) * sizeof(char *));
1470 } else {
1471 memset(npath, 0, (fd + 1) * sizeof(char *));
1474 ttrace->paths.table = npath;
1475 ttrace->paths.max = fd;
1478 ttrace->paths.table[fd] = strdup(pathname);
1480 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1483 static int thread__read_fd_path(struct thread *thread, int fd)
1485 char linkname[PATH_MAX], pathname[PATH_MAX];
1486 struct stat st;
1487 int ret;
1489 if (thread->pid_ == thread->tid) {
1490 scnprintf(linkname, sizeof(linkname),
1491 "/proc/%d/fd/%d", thread->pid_, fd);
1492 } else {
1493 scnprintf(linkname, sizeof(linkname),
1494 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1497 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1498 return -1;
1500 ret = readlink(linkname, pathname, sizeof(pathname));
1502 if (ret < 0 || ret > st.st_size)
1503 return -1;
1505 pathname[ret] = '\0';
1506 return trace__set_fd_pathname(thread, fd, pathname);
1509 static const char *thread__fd_path(struct thread *thread, int fd,
1510 struct trace *trace)
1512 struct thread_trace *ttrace = thread__priv(thread);
1514 if (ttrace == NULL)
1515 return NULL;
1517 if (fd < 0)
1518 return NULL;
1520 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1521 if (!trace->live)
1522 return NULL;
1523 ++trace->stats.proc_getname;
1524 if (thread__read_fd_path(thread, fd))
1525 return NULL;
1528 return ttrace->paths.table[fd];
1531 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1532 struct syscall_arg *arg)
1534 int fd = arg->val;
1535 size_t printed = scnprintf(bf, size, "%d", fd);
1536 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1538 if (path)
1539 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1541 return printed;
1544 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1545 struct syscall_arg *arg)
1547 int fd = arg->val;
1548 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1549 struct thread_trace *ttrace = thread__priv(arg->thread);
1551 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1552 zfree(&ttrace->paths.table[fd]);
1554 return printed;
1557 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1558 unsigned long ptr)
1560 struct thread_trace *ttrace = thread__priv(thread);
1562 ttrace->filename.ptr = ptr;
1563 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1566 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1567 struct syscall_arg *arg)
1569 unsigned long ptr = arg->val;
1571 if (!arg->trace->vfs_getname)
1572 return scnprintf(bf, size, "%#x", ptr);
1574 thread__set_filename_pos(arg->thread, bf, ptr);
1575 return 0;
1578 static bool trace__filter_duration(struct trace *trace, double t)
1580 return t < (trace->duration_filter * NSEC_PER_MSEC);
1583 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1585 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1587 return fprintf(fp, "%10.3f ", ts);
/* Set by sig_handler(): a signal asked the tool to stop. */
static bool done;
/* Set by sig_handler(): the stop request was a SIGINT. */
static bool interrupted;

/* Signal handler: request termination and remember whether it was ^C. */
static void sig_handler(int sig)
{
	interrupted = (sig == SIGINT);
	done = true;
}
1599 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1600 u64 duration, u64 tstamp, FILE *fp)
1602 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1603 printed += fprintf_duration(duration, fp);
1605 if (trace->multiple_threads) {
1606 if (trace->show_comm)
1607 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1608 printed += fprintf(fp, "%d ", thread->tid);
1611 return printed;
1614 static int trace__process_event(struct trace *trace, struct machine *machine,
1615 union perf_event *event, struct perf_sample *sample)
1617 int ret = 0;
1619 switch (event->header.type) {
1620 case PERF_RECORD_LOST:
1621 color_fprintf(trace->output, PERF_COLOR_RED,
1622 "LOST %" PRIu64 " events!\n", event->lost.lost);
1623 ret = machine__process_lost_event(machine, event, sample);
1624 break;
1625 default:
1626 ret = machine__process_event(machine, event, sample);
1627 break;
1630 return ret;
1633 static int trace__tool_process(struct perf_tool *tool,
1634 union perf_event *event,
1635 struct perf_sample *sample,
1636 struct machine *machine)
1638 struct trace *trace = container_of(tool, struct trace, tool);
1639 return trace__process_event(trace, machine, event, sample);
1642 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1644 int err = symbol__init(NULL);
1646 if (err)
1647 return err;
1649 trace->host = machine__new_host();
1650 if (trace->host == NULL)
1651 return -ENOMEM;
1653 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1654 return -errno;
1656 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1657 evlist->threads, trace__tool_process, false,
1658 trace->opts.proc_map_timeout);
1659 if (err)
1660 symbol__exit();
1662 return err;
1665 static int syscall__set_arg_fmts(struct syscall *sc)
1667 struct format_field *field;
1668 int idx = 0;
1670 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1671 if (sc->arg_scnprintf == NULL)
1672 return -1;
1674 if (sc->fmt)
1675 sc->arg_parm = sc->fmt->arg_parm;
1677 for (field = sc->args; field; field = field->next) {
1678 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1679 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1680 else if (field->flags & FIELD_IS_POINTER)
1681 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1682 ++idx;
1685 return 0;
1688 static int trace__read_syscall_info(struct trace *trace, int id)
1690 char tp_name[128];
1691 struct syscall *sc;
1692 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1694 if (name == NULL)
1695 return -1;
1697 if (id > trace->syscalls.max) {
1698 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1700 if (nsyscalls == NULL)
1701 return -1;
1703 if (trace->syscalls.max != -1) {
1704 memset(nsyscalls + trace->syscalls.max + 1, 0,
1705 (id - trace->syscalls.max) * sizeof(*sc));
1706 } else {
1707 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1710 trace->syscalls.table = nsyscalls;
1711 trace->syscalls.max = id;
1714 sc = trace->syscalls.table + id;
1715 sc->name = name;
1717 sc->fmt = syscall_fmt__find(sc->name);
1719 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1720 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1722 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1723 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1724 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1727 if (IS_ERR(sc->tp_format))
1728 return -1;
1730 sc->args = sc->tp_format->format.fields;
1731 sc->nr_args = sc->tp_format->format.nr_fields;
1732 /* drop nr field - not relevant here; does not exist on older kernels */
1733 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1734 sc->args = sc->args->next;
1735 --sc->nr_args;
1738 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1740 return syscall__set_arg_fmts(sc);
1743 static int trace__validate_ev_qualifier(struct trace *trace)
1745 int err = 0, i;
1746 struct str_node *pos;
1748 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1749 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1750 sizeof(trace->ev_qualifier_ids.entries[0]));
1752 if (trace->ev_qualifier_ids.entries == NULL) {
1753 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1754 trace->output);
1755 err = -EINVAL;
1756 goto out;
1759 i = 0;
1761 strlist__for_each(pos, trace->ev_qualifier) {
1762 const char *sc = pos->s;
1763 int id = audit_name_to_syscall(sc, trace->audit.machine);
1765 if (id < 0) {
1766 if (err == 0) {
1767 fputs("Error:\tInvalid syscall ", trace->output);
1768 err = -EINVAL;
1769 } else {
1770 fputs(", ", trace->output);
1773 fputs(sc, trace->output);
1776 trace->ev_qualifier_ids.entries[i++] = id;
1779 if (err < 0) {
1780 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1781 "\nHint:\tand: 'man syscalls'\n", trace->output);
1782 zfree(&trace->ev_qualifier_ids.entries);
1783 trace->ev_qualifier_ids.nr = 0;
1785 out:
1786 return err;
1790 * args is to be interpreted as a series of longs but we need to handle
1791 * 8-byte unaligned accesses. args points to raw_data within the event
1792 * and raw_data is guaranteed to be 8-byte unaligned because it is
1793 * preceded by raw_size which is a u32. So we need to copy args to a temp
1794 * variable to read it. Most notably this avoids extended load instructions
1795 * on unaligned addresses
1798 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1799 unsigned char *args, struct trace *trace,
1800 struct thread *thread)
1802 size_t printed = 0;
1803 unsigned char *p;
1804 unsigned long val;
1806 if (sc->args != NULL) {
1807 struct format_field *field;
1808 u8 bit = 1;
1809 struct syscall_arg arg = {
1810 .idx = 0,
1811 .mask = 0,
1812 .trace = trace,
1813 .thread = thread,
1816 for (field = sc->args; field;
1817 field = field->next, ++arg.idx, bit <<= 1) {
1818 if (arg.mask & bit)
1819 continue;
1821 /* special care for unaligned accesses */
1822 p = args + sizeof(unsigned long) * arg.idx;
1823 memcpy(&val, p, sizeof(val));
1826 * Suppress this argument if its value is zero and
1827 * and we don't have a string associated in an
1828 * strarray for it.
1830 if (val == 0 &&
1831 !(sc->arg_scnprintf &&
1832 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1833 sc->arg_parm[arg.idx]))
1834 continue;
1836 printed += scnprintf(bf + printed, size - printed,
1837 "%s%s: ", printed ? ", " : "", field->name);
1838 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1839 arg.val = val;
1840 if (sc->arg_parm)
1841 arg.parm = sc->arg_parm[arg.idx];
1842 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1843 size - printed, &arg);
1844 } else {
1845 printed += scnprintf(bf + printed, size - printed,
1846 "%ld", val);
1849 } else {
1850 int i = 0;
1852 while (i < 6) {
1853 /* special care for unaligned accesses */
1854 p = args + sizeof(unsigned long) * i;
1855 memcpy(&val, p, sizeof(val));
1856 printed += scnprintf(bf + printed, size - printed,
1857 "%sarg%d: %ld",
1858 printed ? ", " : "", i, val);
1859 ++i;
1863 return printed;
/*
 * Signature shared by the per event handlers in this file, e.g.
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1870 static struct syscall *trace__syscall_info(struct trace *trace,
1871 struct perf_evsel *evsel, int id)
1874 if (id < 0) {
1877 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1878 * before that, leaving at a higher verbosity level till that is
1879 * explained. Reproduced with plain ftrace with:
1881 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1882 * grep "NR -1 " /t/trace_pipe
1884 * After generating some load on the machine.
1886 if (verbose > 1) {
1887 static u64 n;
1888 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1889 id, perf_evsel__name(evsel), ++n);
1891 return NULL;
1894 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1895 trace__read_syscall_info(trace, id))
1896 goto out_cant_read;
1898 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1899 goto out_cant_read;
1901 return &trace->syscalls.table[id];
1903 out_cant_read:
1904 if (verbose) {
1905 fprintf(trace->output, "Problems reading syscall %d", id);
1906 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1907 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1908 fputs(" information\n", trace->output);
1910 return NULL;
1913 static void thread__update_stats(struct thread_trace *ttrace,
1914 int id, struct perf_sample *sample)
1916 struct int_node *inode;
1917 struct stats *stats;
1918 u64 duration = 0;
1920 inode = intlist__findnew(ttrace->syscall_stats, id);
1921 if (inode == NULL)
1922 return;
1924 stats = inode->priv;
1925 if (stats == NULL) {
1926 stats = malloc(sizeof(struct stats));
1927 if (stats == NULL)
1928 return;
1929 init_stats(stats);
1930 inode->priv = stats;
1933 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934 duration = sample->time - ttrace->entry_time;
1936 update_stats(stats, duration);
1939 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1941 struct thread_trace *ttrace;
1942 u64 duration;
1943 size_t printed;
1945 if (trace->current == NULL)
1946 return 0;
1948 ttrace = thread__priv(trace->current);
1950 if (!ttrace->entry_pending)
1951 return 0;
1953 duration = sample->time - ttrace->entry_time;
1955 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957 ttrace->entry_pending = false;
1959 return printed;
1962 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1963 union perf_event *event __maybe_unused,
1964 struct perf_sample *sample)
1966 char *msg;
1967 void *args;
1968 size_t printed = 0;
1969 struct thread *thread;
1970 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1971 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1972 struct thread_trace *ttrace;
1974 if (sc == NULL)
1975 return -1;
1977 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1978 ttrace = thread__trace(thread, trace->output);
1979 if (ttrace == NULL)
1980 goto out_put;
1982 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1984 if (ttrace->entry_str == NULL) {
1985 ttrace->entry_str = malloc(trace__entry_str_size);
1986 if (!ttrace->entry_str)
1987 goto out_put;
1990 if (!trace->summary_only)
1991 trace__printf_interrupted_entry(trace, sample);
1993 ttrace->entry_time = sample->time;
1994 msg = ttrace->entry_str;
1995 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1997 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1998 args, trace, thread);
2000 if (sc->is_exit) {
2001 if (!trace->duration_filter && !trace->summary_only) {
2002 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2003 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2005 } else {
2006 ttrace->entry_pending = true;
2007 /* See trace__vfs_getname & trace__sys_exit */
2008 ttrace->filename.pending_open = false;
2011 if (trace->current != thread) {
2012 thread__put(trace->current);
2013 trace->current = thread__get(thread);
2015 err = 0;
2016 out_put:
2017 thread__put(thread);
2018 return err;
2021 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2022 union perf_event *event __maybe_unused,
2023 struct perf_sample *sample)
2025 long ret;
2026 u64 duration = 0;
2027 struct thread *thread;
2028 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2029 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2030 struct thread_trace *ttrace;
2032 if (sc == NULL)
2033 return -1;
2035 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2036 ttrace = thread__trace(thread, trace->output);
2037 if (ttrace == NULL)
2038 goto out_put;
2040 if (trace->summary)
2041 thread__update_stats(ttrace, id, sample);
2043 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2045 if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2046 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2047 ttrace->filename.pending_open = false;
2048 ++trace->stats.vfs_getname;
2051 ttrace->exit_time = sample->time;
2053 if (ttrace->entry_time) {
2054 duration = sample->time - ttrace->entry_time;
2055 if (trace__filter_duration(trace, duration))
2056 goto out;
2057 } else if (trace->duration_filter)
2058 goto out;
2060 if (trace->summary_only)
2061 goto out;
2063 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2065 if (ttrace->entry_pending) {
2066 fprintf(trace->output, "%-70s", ttrace->entry_str);
2067 } else {
2068 fprintf(trace->output, " ... [");
2069 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2070 fprintf(trace->output, "]: %s()", sc->name);
2073 if (sc->fmt == NULL) {
2074 signed_print:
2075 fprintf(trace->output, ") = %ld", ret);
2076 } else if (ret < 0 && sc->fmt->errmsg) {
2077 char bf[STRERR_BUFSIZE];
2078 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2079 *e = audit_errno_to_name(-ret);
2081 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2082 } else if (ret == 0 && sc->fmt->timeout)
2083 fprintf(trace->output, ") = 0 Timeout");
2084 else if (sc->fmt->hexret)
2085 fprintf(trace->output, ") = %#lx", ret);
2086 else
2087 goto signed_print;
2089 fputc('\n', trace->output);
2090 out:
2091 ttrace->entry_pending = false;
2092 err = 0;
2093 out_put:
2094 thread__put(thread);
2095 return err;
2098 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2099 union perf_event *event __maybe_unused,
2100 struct perf_sample *sample)
2102 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2103 struct thread_trace *ttrace;
2104 size_t filename_len, entry_str_len, to_move;
2105 ssize_t remaining_space;
2106 char *pos;
2107 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2109 if (!thread)
2110 goto out;
2112 ttrace = thread__priv(thread);
2113 if (!ttrace)
2114 goto out;
2116 filename_len = strlen(filename);
2118 if (ttrace->filename.namelen < filename_len) {
2119 char *f = realloc(ttrace->filename.name, filename_len + 1);
2121 if (f == NULL)
2122 goto out;
2124 ttrace->filename.namelen = filename_len;
2125 ttrace->filename.name = f;
2128 strcpy(ttrace->filename.name, filename);
2129 ttrace->filename.pending_open = true;
2131 if (!ttrace->filename.ptr)
2132 goto out;
2134 entry_str_len = strlen(ttrace->entry_str);
2135 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2136 if (remaining_space <= 0)
2137 goto out;
2139 if (filename_len > (size_t)remaining_space) {
2140 filename += filename_len - remaining_space;
2141 filename_len = remaining_space;
2144 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2145 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2146 memmove(pos + filename_len, pos, to_move);
2147 memcpy(pos, filename, filename_len);
2149 ttrace->filename.ptr = 0;
2150 ttrace->filename.entry_str_pos = 0;
2151 out:
2152 return 0;
2155 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2156 union perf_event *event __maybe_unused,
2157 struct perf_sample *sample)
2159 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2160 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2161 struct thread *thread = machine__findnew_thread(trace->host,
2162 sample->pid,
2163 sample->tid);
2164 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2166 if (ttrace == NULL)
2167 goto out_dump;
2169 ttrace->runtime_ms += runtime_ms;
2170 trace->runtime_ms += runtime_ms;
2171 thread__put(thread);
2172 return 0;
2174 out_dump:
2175 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2176 evsel->name,
2177 perf_evsel__strval(evsel, sample, "comm"),
2178 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2179 runtime,
2180 perf_evsel__intval(evsel, sample, "vruntime"));
2181 thread__put(thread);
2182 return 0;
2185 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2186 union perf_event *event __maybe_unused,
2187 struct perf_sample *sample)
2189 trace__printf_interrupted_entry(trace, sample);
2190 trace__fprintf_tstamp(trace, sample->time, trace->output);
2192 if (trace->trace_syscalls)
2193 fprintf(trace->output, "( ): ");
2195 fprintf(trace->output, "%s:", evsel->name);
2197 if (evsel->tp_format) {
2198 event_format__fprintf(evsel->tp_format, sample->cpu,
2199 sample->raw_data, sample->raw_size,
2200 trace->output);
2203 fprintf(trace->output, ")\n");
2204 return 0;
2207 static void print_location(FILE *f, struct perf_sample *sample,
2208 struct addr_location *al,
2209 bool print_dso, bool print_sym)
2212 if ((verbose || print_dso) && al->map)
2213 fprintf(f, "%s@", al->map->dso->long_name);
2215 if ((verbose || print_sym) && al->sym)
2216 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2217 al->addr - al->sym->start);
2218 else if (al->map)
2219 fprintf(f, "0x%" PRIx64, al->addr);
2220 else
2221 fprintf(f, "0x%" PRIx64, sample->addr);
2224 static int trace__pgfault(struct trace *trace,
2225 struct perf_evsel *evsel,
2226 union perf_event *event,
2227 struct perf_sample *sample)
2229 struct thread *thread;
2230 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2231 struct addr_location al;
2232 char map_type = 'd';
2233 struct thread_trace *ttrace;
2234 int err = -1;
2236 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2237 ttrace = thread__trace(thread, trace->output);
2238 if (ttrace == NULL)
2239 goto out_put;
2241 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2242 ttrace->pfmaj++;
2243 else
2244 ttrace->pfmin++;
2246 if (trace->summary_only)
2247 goto out;
2249 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2250 sample->ip, &al);
2252 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2254 fprintf(trace->output, "%sfault [",
2255 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2256 "maj" : "min");
2258 print_location(trace->output, sample, &al, false, true);
2260 fprintf(trace->output, "] => ");
2262 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2263 sample->addr, &al);
2265 if (!al.map) {
2266 thread__find_addr_location(thread, cpumode,
2267 MAP__FUNCTION, sample->addr, &al);
2269 if (al.map)
2270 map_type = 'x';
2271 else
2272 map_type = '?';
2275 print_location(trace->output, sample, &al, true, false);
2277 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2278 out:
2279 err = 0;
2280 out_put:
2281 thread__put(thread);
2282 return err;
2285 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2287 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2288 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2289 return false;
2291 if (trace->pid_list || trace->tid_list)
2292 return true;
2294 return false;
2297 static int trace__process_sample(struct perf_tool *tool,
2298 union perf_event *event,
2299 struct perf_sample *sample,
2300 struct perf_evsel *evsel,
2301 struct machine *machine __maybe_unused)
2303 struct trace *trace = container_of(tool, struct trace, tool);
2304 int err = 0;
2306 tracepoint_handler handler = evsel->handler;
2308 if (skip_sample(trace, sample))
2309 return 0;
2311 if (!trace->full_time && trace->base_time == 0)
2312 trace->base_time = sample->time;
2314 if (handler) {
2315 ++trace->nr_events;
2316 handler(trace, evsel, event, sample);
2319 return err;
2322 static int parse_target_str(struct trace *trace)
2324 if (trace->opts.target.pid) {
2325 trace->pid_list = intlist__new(trace->opts.target.pid);
2326 if (trace->pid_list == NULL) {
2327 pr_err("Error parsing process id string\n");
2328 return -EINVAL;
2332 if (trace->opts.target.tid) {
2333 trace->tid_list = intlist__new(trace->opts.target.tid);
2334 if (trace->tid_list == NULL) {
2335 pr_err("Error parsing thread id string\n");
2336 return -EINVAL;
2340 return 0;
2343 static int trace__record(struct trace *trace, int argc, const char **argv)
2345 unsigned int rec_argc, i, j;
2346 const char **rec_argv;
2347 const char * const record_args[] = {
2348 "record",
2349 "-R",
2350 "-m", "1024",
2351 "-c", "1",
2354 const char * const sc_args[] = { "-e", };
2355 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2356 const char * const majpf_args[] = { "-e", "major-faults" };
2357 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2358 const char * const minpf_args[] = { "-e", "minor-faults" };
2359 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2361 /* +1 is for the event string below */
2362 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2363 majpf_args_nr + minpf_args_nr + argc;
2364 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2366 if (rec_argv == NULL)
2367 return -ENOMEM;
2369 j = 0;
2370 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2371 rec_argv[j++] = record_args[i];
2373 if (trace->trace_syscalls) {
2374 for (i = 0; i < sc_args_nr; i++)
2375 rec_argv[j++] = sc_args[i];
2377 /* event string may be different for older kernels - e.g., RHEL6 */
2378 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2379 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2380 else if (is_valid_tracepoint("syscalls:sys_enter"))
2381 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2382 else {
2383 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2384 return -1;
2388 if (trace->trace_pgfaults & TRACE_PFMAJ)
2389 for (i = 0; i < majpf_args_nr; i++)
2390 rec_argv[j++] = majpf_args[i];
2392 if (trace->trace_pgfaults & TRACE_PFMIN)
2393 for (i = 0; i < minpf_args_nr; i++)
2394 rec_argv[j++] = minpf_args[i];
2396 for (i = 0; i < (unsigned int)argc; i++)
2397 rec_argv[j++] = argv[i];
2399 return cmd_record(j, rec_argv, NULL);
2402 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2404 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2406 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2408 if (IS_ERR(evsel))
2409 return false;
2411 if (perf_evsel__field(evsel, "pathname") == NULL) {
2412 perf_evsel__delete(evsel);
2413 return false;
2416 evsel->handler = trace__vfs_getname;
2417 perf_evlist__add(evlist, evsel);
2418 return true;
2421 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2422 u64 config)
2424 struct perf_evsel *evsel;
2425 struct perf_event_attr attr = {
2426 .type = PERF_TYPE_SOFTWARE,
2427 .mmap_data = 1,
2430 attr.config = config;
2431 attr.sample_period = 1;
2433 event_attr_init(&attr);
2435 evsel = perf_evsel__new(&attr);
2436 if (!evsel)
2437 return -ENOMEM;
2439 evsel->handler = trace__pgfault;
2440 perf_evlist__add(evlist, evsel);
2442 return 0;
2445 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2447 const u32 type = event->header.type;
2448 struct perf_evsel *evsel;
2450 if (!trace->full_time && trace->base_time == 0)
2451 trace->base_time = sample->time;
2453 if (type != PERF_RECORD_SAMPLE) {
2454 trace__process_event(trace, trace->host, event, sample);
2455 return;
2458 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2459 if (evsel == NULL) {
2460 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2461 return;
2464 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2465 sample->raw_data == NULL) {
2466 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2467 perf_evsel__name(evsel), sample->tid,
2468 sample->cpu, sample->raw_size);
2469 } else {
2470 tracepoint_handler handler = evsel->handler;
2471 handler(trace, evsel, event, sample);
2475 static int trace__add_syscall_newtp(struct trace *trace)
2477 int ret = -1;
2478 struct perf_evlist *evlist = trace->evlist;
2479 struct perf_evsel *sys_enter, *sys_exit;
2481 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2482 if (sys_enter == NULL)
2483 goto out;
2485 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2486 goto out_delete_sys_enter;
2488 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2489 if (sys_exit == NULL)
2490 goto out_delete_sys_enter;
2492 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2493 goto out_delete_sys_exit;
2495 perf_evlist__add(evlist, sys_enter);
2496 perf_evlist__add(evlist, sys_exit);
2498 trace->syscalls.events.sys_enter = sys_enter;
2499 trace->syscalls.events.sys_exit = sys_exit;
2501 ret = 0;
2502 out:
2503 return ret;
2505 out_delete_sys_exit:
2506 perf_evsel__delete_priv(sys_exit);
2507 out_delete_sys_enter:
2508 perf_evsel__delete_priv(sys_enter);
2509 goto out;
2512 static int trace__set_ev_qualifier_filter(struct trace *trace)
2514 int err = -1;
2515 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2516 trace->ev_qualifier_ids.nr,
2517 trace->ev_qualifier_ids.entries);
2519 if (filter == NULL)
2520 goto out_enomem;
2522 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2523 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2525 free(filter);
2526 out:
2527 return err;
2528 out_enomem:
2529 errno = ENOMEM;
2530 goto out;
/*
 * trace__run() - live tracing mode.
 *
 * Adds the requested event sources to trace->evlist (syscall enter/exit
 * tracepoints plus the optional vfs_getname probe, major/minor page fault
 * software events, sched:sched_stat_runtime), creates the target maps,
 * prepares a workload from argv when argc > 0, opens the events, installs the
 * pid and event-qualifier filters, mmaps the events and then loops reading
 * events from every mmap and passing them to trace__handle_event().
 *
 * The loop ends when 'interrupted' is set or when a pass over the mmaps
 * consumed nothing and no further polling is done (see the loop below).
 * Afterwards the summary and tool stats are printed when requested and err
 * is zero, the evlist is deleted and err is returned.
 *
 * The labels after the final 'return err' each format a specific error
 * message on trace->output and then jump back to out_delete_evlist.
 */
2533 static int trace__run(struct trace *trace, int argc, const char **argv)
2535 struct perf_evlist *evlist = trace->evlist;
2536 struct perf_evsel *evsel;
2537 int err = -1, i;
2538 unsigned long before;
/* a workload is prepared/started only when a command was given */
2539 const bool forks = argc > 0;
2540 bool draining = false;
2542 trace->live = true;
/* Event setup: every failure below jumps to a label with its own message. */
2544 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2545 goto out_error_raw_syscalls;
/* vfs_getname is optional: the result is only remembered, never an error */
2547 if (trace->trace_syscalls)
2548 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2550 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2551 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2552 goto out_error_mem;
2555 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2556 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2557 goto out_error_mem;
2559 if (trace->sched &&
2560 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2561 trace__sched_stat_runtime))
2562 goto out_error_sched_stat_runtime;
2564 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2565 if (err < 0) {
2566 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2567 goto out_delete_evlist;
2570 err = trace__symbols_init(trace, evlist);
2571 if (err < 0) {
2572 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2573 goto out_delete_evlist;
2576 perf_evlist__config(evlist, &trace->opts);
/* sig_handler is defined elsewhere in this file; presumably it sets 'done'/'interrupted' — confirm */
2578 signal(SIGCHLD, sig_handler);
2579 signal(SIGINT, sig_handler);
2581 if (forks) {
2582 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2583 argv, false, NULL);
2584 if (err < 0) {
2585 fprintf(trace->output, "Couldn't run the workload!\n");
2586 goto out_delete_evlist;
2590 err = perf_evlist__open(evlist);
2591 if (err < 0)
2592 goto out_error_open;
2595 * Better not use !target__has_task() here because we need to cover the
2596 * case where no threads were specified in the command line, but a
2597 * workload was, and in that case we will fill in the thread_map when
2598 * we fork the workload in perf_evlist__prepare_workload.
/* explicit --filter-pids win; otherwise, with no specific thread, filter out our own pid */
2600 if (trace->filter_pids.nr > 0)
2601 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2602 else if (thread_map__pid(evlist->threads, 0) == -1)
2603 err = perf_evlist__set_filter_pid(evlist, getpid());
/* NOTE(review): any negative err here is reported as "Not enough memory" */
2605 if (err < 0)
2606 goto out_error_mem;
2608 if (trace->ev_qualifier_ids.nr > 0) {
2609 err = trace__set_ev_qualifier_filter(trace);
2610 if (err < 0)
2611 goto out_errno;
2613 pr_debug("event qualifier tracepoint filter: %s\n",
2614 trace->syscalls.events.sys_exit->filter);
/* on failure evsel is used by out_error_apply_filters to name the offending event */
2617 err = perf_evlist__apply_filters(evlist, &evsel);
2618 if (err < 0)
2619 goto out_error_apply_filters;
2621 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2622 if (err < 0)
2623 goto out_error_mmap;
/* events are enabled here only when target__none() is false */
2625 if (!target__none(&trace->opts.target))
2626 perf_evlist__enable(evlist);
2628 if (forks)
2629 perf_evlist__start_workload(evlist);
/* multiple_threads: no specific pid, more than one thread, or inheritance on the first evsel */
2631 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2632 evlist->threads->nr > 1 ||
2633 perf_evlist__first(evlist)->attr.inherit;
/* Main loop: one pass over all mmaps, then decide whether to poll/retry. */
2634 again:
2635 before = trace->nr_events;
2637 for (i = 0; i < evlist->nr_mmaps; i++) {
2638 union perf_event *event;
2640 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2641 struct perf_sample sample;
2643 ++trace->nr_events;
2645 err = perf_evlist__parse_sample(evlist, event, &sample);
2646 if (err) {
2647 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2648 goto next_event;
2651 trace__handle_event(trace, event, &sample);
/* the event is consumed whether or not it could be parsed */
2652 next_event:
2653 perf_evlist__mmap_consume(evlist, i);
2655 if (interrupted)
2656 goto out_disable;
/* once 'done' is set: stop producing new events and just drain what is left */
2658 if (done && !draining) {
2659 perf_evlist__disable(evlist);
2660 draining = true;
/* nothing was consumed in this pass */
2665 if (trace->nr_events == before) {
/* timeout of 100 once 'done' is set, -1 otherwise (presumably poll() milliseconds — confirm) */
2666 int timeout = done ? 100 : -1;
2668 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2669 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2670 draining = true;
2672 goto again;
2674 } else {
2675 goto again;
/* Normal teardown path. */
2678 out_disable:
2679 thread__zput(trace->current);
2681 perf_evlist__disable(evlist);
/* err still holds the result of the last perf_evlist__parse_sample()/setup call */
2683 if (!err) {
2684 if (trace->summary)
2685 trace__fprintf_thread_summary(trace, trace->output);
2687 if (trace->show_tool_stats) {
2688 fprintf(trace->output, "Stats:\n "
2689 " vfs_getname : %" PRIu64 "\n"
2690 " proc_getname: %" PRIu64 "\n",
2691 trace->stats.vfs_getname,
2692 trace->stats.proc_getname);
/* Common exit: both the success path and every error label end up here. */
2696 out_delete_evlist:
2697 perf_evlist__delete(evlist);
2698 trace->evlist = NULL;
2699 trace->live = false;
2700 return err;
/* Error labels: errbuf is shared by the labels that format a message into it. */
2702 char errbuf[BUFSIZ];
2704 out_error_sched_stat_runtime:
2705 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2706 goto out_error;
2708 out_error_raw_syscalls:
2709 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2710 goto out_error;
2712 out_error_mmap:
2713 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2714 goto out_error;
/* out_error_open has no goto: it continues straight into out_error */
2716 out_error_open:
2717 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2719 out_error:
2720 fprintf(trace->output, "%s\n", errbuf);
2721 goto out_delete_evlist;
2723 out_error_apply_filters:
2724 fprintf(trace->output,
2725 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2726 evsel->filter, perf_evsel__name(evsel), errno,
2727 strerror_r(errno, errbuf, sizeof(errbuf)));
2728 goto out_delete_evlist;
2730 out_error_mem:
2731 fprintf(trace->output, "Not enough memory to run!\n");
2732 goto out_delete_evlist;
2734 out_errno:
2735 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2736 goto out_delete_evlist;
2739 static int trace__replay(struct trace *trace)
2741 const struct perf_evsel_str_handler handlers[] = {
2742 { "probe:vfs_getname", trace__vfs_getname, },
2744 struct perf_data_file file = {
2745 .path = input_name,
2746 .mode = PERF_DATA_MODE_READ,
2747 .force = trace->force,
2749 struct perf_session *session;
2750 struct perf_evsel *evsel;
2751 int err = -1;
2753 trace->tool.sample = trace__process_sample;
2754 trace->tool.mmap = perf_event__process_mmap;
2755 trace->tool.mmap2 = perf_event__process_mmap2;
2756 trace->tool.comm = perf_event__process_comm;
2757 trace->tool.exit = perf_event__process_exit;
2758 trace->tool.fork = perf_event__process_fork;
2759 trace->tool.attr = perf_event__process_attr;
2760 trace->tool.tracing_data = perf_event__process_tracing_data;
2761 trace->tool.build_id = perf_event__process_build_id;
2763 trace->tool.ordered_events = true;
2764 trace->tool.ordering_requires_timestamps = true;
2766 /* add tid to output */
2767 trace->multiple_threads = true;
2769 session = perf_session__new(&file, false, &trace->tool);
2770 if (session == NULL)
2771 return -1;
2773 if (symbol__init(&session->header.env) < 0)
2774 goto out;
2776 trace->host = &session->machines.host;
2778 err = perf_session__set_tracepoints_handlers(session, handlers);
2779 if (err)
2780 goto out;
2782 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2783 "raw_syscalls:sys_enter");
2784 /* older kernels have syscalls tp versus raw_syscalls */
2785 if (evsel == NULL)
2786 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2787 "syscalls:sys_enter");
2789 if (evsel &&
2790 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2791 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2792 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2793 goto out;
2796 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2797 "raw_syscalls:sys_exit");
2798 if (evsel == NULL)
2799 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2800 "syscalls:sys_exit");
2801 if (evsel &&
2802 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2803 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2804 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2805 goto out;
2808 evlist__for_each(session->evlist, evsel) {
2809 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2810 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2811 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2812 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2813 evsel->handler = trace__pgfault;
2816 err = parse_target_str(trace);
2817 if (err != 0)
2818 goto out;
2820 setup_pager();
2822 err = perf_session__process_events(session);
2823 if (err)
2824 pr_err("Failed to process events, error %d", err);
2826 else if (trace->summary)
2827 trace__fprintf_thread_summary(trace, trace->output);
2829 out:
2830 perf_session__delete(session);
2832 return err;
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value of the fprintf() call converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2844 static size_t thread__dump_stats(struct thread_trace *ttrace,
2845 struct trace *trace, FILE *fp)
2847 struct stats *stats;
2848 size_t printed = 0;
2849 struct syscall *sc;
2850 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2852 if (inode == NULL)
2853 return 0;
2855 printed += fprintf(fp, "\n");
2857 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2858 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2859 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2861 /* each int_node is a syscall */
2862 while (inode) {
2863 stats = inode->priv;
2864 if (stats) {
2865 double min = (double)(stats->min) / NSEC_PER_MSEC;
2866 double max = (double)(stats->max) / NSEC_PER_MSEC;
2867 double avg = avg_stats(stats);
2868 double pct;
2869 u64 n = (u64) stats->n;
2871 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2872 avg /= NSEC_PER_MSEC;
2874 sc = &trace->syscalls.table[inode->i];
2875 printed += fprintf(fp, " %-15s", sc->name);
2876 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2877 n, avg * n, min, avg);
2878 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2881 inode = intlist__next(inode);
2884 printed += fprintf(fp, "\n\n");
2886 return printed;
/*
 * Context handed to the per-thread summary callback
 * (trace__fprintf_one_thread) through its void *priv argument.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* tracer whose threads are summarised */
	size_t		printed;	/* running total of fprintf() results */
};
2896 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2898 struct summary_data *data = priv;
2899 FILE *fp = data->fp;
2900 size_t printed = data->printed;
2901 struct trace *trace = data->trace;
2902 struct thread_trace *ttrace = thread__priv(thread);
2903 double ratio;
2905 if (ttrace == NULL)
2906 return 0;
2908 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2910 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2911 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2912 printed += fprintf(fp, "%.1f%%", ratio);
2913 if (ttrace->pfmaj)
2914 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2915 if (ttrace->pfmin)
2916 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2917 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2918 printed += thread__dump_stats(ttrace, trace, fp);
2920 data->printed += printed;
2922 return 0;
2925 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2927 struct summary_data data = {
2928 .fp = fp,
2929 .trace = trace
2931 data.printed = trace__fprintf_threads_header(fp);
2933 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2935 return data.printed;
2938 static int trace__set_duration(const struct option *opt, const char *str,
2939 int unset __maybe_unused)
2941 struct trace *trace = opt->value;
2943 trace->duration_filter = atof(str);
2944 return 0;
2947 static int trace__set_filter_pids(const struct option *opt, const char *str,
2948 int unset __maybe_unused)
2950 int ret = -1;
2951 size_t i;
2952 struct trace *trace = opt->value;
2954 * FIXME: introduce a intarray class, plain parse csv and create a
2955 * { int nr, int entries[] } struct...
2957 struct intlist *list = intlist__new(str);
2959 if (list == NULL)
2960 return -1;
2962 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2963 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2965 if (trace->filter_pids.entries == NULL)
2966 goto out;
2968 trace->filter_pids.entries[0] = getpid();
2970 for (i = 1; i < trace->filter_pids.nr; ++i)
2971 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2973 intlist__delete(list);
2974 ret = 0;
2975 out:
2976 return ret;
2979 static int trace__open_output(struct trace *trace, const char *filename)
2981 struct stat st;
2983 if (!stat(filename, &st) && st.st_size) {
2984 char oldname[PATH_MAX];
2986 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2987 unlink(oldname);
2988 rename(filename, oldname);
2991 trace->output = fopen(filename, "w");
2993 return trace->output == NULL ? -errno : 0;
2996 static int parse_pagefaults(const struct option *opt, const char *str,
2997 int unset __maybe_unused)
2999 int *trace_pgfaults = opt->value;
3001 if (strcmp(str, "all") == 0)
3002 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3003 else if (strcmp(str, "maj") == 0)
3004 *trace_pgfaults |= TRACE_PFMAJ;
3005 else if (strcmp(str, "min") == 0)
3006 *trace_pgfaults |= TRACE_PFMIN;
3007 else
3008 return -1;
3010 return 0;
3013 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3015 struct perf_evsel *evsel;
3017 evlist__for_each(evlist, evsel)
3018 evsel->handler = handler;
/*
 * cmd_trace() - entry point of the 'perf trace' builtin.
 *
 * Sets up a struct trace with its defaults, parses the command line, and then
 * dispatches to one of:
 *   - trace__record() when the first remaining argument is "record",
 *   - trace__replay() when an input file name was given (-i),
 *   - trace__run() otherwise (live mode, argv being the optional workload).
 *
 * Before dispatching to replay/run it opens the output file (-o), parses the
 * syscall qualifier expression (-e) and validates the target and uid.
 *
 * Returns the result of the selected mode, or a negative value on setup
 * errors.
 */
3021 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3023 const char *trace_usage[] = {
3024 "perf trace [<options>] [<command>]",
3025 "perf trace [<options>] -- <command> [<options>]",
3026 "perf trace record [<options>] [<command>]",
3027 "perf trace record [<options>] -- <command> [<options>]",
3028 NULL
/* Defaults: output on stderr, thread comm shown, syscalls traced. */
3030 struct trace trace = {
3031 .audit = {
3032 .machine = audit_detect_machine(),
/* NOTE(review): reads trace.audit.machine from inside trace's own initializer — relies on the line above having been evaluated first; confirm */
3033 .open_id = audit_name_to_syscall("open", trace.audit.machine),
3035 .syscalls = {
3036 . max = -1,
3038 .opts = {
3039 .target = {
3040 .uid = UINT_MAX,
3041 .uses_mmap = true,
3043 .user_freq = UINT_MAX,
3044 .user_interval = ULLONG_MAX,
3045 .no_buffering = true,
3046 .mmap_pages = UINT_MAX,
3047 .proc_map_timeout = 500,
3049 .output = stderr,
3050 .show_comm = true,
3051 .trace_syscalls = true,
3053 const char *output_name = NULL;
3054 const char *ev_qualifier_str = NULL;
/* Command line options; callbacks receive &trace or the specific field they fill in. */
3055 const struct option trace_options[] = {
3056 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3057 "event selector. use 'perf list' to list available events",
3058 parse_events_option),
3059 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3060 "show the thread COMM next to its id"),
3061 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3062 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3063 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3064 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3065 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3066 "trace events on existing process id"),
3067 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3068 "trace events on existing thread id"),
3069 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3070 "pids to filter (by the kernel)", trace__set_filter_pids),
3071 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3072 "system-wide collection from all CPUs"),
3073 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3074 "list of cpus to monitor"),
3075 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3076 "child tasks do not inherit counters"),
3077 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3078 "number of mmap data pages",
3079 perf_evlist__parse_mmap_pages),
3080 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3081 "user to profile"),
3082 OPT_CALLBACK(0, "duration", &trace, "float",
3083 "show only events with duration > N.M ms",
3084 trace__set_duration),
3085 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3086 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3087 OPT_BOOLEAN('T', "time", &trace.full_time,
3088 "Show full timestamp, not time relative to first start"),
3089 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3090 "Show only syscall summary with statistics"),
3091 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3092 "Show all syscalls and summary with statistics"),
3093 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3094 "Trace pagefaults", parse_pagefaults, "maj"),
3095 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3096 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3097 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3098 "per thread proc mmap processing timeout in ms"),
3099 OPT_END()
3101 const char * const trace_subcommands[] = { "record", NULL };
3102 int err;
3103 char bf[BUFSIZ];
3105 signal(SIGSEGV, sighandler_dump_stack);
3106 signal(SIGFPE, sighandler_dump_stack);
/* the evlist must exist before option parsing: --event adds evsels to it */
3108 trace.evlist = perf_evlist__new();
3110 if (trace.evlist == NULL) {
3111 pr_err("Not enough memory to run!\n");
3112 err = -ENOMEM;
3113 goto out;
3116 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3117 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
/* page fault tracing needs the address and time of each sample */
3119 if (trace.trace_pgfaults) {
3120 trace.opts.sample_address = true;
3121 trace.opts.sample_time = true;
/* evsels present at this point came from --event: give them the generic handler */
3124 if (trace.evlist->nr_entries > 0)
3125 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
/* 'perf trace record ...': hand the rest of the command line to trace__record() */
3127 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3128 return trace__record(&trace, argc-1, &argv[1]);
3130 /* summary_only implies summary option, but don't overwrite summary if set */
3131 if (trace.summary_only)
3132 trace.summary = trace.summary_only;
3134 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3135 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3136 pr_err("Please specify something to trace.\n");
3137 return -1;
3140 if (output_name != NULL) {
3141 err = trace__open_output(&trace, output_name);
3142 if (err < 0) {
3143 perror("failed to create output file");
3144 goto out;
3148 if (ev_qualifier_str != NULL) {
3149 const char *s = ev_qualifier_str;
3150 struct strlist_config slist_config = {
3151 .dirname = system_path(STRACE_GROUPS_DIR),
/* a leading '!' negates the qualifier and is not part of the list itself */
3154 trace.not_ev_qualifier = *s == '!';
3155 if (trace.not_ev_qualifier)
3156 ++s;
3157 trace.ev_qualifier = strlist__new(s, &slist_config);
3158 if (trace.ev_qualifier == NULL) {
3159 fputs("Not enough memory to parse event qualifier",
3160 trace.output);
3161 err = -ENOMEM;
3162 goto out_close;
3165 err = trace__validate_ev_qualifier(&trace);
3166 if (err)
3167 goto out_close;
3170 err = target__validate(&trace.opts.target);
3171 if (err) {
3172 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3173 fprintf(trace.output, "%s", bf);
3174 goto out_close;
3177 err = target__parse_uid(&trace.opts.target);
3178 if (err) {
3179 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3180 fprintf(trace.output, "%s", bf);
3181 goto out_close;
/* no workload and no explicit target: trace the whole system */
3184 if (!argc && target__none(&trace.opts.target))
3185 trace.opts.target.system_wide = true;
/* an input file selects replay mode, otherwise trace live */
3187 if (input_name)
3188 err = trace__replay(&trace);
3189 else
3190 err = trace__run(&trace, argc, argv);
/* only close the stream when we opened it ourselves (-o); stderr is left alone */
3192 out_close:
3193 if (output_name != NULL)
3194 fclose(trace.output);
3195 out:
3196 return err;