1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
47 u64 (*integer
)(struct tp_field
*field
, struct perf_sample
*sample
);
48 void *(*pointer
)(struct tp_field
*field
, struct perf_sample
*sample
);
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
56 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
69 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70 return bswap_##bits(value);\
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
/*
 * tp_field__init_uint - set up the integer accessor of a tracepoint field.
 *
 * Copies the field offset from @format_field and, inside a switch on
 * format_field->size, installs the ->integer reader: tp_field__u8, or the
 * 16/32/64-bit reader, using the byte-swapping variant when needs_swap is set.
 *
 * NOTE(review): the case labels, the needs_swap parameter declaration and the
 * return paths are not visible in this copy of the file - confirm against the
 * full source.
 */
77 static int tp_field__init_uint(struct tp_field
*field
,
78 struct format_field
*format_field
,
81 field
->offset
= format_field
->offset
;
83 switch (format_field
->size
) {
85 field
->integer
= tp_field__u8
;
88 field
->integer
= needs_swap
? tp_field__swapped_u16
: tp_field__u16
;
91 field
->integer
= needs_swap
? tp_field__swapped_u32
: tp_field__u32
;
94 field
->integer
= needs_swap
? tp_field__swapped_u64
: tp_field__u64
;
103 static void *tp_field__ptr(struct tp_field
*field
, struct perf_sample
*sample
)
105 return sample
->raw_data
+ field
->offset
;
/*
 * tp_field__init_ptr - set up the pointer accessor of a tracepoint field.
 *
 * Records the field offset taken from @format_field and installs
 * tp_field__ptr as the ->pointer reader.
 *
 * NOTE(review): the return statement is not visible in this copy; callers
 * treat a non-zero result as failure.
 */
108 static int tp_field__init_ptr(struct tp_field
*field
, struct format_field
*format_field
)
110 field
->offset
= format_field
->offset
;
111 field
->pointer
= tp_field__ptr
;
118 struct tp_field args
, ret
;
/*
 * Look up the tracepoint field called `name` in @evsel with
 * perf_evsel__field() and initialise @field as an integer accessor for it,
 * passing evsel->needs_swap on to tp_field__init_uint().
 *
 * NOTE(review): the declaration of the `name` parameter and the statement
 * executed when the lookup yields NULL are not visible in this copy.
 */
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel
*evsel
,
123 struct tp_field
*field
,
126 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
128 if (format_field
== NULL
)
131 return tp_field__init_uint(field
, format_field
, evsel
->needs_swap
);
/*
 * Initialise the integer accessor for syscall tracepoint field `name`.
 *
 * `name` is used twice: as the member of the struct syscall_tp hanging off
 * evsel->priv that gets initialised, and (stringified) as the tracepoint
 * field name to look up.  The statement expression evaluates to the result
 * of perf_evsel__init_tp_uint_field().
 *
 * The evsel argument is parenthesized before `->` so that any pointer
 * expression, not just a plain identifier, can be passed.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called `name` in @evsel with
 * perf_evsel__field() and initialise @field as a pointer accessor for it
 * via tp_field__init_ptr().
 *
 * NOTE(review): the declaration of the `name` parameter and the statement
 * executed when the lookup yields NULL are not visible in this copy.
 */
138 static int perf_evsel__init_tp_ptr_field(struct perf_evsel
*evsel
,
139 struct tp_field
*field
,
142 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
144 if (format_field
== NULL
)
147 return tp_field__init_ptr(field
, format_field
);
/*
 * Initialise the pointer accessor for syscall tracepoint field `name`.
 *
 * `name` selects the member of the struct syscall_tp stored in evsel->priv
 * and, stringified, the tracepoint field to look up.  The statement
 * expression evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * The evsel argument is parenthesized before `->` so that any pointer
 * expression, not just a plain identifier, can be passed.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Tear down @evsel by handing it to perf_evsel__delete().
 *
 * NOTE(review): going by the name, the private data in evsel->priv is
 * presumably released here as well, but that statement is not visible in
 * this copy - confirm against the full source.
 */
154 static void perf_evsel__delete_priv(struct perf_evsel
*evsel
)
157 perf_evsel__delete(evsel
);
/*
 * Attach syscall tracepoint state to @evsel.
 *
 * Allocates a struct syscall_tp with malloc() into evsel->priv and, when the
 * allocation succeeded, initialises its `id` integer accessor and stores
 * @handler in evsel->handler.
 *
 * NOTE(review): the error and return paths are not visible in this copy;
 * the caller treats a non-zero result as failure.
 */
160 static int perf_evsel__init_syscall_tp(struct perf_evsel
*evsel
, void *handler
)
162 evsel
->priv
= malloc(sizeof(struct syscall_tp
));
163 if (evsel
->priv
!= NULL
) {
164 if (perf_evsel__init_sc_tp_uint_field(evsel
, id
))
167 evsel
->handler
= handler
;
/*
 * Create the evsel for a syscall tracepoint.
 *
 * @direction is the event name within the tracepoint system.  The
 * "raw_syscalls" system is tried first, and "syscalls" is tried as well for
 * older kernels; the resulting evsel is then set up with
 * perf_evsel__init_syscall_tp(), and perf_evsel__delete_priv() is used on a
 * cleanup path.
 *
 * NOTE(review): the conditions guarding the fallback, the cleanup and the
 * return values are not visible in this copy.
 */
178 static struct perf_evsel
*perf_evsel__syscall_newtp(const char *direction
, void *handler
)
180 struct perf_evsel
*evsel
= perf_evsel__newtp("raw_syscalls", direction
);
182 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
184 evsel
= perf_evsel__newtp("syscalls", direction
);
187 if (perf_evsel__init_syscall_tp(evsel
, handler
))
194 perf_evsel__delete_priv(evsel
);
/*
 * Read integer field `name` of a syscall tracepoint from @sample, going
 * through the ->integer accessor stored in the struct syscall_tp that
 * evsel->priv points to.  Evaluates to the accessor's return value.
 *
 * The evsel argument is parenthesized before `->` so that any pointer
 * expression, not just a plain identifier, can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
/*
 * Get a pointer to field `name` of a syscall tracepoint inside @sample,
 * going through the ->pointer accessor stored in the struct syscall_tp that
 * evsel->priv points to.  Evaluates to the accessor's return value.
 *
 * The evsel argument is parenthesized before `->` so that any pointer
 * expression, not just a plain identifier, can be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit syscall tracepoint evsels and add both
 * to @evlist.
 *
 * sys_enter gets a pointer accessor for its `args` field and sys_exit an
 * integer accessor for its `ret` field.  If a later step fails, the evsels
 * created so far are destroyed through the out_delete_* labels.
 *
 * NOTE(review): the return statements and the out_delete_sys_exit label
 * line are not visible in this copy.
 */
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist
*evlist
,
207 void *sys_enter_handler
,
208 void *sys_exit_handler
)
211 struct perf_evsel
*sys_enter
, *sys_exit
;
213 sys_enter
= perf_evsel__syscall_newtp("sys_enter", sys_enter_handler
);
214 if (sys_enter
== NULL
)
217 if (perf_evsel__init_sc_tp_ptr_field(sys_enter
, args
))
218 goto out_delete_sys_enter
;
220 sys_exit
= perf_evsel__syscall_newtp("sys_exit", sys_exit_handler
);
221 if (sys_exit
== NULL
)
222 goto out_delete_sys_enter
;
224 if (perf_evsel__init_sc_tp_uint_field(sys_exit
, ret
))
225 goto out_delete_sys_exit
;
227 perf_evlist__add(evlist
, sys_enter
);
228 perf_evlist__add(evlist
, sys_exit
);
235 perf_evsel__delete_priv(sys_exit
);
236 out_delete_sys_enter
:
237 perf_evsel__delete_priv(sys_enter
);
244 struct thread
*thread
;
254 const char **entries
;
257 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
258 .nr_entries = ARRAY_SIZE(array), \
262 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
264 .nr_entries = ARRAY_SIZE(array), \
268 static size_t __syscall_arg__scnprintf_strarray(char *bf
, size_t size
,
270 struct syscall_arg
*arg
)
272 struct strarray
*sa
= arg
->parm
;
273 int idx
= arg
->val
- sa
->offset
;
275 if (idx
< 0 || idx
>= sa
->nr_entries
)
276 return scnprintf(bf
, size
, intfmt
, arg
->val
);
278 return scnprintf(bf
, size
, "%s", sa
->entries
[idx
]);
/*
 * String-table printer that uses "%d" as the numeric format handed to
 * __syscall_arg__scnprintf_strarray().
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *numeric_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, numeric_fmt, arg);
}
287 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
289 #if defined(__i386__) || defined(__x86_64__)
291 * FIXME: Make this available to all arches as soon as the ioctl beautifier
292 * gets rewritten to support all arches.
/*
 * String-table printer that uses "%#x" as the numeric format handed to
 * __syscall_arg__scnprintf_strarray().
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *numeric_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, numeric_fmt, arg);
}
300 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
301 #endif /* defined(__i386__) || defined(__x86_64__) */
303 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
304 struct syscall_arg
*arg
);
306 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Printer for the directory-fd argument of the *at() syscalls: emits either
 * the literal "CWD" or whatever syscall_arg__scnprintf_fd() prints.
 *
 * NOTE(review): the test selecting "CWD" is not visible in this copy;
 * presumably it compares the value with AT_FDCWD - confirm.
 */
308 static size_t syscall_arg__scnprintf_fd_at(char *bf
, size_t size
,
309 struct syscall_arg
*arg
)
314 return scnprintf(bf
, size
, "CWD");
316 return syscall_arg__scnprintf_fd(bf
, size
, arg
);
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
321 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
322 struct syscall_arg
*arg
);
324 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
326 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
327 struct syscall_arg
*arg
)
329 return scnprintf(bf
, size
, "%#lx", arg
->val
);
332 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Printer for mmap/mprotect protection bits.
 *
 * A value equal to PROT_NONE prints "NONE".  Otherwise P_MMAP_PROT(n)
 * appends the name n for each PROT_n bit that is set, separating names with
 * "|"; the final scnprintf() appends the remaining value in hex.
 *
 * NOTE(review): part of the P_MMAP_PROT body, most of its invocations and
 * the condition guarding the final hex output are not visible in this copy.
 */
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf
, size_t size
,
335 struct syscall_arg
*arg
)
337 int printed
= 0, prot
= arg
->val
;
339 if (prot
== PROT_NONE
)
340 return scnprintf(bf
, size
, "NONE");
341 #define P_MMAP_PROT(n) \
342 if (prot & PROT_##n) { \
343 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
353 P_MMAP_PROT(GROWSDOWN
);
354 P_MMAP_PROT(GROWSUP
);
358 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", prot
);
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Printer for the mmap flags argument.
 *
 * P_MMAP_FLAG(n) appends the name n for each MAP_n bit set in the value,
 * separating names with "|"; MAP_UNINITIALIZED is only handled when the
 * headers define it.  The final scnprintf() appends the remaining value in
 * hex.
 *
 * NOTE(review): part of the P_MMAP_FLAG body, several invocations and the
 * condition guarding the final hex output are not visible in this copy.
 */
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf
, size_t size
,
366 struct syscall_arg
*arg
)
368 int printed
= 0, flags
= arg
->val
;
370 #define P_MMAP_FLAG(n) \
371 if (flags & MAP_##n) { \
372 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
377 P_MMAP_FLAG(PRIVATE
);
381 P_MMAP_FLAG(ANONYMOUS
);
382 P_MMAP_FLAG(DENYWRITE
);
383 P_MMAP_FLAG(EXECUTABLE
);
386 P_MMAP_FLAG(GROWSDOWN
);
388 P_MMAP_FLAG(HUGETLB
);
391 P_MMAP_FLAG(NONBLOCK
);
392 P_MMAP_FLAG(NORESERVE
);
393 P_MMAP_FLAG(POPULATE
);
395 #ifdef MAP_UNINITIALIZED
396 P_MMAP_FLAG(UNINITIALIZED
);
401 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Printer for the mremap flags argument.
 *
 * P_MREMAP_FLAG(n) appends the name n for each MREMAP_n bit that is set
 * (names separated by "|") and clears that bit from the local copy, so the
 * final scnprintf() reports in hex only what was not recognised.
 *
 * NOTE(review): the condition guarding the final hex output is not visible
 * in this copy.
 */
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf
, size_t size
,
409 struct syscall_arg
*arg
)
411 int printed
= 0, flags
= arg
->val
;
413 #define P_MREMAP_FLAG(n) \
414 if (flags & MREMAP_##n) { \
415 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416 flags &= ~MREMAP_##n; \
419 P_MREMAP_FLAG(MAYMOVE
);
421 P_MREMAP_FLAG(FIXED
);
426 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Printer for the madvise behavior argument.
 *
 * P_MADV_BHV(n) expands to a `case MADV_n:` that returns the name n; the
 * SOFT_OFFLINE and NOHUGEPAGE cases are only compiled when the headers
 * define the corresponding MADV_ constant.  Values that match no case are
 * printed in hex.
 *
 * NOTE(review): the switch statement itself and several cases are not
 * visible in this copy.
 */
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf
, size_t size
,
434 struct syscall_arg
*arg
)
436 int behavior
= arg
->val
;
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
442 P_MADV_BHV(SEQUENTIAL
);
443 P_MADV_BHV(WILLNEED
);
444 P_MADV_BHV(DONTNEED
);
446 P_MADV_BHV(DONTFORK
);
448 P_MADV_BHV(HWPOISON
);
449 #ifdef MADV_SOFT_OFFLINE
450 P_MADV_BHV(SOFT_OFFLINE
);
452 P_MADV_BHV(MERGEABLE
);
453 P_MADV_BHV(UNMERGEABLE
);
455 P_MADV_BHV(HUGEPAGE
);
457 #ifdef MADV_NOHUGEPAGE
458 P_MADV_BHV(NOHUGEPAGE
);
461 P_MADV_BHV(DONTDUMP
);
470 return scnprintf(bf
, size
, "%#x", behavior
);
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Printer for the flock operation argument.
 *
 * One path prints "NONE".  The per-command macro body appends the command
 * name when all bits of LOCK_<cmd> are set in the value, separating names
 * with "|"; the final scnprintf() appends the remaining value in hex.
 *
 * NOTE(review): the condition for "NONE", the macro's #define line, its
 * invocations and the guard of the final hex output are not visible in
 * this copy.
 */
475 static size_t syscall_arg__scnprintf_flock(char *bf
, size_t size
,
476 struct syscall_arg
*arg
)
478 int printed
= 0, op
= arg
->val
;
481 return scnprintf(bf
, size
, "NONE");
483 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
499 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", op
);
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Printer for the futex op argument.
 *
 * The command is op & FUTEX_CMD_MASK.  For each known command the
 * P_FUTEX_OP(n) case prints the name n and ORs a per-command set of SCF_*
 * bits into arg->mask; unknown commands are printed in hex.  Afterwards
 * "|PRIV" is appended when FUTEX_PRIVATE_FLAG is set and "|CLKRT" when
 * FUTEX_CLOCK_REALTIME is set.
 *
 * NOTE(review): the SCF_* bits presumably mark the futex arguments that the
 * command does not use so that they are not printed - confirm against the
 * code that consumes arg->mask.  Some enum members, the declaration of `op`
 * and `printed`, the switch line and the return are not visible in this copy.
 */
506 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
508 enum syscall_futex_args
{
509 SCF_UADDR
= (1 << 0),
512 SCF_TIMEOUT
= (1 << 3),
513 SCF_UADDR2
= (1 << 4),
517 int cmd
= op
& FUTEX_CMD_MASK
;
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522 P_FUTEX_OP(WAIT
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
523 P_FUTEX_OP(WAKE
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
524 P_FUTEX_OP(FD
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
525 P_FUTEX_OP(REQUEUE
); arg
->mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
526 P_FUTEX_OP(CMP_REQUEUE
); arg
->mask
|= SCF_TIMEOUT
; break;
527 P_FUTEX_OP(CMP_REQUEUE_PI
); arg
->mask
|= SCF_TIMEOUT
; break;
528 P_FUTEX_OP(WAKE_OP
); break;
529 P_FUTEX_OP(LOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
530 P_FUTEX_OP(UNLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
531 P_FUTEX_OP(TRYLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
532 P_FUTEX_OP(WAIT_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
533 P_FUTEX_OP(WAKE_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
534 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
535 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
538 if (op
& FUTEX_PRIVATE_FLAG
)
539 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
541 if (op
& FUTEX_CLOCK_REALTIME
)
542 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
547 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Names for the epoll_ctl op argument.  The strarray is declared with an
 * offset of 1, and the strarray printer subtracts the offset from the value
 * before indexing, so value 1 selects "ADD".
 */
549 static const char *epoll_ctl_ops
[] = { "ADD", "DEL", "MOD", };
550 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops
, 1);
/* Names for the `which` argument of getitimer/setitimer, indexed by value. */
552 static const char *itimers
[] = { "REAL", "VIRTUAL", "PROF", };
553 static DEFINE_STRARRAY(itimers
);
555 static const char *whences
[] = { "SET", "CUR", "END",
563 static DEFINE_STRARRAY(whences
);
565 static const char *fcntl_cmds
[] = {
566 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
567 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
568 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
571 static DEFINE_STRARRAY(fcntl_cmds
);
573 static const char *rlimit_resources
[] = {
574 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
575 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
578 static DEFINE_STRARRAY(rlimit_resources
);
/* Names for the `how` argument of rt_sigprocmask, indexed by value. */
580 static const char *sighow
[] = { "BLOCK", "UNBLOCK", "SETMASK", };
581 static DEFINE_STRARRAY(sighow
);
583 static const char *clockid
[] = {
584 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
585 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
587 static DEFINE_STRARRAY(clockid
);
589 static const char *socket_families
[] = {
590 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
591 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
592 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
593 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
594 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
595 "ALG", "NFC", "VSOCK",
597 static DEFINE_STRARRAY(socket_families
);
599 #ifndef SOCK_TYPE_MASK
600 #define SOCK_TYPE_MASK 0xf
/*
 * Printer for the socket type argument.
 *
 * The value is split in two: the bits outside SOCK_TYPE_MASK are kept as
 * flags and the bits inside it as the type proper.  P_SK_TYPE(n) cases print
 * the name of a known type, otherwise the type is printed in hex.
 * P_SK_FLAG(n) then appends "|n" for each SOCK_n flag set and clears it, and
 * the final scnprintf() appends the remaining flags in hex.
 *
 * NOTE(review): the local declarations, the switch line, most macro
 * invocations and the guard of the final hex output are not visible in
 * this copy.
 */
603 static size_t syscall_arg__scnprintf_socket_type(char *bf
, size_t size
,
604 struct syscall_arg
*arg
)
608 flags
= type
& ~SOCK_TYPE_MASK
;
610 type
&= SOCK_TYPE_MASK
;
612 * Can't use a strarray, MIPS may override for ABI reasons.
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
620 P_SK_TYPE(SEQPACKET
);
625 printed
= scnprintf(bf
, size
, "%#x", type
);
628 #define P_SK_FLAG(n) \
629 if (flags & SOCK_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631 flags &= ~SOCK_##n; \
639 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", flags
);
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
647 #define MSG_PROBE 0x10
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE 0x10000
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
656 #define MSG_FASTOPEN 0x20000000
/*
 * Printer for the flags argument of the send and recv family of syscalls.
 *
 * One path prints "NONE".  P_MSG_FLAG(n) appends the name n for each MSG_n
 * bit set in the value, separating names with "|"; the final scnprintf()
 * appends the remaining value in hex.
 *
 * NOTE(review): the condition for "NONE", part of the macro body, several
 * invocations and the guard of the final hex output are not visible in
 * this copy.
 */
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf
, size_t size
,
660 struct syscall_arg
*arg
)
662 int printed
= 0, flags
= arg
->val
;
665 return scnprintf(bf
, size
, "NONE");
666 #define P_MSG_FLAG(n) \
667 if (flags & MSG_##n) { \
668 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
674 P_MSG_FLAG(DONTROUTE
);
679 P_MSG_FLAG(DONTWAIT
);
686 P_MSG_FLAG(ERRQUEUE
);
687 P_MSG_FLAG(NOSIGNAL
);
689 P_MSG_FLAG(WAITFORONE
);
690 P_MSG_FLAG(SENDPAGE_NOTLAST
);
691 P_MSG_FLAG(FASTOPEN
);
692 P_MSG_FLAG(CMSG_CLOEXEC
);
696 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Printer for the access() mode argument.
 *
 * A mode equal to F_OK prints "F".  Otherwise the per-bit macro body appends
 * the letter n, with no separator, for each n_OK bit that is set, and the
 * final scnprintf() appends "|" followed by the remaining value in hex.
 *
 * NOTE(review): the local declarations, the macro's #define line, its
 * invocations and the guard of the final hex output are not visible in
 * this copy.
 */
703 static size_t syscall_arg__scnprintf_access_mode(char *bf
, size_t size
,
704 struct syscall_arg
*arg
)
709 if (mode
== F_OK
) /* 0 */
710 return scnprintf(bf
, size
, "F");
712 if (mode & n##_OK) { \
713 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
723 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", mode
);
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Printer for open-style flags.
 *
 * When O_CREAT is not set, the bit for the following argument (the mode) is
 * set in arg->mask.  One path prints "RDONLY".  The per-flag macro body
 * appends the name n for each O_n bit that is set, separating names with
 * "|"; "SYNC" is only appended when every bit of O_SYNC is set.  The final
 * scnprintf() appends the remaining value in hex.
 *
 * NOTE(review): the condition for "RDONLY", the macro's #define line, its
 * invocations and the guard of the final hex output are not visible in
 * this copy.
 */
730 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
731 struct syscall_arg
*arg
)
733 int printed
= 0, flags
= arg
->val
;
735 if (!(flags
& O_CREAT
))
736 arg
->mask
|= 1 << (arg
->idx
+ 1); /* Mask the mode parm */
739 return scnprintf(bf
, size
, "RDONLY");
741 if (flags & O_##n) { \
742 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
766 if ((flags
& O_SYNC
) == O_SYNC
)
767 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
779 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Printer for the eventfd2 flags argument.
 *
 * One path prints "NONE".  The per-flag macro body appends the name n for
 * each EFD_n bit that is set, separating names with "|"; the final
 * scnprintf() appends the remaining value in hex.
 *
 * NOTE(review): the condition for "NONE", the macro's #define line, its
 * invocations and the guard of the final hex output are not visible in
 * this copy.
 */
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf
, size_t size
,
787 struct syscall_arg
*arg
)
789 int printed
= 0, flags
= arg
->val
;
792 return scnprintf(bf
, size
, "NONE");
794 if (flags & EFD_##n) { \
795 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
805 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Printer for the pipe2 flags argument.
 *
 * The per-flag macro body appends the name n for each O_n bit that is set,
 * separating names with "|"; the final scnprintf() appends the remaining
 * value in hex.
 *
 * NOTE(review): the macro's #define line, its invocations and the guard of
 * the final hex output are not visible in this copy.
 */
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf
, size_t size
,
813 struct syscall_arg
*arg
)
815 int printed
= 0, flags
= arg
->val
;
818 if (flags & O_##n) { \
819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
828 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Printer for signal number arguments.
 *
 * P_SIGNUM(n) expands to a `case SIGn:` that returns the name n (without
 * the SIG prefix); values that match no case are printed in hex.
 *
 * NOTE(review): the declaration of `sig`, the switch line and all macro
 * invocations are not visible in this copy.
 */
835 static size_t syscall_arg__scnprintf_signum(char *bf
, size_t size
, struct syscall_arg
*arg
)
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
883 return scnprintf(bf
, size
, "%#x", sig
);
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
888 #if defined(__i386__) || defined(__x86_64__)
890 * FIXME: Make this available to all arches.
892 #define TCGETS 0x5401
894 static const char *tioctls
[] = {
895 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
896 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
897 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
898 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
899 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
900 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
901 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
902 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
903 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
904 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
905 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
906 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
907 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
908 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
909 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
912 static DEFINE_STRARRAY_OFFSET(tioctls
, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Initializer helper for syscall_fmt entries: installs SCA_STRARRAY as the
 * printer for argument index `arg` and points that argument's arg_parm at
 * strarray__<array>.  The `name` parameter does not appear in the expansion;
 * it only labels the argument at the point of use.
 */
915 #define STRARRAY(arg, name, array) \
916 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
917 .arg_parm = { [arg] = &strarray__##array, }
919 static struct syscall_fmt
{
922 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, struct syscall_arg
*arg
);
928 { .name
= "access", .errmsg
= true,
929 .arg_scnprintf
= { [1] = SCA_ACCMODE
, /* mode */ }, },
930 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
931 { .name
= "brk", .hexret
= true,
932 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
933 { .name
= "clock_gettime", .errmsg
= true, STRARRAY(0, clk_id
, clockid
), },
934 { .name
= "close", .errmsg
= true,
935 .arg_scnprintf
= { [0] = SCA_CLOSE_FD
, /* fd */ }, },
936 { .name
= "connect", .errmsg
= true, },
937 { .name
= "dup", .errmsg
= true,
938 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
939 { .name
= "dup2", .errmsg
= true,
940 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
941 { .name
= "dup3", .errmsg
= true,
942 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
943 { .name
= "epoll_ctl", .errmsg
= true, STRARRAY(1, op
, epoll_ctl_ops
), },
944 { .name
= "eventfd2", .errmsg
= true,
945 .arg_scnprintf
= { [1] = SCA_EFD_FLAGS
, /* flags */ }, },
946 { .name
= "faccessat", .errmsg
= true,
947 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
948 { .name
= "fadvise64", .errmsg
= true,
949 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
950 { .name
= "fallocate", .errmsg
= true,
951 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
952 { .name
= "fchdir", .errmsg
= true,
953 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
954 { .name
= "fchmod", .errmsg
= true,
955 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
956 { .name
= "fchmodat", .errmsg
= true,
957 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
958 { .name
= "fchown", .errmsg
= true,
959 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
960 { .name
= "fchownat", .errmsg
= true,
961 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
962 { .name
= "fcntl", .errmsg
= true,
963 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
964 [1] = SCA_STRARRAY
, /* cmd */ },
965 .arg_parm
= { [1] = &strarray__fcntl_cmds
, /* cmd */ }, },
966 { .name
= "fdatasync", .errmsg
= true,
967 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
968 { .name
= "flock", .errmsg
= true,
969 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
970 [1] = SCA_FLOCK
, /* cmd */ }, },
971 { .name
= "fsetxattr", .errmsg
= true,
972 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
973 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat",
974 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
975 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat",
976 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
977 { .name
= "fstatfs", .errmsg
= true,
978 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
979 { .name
= "fsync", .errmsg
= true,
980 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
981 { .name
= "ftruncate", .errmsg
= true,
982 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
983 { .name
= "futex", .errmsg
= true,
984 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
985 { .name
= "futimesat", .errmsg
= true,
986 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
987 { .name
= "getdents", .errmsg
= true,
988 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
989 { .name
= "getdents64", .errmsg
= true,
990 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
991 { .name
= "getitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
992 { .name
= "getrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
993 { .name
= "ioctl", .errmsg
= true,
994 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
995 #if defined(__i386__) || defined(__x86_64__)
997 * FIXME: Make this available to all arches.
999 [1] = SCA_STRHEXARRAY
, /* cmd */
1000 [2] = SCA_HEX
, /* arg */ },
1001 .arg_parm
= { [1] = &strarray__tioctls
, /* cmd */ }, },
1003 [2] = SCA_HEX
, /* arg */ }, },
1005 { .name
= "kill", .errmsg
= true,
1006 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1007 { .name
= "linkat", .errmsg
= true,
1008 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
1009 { .name
= "lseek", .errmsg
= true,
1010 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1011 [2] = SCA_STRARRAY
, /* whence */ },
1012 .arg_parm
= { [2] = &strarray__whences
, /* whence */ }, },
1013 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat", },
1014 { .name
= "madvise", .errmsg
= true,
1015 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1016 [2] = SCA_MADV_BHV
, /* behavior */ }, },
1017 { .name
= "mkdirat", .errmsg
= true,
1018 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
1019 { .name
= "mknodat", .errmsg
= true,
1020 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
1021 { .name
= "mlock", .errmsg
= true,
1022 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1023 { .name
= "mlockall", .errmsg
= true,
1024 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1025 { .name
= "mmap", .hexret
= true,
1026 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1027 [2] = SCA_MMAP_PROT
, /* prot */
1028 [3] = SCA_MMAP_FLAGS
, /* flags */
1029 [4] = SCA_FD
, /* fd */ }, },
1030 { .name
= "mprotect", .errmsg
= true,
1031 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1032 [2] = SCA_MMAP_PROT
, /* prot */ }, },
1033 { .name
= "mremap", .hexret
= true,
1034 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1035 [3] = SCA_MREMAP_FLAGS
, /* flags */
1036 [4] = SCA_HEX
, /* new_addr */ }, },
1037 { .name
= "munlock", .errmsg
= true,
1038 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1039 { .name
= "munmap", .errmsg
= true,
1040 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1041 { .name
= "name_to_handle_at", .errmsg
= true,
1042 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1043 { .name
= "newfstatat", .errmsg
= true,
1044 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1045 { .name
= "open", .errmsg
= true,
1046 .arg_scnprintf
= { [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
1047 { .name
= "open_by_handle_at", .errmsg
= true,
1048 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1049 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1050 { .name
= "openat", .errmsg
= true,
1051 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1052 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1053 { .name
= "pipe2", .errmsg
= true,
1054 .arg_scnprintf
= { [1] = SCA_PIPE_FLAGS
, /* flags */ }, },
1055 { .name
= "poll", .errmsg
= true, .timeout
= true, },
1056 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
1057 { .name
= "pread", .errmsg
= true, .alias
= "pread64",
1058 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1059 { .name
= "preadv", .errmsg
= true, .alias
= "pread",
1060 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1061 { .name
= "prlimit64", .errmsg
= true, STRARRAY(1, resource
, rlimit_resources
), },
1062 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64",
1063 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1064 { .name
= "pwritev", .errmsg
= true,
1065 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1066 { .name
= "read", .errmsg
= true,
1067 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1068 { .name
= "readlinkat", .errmsg
= true,
1069 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1070 { .name
= "readv", .errmsg
= true,
1071 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1072 { .name
= "recvfrom", .errmsg
= true,
1073 .arg_scnprintf
= { [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1074 { .name
= "recvmmsg", .errmsg
= true,
1075 .arg_scnprintf
= { [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1076 { .name
= "recvmsg", .errmsg
= true,
1077 .arg_scnprintf
= { [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1078 { .name
= "renameat", .errmsg
= true,
1079 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1080 { .name
= "rt_sigaction", .errmsg
= true,
1081 .arg_scnprintf
= { [0] = SCA_SIGNUM
, /* sig */ }, },
1082 { .name
= "rt_sigprocmask", .errmsg
= true, STRARRAY(0, how
, sighow
), },
1083 { .name
= "rt_sigqueueinfo", .errmsg
= true,
1084 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1085 { .name
= "rt_tgsigqueueinfo", .errmsg
= true,
1086 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1087 { .name
= "select", .errmsg
= true, .timeout
= true, },
1088 { .name
= "sendmmsg", .errmsg
= true,
1089 .arg_scnprintf
= { [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1090 { .name
= "sendmsg", .errmsg
= true,
1091 .arg_scnprintf
= { [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1092 { .name
= "sendto", .errmsg
= true,
1093 .arg_scnprintf
= { [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1094 { .name
= "setitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
1095 { .name
= "setrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
1096 { .name
= "shutdown", .errmsg
= true,
1097 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1098 { .name
= "socket", .errmsg
= true,
1099 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1100 [1] = SCA_SK_TYPE
, /* type */ },
1101 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1102 { .name
= "socketpair", .errmsg
= true,
1103 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1104 [1] = SCA_SK_TYPE
, /* type */ },
1105 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1106 { .name
= "stat", .errmsg
= true, .alias
= "newstat", },
1107 { .name
= "symlinkat", .errmsg
= true,
1108 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1109 { .name
= "tgkill", .errmsg
= true,
1110 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1111 { .name
= "tkill", .errmsg
= true,
1112 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1113 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
1114 { .name
= "unlinkat", .errmsg
= true,
1115 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1116 { .name
= "utimensat", .errmsg
= true,
1117 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dirfd */ }, },
1118 { .name
= "write", .errmsg
= true,
1119 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1120 { .name
= "writev", .errmsg
= true,
1121 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1124 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
1126 const struct syscall_fmt
*fmt
= fmtp
;
1127 return strcmp(name
, fmt
->name
);
1130 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
1132 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
1133 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
1137 struct event_format
*tp_format
;
1139 struct format_field
*args
;
1143 struct syscall_fmt
*fmt
;
1144 size_t (**arg_scnprintf
)(char *bf
, size_t size
, struct syscall_arg
*arg
);
/*
 * Print a duration as "(<value> ms): " to @fp and return the number of
 * characters written.
 *
 * @t is divided by NSEC_PER_MSEC to obtain milliseconds and printed with
 * "%6.3f ms": in red when it is at least 1.0, in yellow when it is at least
 * 0.01.
 *
 * NOTE(review): the line in front of the PERF_COLOR_NORMAL call is not
 * visible in this copy; presumably it is the `else` that makes the normal
 * colour the fallback - confirm.
 */
1148 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
1150 double duration
= (double)t
/ NSEC_PER_MSEC
;
1151 size_t printed
= fprintf(fp
, "(");
1153 if (duration
>= 1.0)
1154 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
1155 else if (duration
>= 0.01)
1156 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
1158 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
1159 return printed
+ fprintf(fp
, "): ");
1162 struct thread_trace
{
1166 unsigned long nr_events
;
1167 unsigned long pfmaj
, pfmin
;
1175 struct intlist
*syscall_stats
;
/*
 * Allocate a zeroed struct thread_trace, set paths.max to -1 (the value
 * trace__set_fd_pathname() treats as "no fd path table yet") and create the
 * syscall_stats intlist.
 *
 * NOTE(review): the allocation check and the return statement are not
 * visible in this copy.
 */
1178 static struct thread_trace
*thread_trace__new(void)
1180 struct thread_trace
*ttrace
= zalloc(sizeof(struct thread_trace
));
1183 ttrace
->paths
.max
= -1;
1185 ttrace
->syscall_stats
= intlist__new(NULL
);
/*
 * Get the per-thread trace state of @thread, creating it on first use.
 *
 * When the thread has no private data yet, a new thread_trace is attached
 * with thread__set_priv().  The state's nr_events counter is incremented on
 * each call that reaches it.  A red "not enough memory" warning is written
 * to @fp on the failure path.
 *
 * NOTE(review): the checks on @thread, the jump to the failure path and the
 * return statements are not visible in this copy.
 */
1190 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
1192 struct thread_trace
*ttrace
;
1197 if (thread__priv(thread
) == NULL
)
1198 thread__set_priv(thread
, thread_trace__new());
1200 if (thread__priv(thread
) == NULL
)
1203 ttrace
= thread__priv(thread
);
1204 ++ttrace
->nr_events
;
1208 color_fprintf(fp
, PERF_COLOR_RED
,
1209 "WARNING: not enough memory, dropping samples!\n");
1213 #define TRACE_PFMAJ (1 << 0)
1214 #define TRACE_PFMIN (1 << 1)
1217 struct perf_tool tool
;
1224 struct syscall
*table
;
1226 struct record_opts opts
;
1227 struct perf_evlist
*evlist
;
1228 struct machine
*host
;
1229 struct thread
*current
;
1232 unsigned long nr_events
;
1233 struct strlist
*ev_qualifier
;
1234 const char *last_vfs_getname
;
1235 struct intlist
*tid_list
;
1236 struct intlist
*pid_list
;
1241 double duration_filter
;
1247 bool not_ev_qualifier
;
1251 bool multiple_threads
;
1255 bool show_tool_stats
;
1256 bool trace_syscalls
;
/*
 * Remember @pathname as the path behind @fd for @thread.
 *
 * The per-thread table is indexed by fd.  When @fd is beyond the current
 * maximum the table is grown with realloc() to fd + 1 slots and the new
 * slots are zeroed (all of them when the table was empty, i.e. max == -1).
 * A strdup() copy of @pathname is then stored in the slot.
 *
 * Returns 0 when the copy was stored, -1 when strdup() failed.
 *
 * NOTE(review): the check of the realloc() result is not visible in this
 * copy.  In the visible code the slot is overwritten without freeing a
 * previous entry and @fd is not checked for being negative - confirm
 * whether callers guarantee both.
 */
1261 static int trace__set_fd_pathname(struct thread
*thread
, int fd
, const char *pathname
)
1263 struct thread_trace
*ttrace
= thread__priv(thread
);
1265 if (fd
> ttrace
->paths
.max
) {
1266 char **npath
= realloc(ttrace
->paths
.table
, (fd
+ 1) * sizeof(char *));
1271 if (ttrace
->paths
.max
!= -1) {
1272 memset(npath
+ ttrace
->paths
.max
+ 1, 0,
1273 (fd
- ttrace
->paths
.max
) * sizeof(char *));
1275 memset(npath
, 0, (fd
+ 1) * sizeof(char *));
1278 ttrace
->paths
.table
= npath
;
1279 ttrace
->paths
.max
= fd
;
1282 ttrace
->paths
.table
[fd
] = strdup(pathname
);
1284 return ttrace
->paths
.table
[fd
] != NULL
? 0 : -1;
/*
 * Resolve the path behind @fd of @thread through procfs and cache it.
 *
 * The link name is /proc/<pid>/fd/<fd> when the thread's pid and tid are
 * equal, /proc/<pid>/task/<tid>/fd/<fd> otherwise.  The link is examined
 * with lstat() and read with readlink(); a target longer than the size
 * lstat() reported, or one that would not fit in the local buffer together
 * with its terminator, is rejected.  The NUL-terminated result is handed to
 * trace__set_fd_pathname(), whose result is returned.
 *
 * NOTE(review): the declarations of `st` and `ret` and the statements taken
 * on the two rejection tests are not visible in this copy.
 */
1287 static int thread__read_fd_path(struct thread
*thread
, int fd
)
1289 char linkname
[PATH_MAX
], pathname
[PATH_MAX
];
1293 if (thread
->pid_
== thread
->tid
) {
1294 scnprintf(linkname
, sizeof(linkname
),
1295 "/proc/%d/fd/%d", thread
->pid_
, fd
);
1297 scnprintf(linkname
, sizeof(linkname
),
1298 "/proc/%d/task/%d/fd/%d", thread
->pid_
, thread
->tid
, fd
);
1301 if (lstat(linkname
, &st
) < 0 || st
.st_size
+ 1 > (off_t
)sizeof(pathname
))
1304 ret
= readlink(linkname
, pathname
, sizeof(pathname
));
1306 if (ret
< 0 || ret
> st
.st_size
)
1309 pathname
[ret
] = '\0';
1310 return trace__set_fd_pathname(thread
, fd
, pathname
);
1313 static const char *thread__fd_path(struct thread
*thread
, int fd
,
1314 struct trace
*trace
)
1316 struct thread_trace
*ttrace
= thread__priv(thread
);
1324 if ((fd
> ttrace
->paths
.max
|| ttrace
->paths
.table
[fd
] == NULL
)) {
1327 ++trace
->stats
.proc_getname
;
1328 if (thread__read_fd_path(thread
, fd
))
1332 return ttrace
->paths
.table
[fd
];
1335 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
1336 struct syscall_arg
*arg
)
1339 size_t printed
= scnprintf(bf
, size
, "%d", fd
);
1340 const char *path
= thread__fd_path(arg
->thread
, fd
, arg
->trace
);
1343 printed
+= scnprintf(bf
+ printed
, size
- printed
, "<%s>", path
);
1348 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
1349 struct syscall_arg
*arg
)
1352 size_t printed
= syscall_arg__scnprintf_fd(bf
, size
, arg
);
1353 struct thread_trace
*ttrace
= thread__priv(arg
->thread
);
1355 if (ttrace
&& fd
>= 0 && fd
<= ttrace
->paths
.max
)
1356 zfree(&ttrace
->paths
.table
[fd
]);
1361 static bool trace__filter_duration(struct trace
*trace
, double t
)
1363 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
1366 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
1368 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
1370 return fprintf(fp
, "%10.3f ", ts
);
1373 static bool done
= false;
1374 static bool interrupted
= false;
1376 static void sig_handler(int sig
)
1379 interrupted
= sig
== SIGINT
;
1382 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
1383 u64 duration
, u64 tstamp
, FILE *fp
)
1385 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
1386 printed
+= fprintf_duration(duration
, fp
);
1388 if (trace
->multiple_threads
) {
1389 if (trace
->show_comm
)
1390 printed
+= fprintf(fp
, "%.14s/", thread__comm_str(thread
));
1391 printed
+= fprintf(fp
, "%d ", thread
->tid
);
1397 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
1398 union perf_event
*event
, struct perf_sample
*sample
)
1402 switch (event
->header
.type
) {
1403 case PERF_RECORD_LOST
:
1404 color_fprintf(trace
->output
, PERF_COLOR_RED
,
1405 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
1406 ret
= machine__process_lost_event(machine
, event
, sample
);
1408 ret
= machine__process_event(machine
, event
, sample
);
1415 static int trace__tool_process(struct perf_tool
*tool
,
1416 union perf_event
*event
,
1417 struct perf_sample
*sample
,
1418 struct machine
*machine
)
1420 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
1421 return trace__process_event(trace
, machine
, event
, sample
);
1424 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
1426 int err
= symbol__init(NULL
);
1431 trace
->host
= machine__new_host();
1432 if (trace
->host
== NULL
)
1435 err
= __machine__synthesize_threads(trace
->host
, &trace
->tool
, &trace
->opts
.target
,
1436 evlist
->threads
, trace__tool_process
, false);
1443 static int syscall__set_arg_fmts(struct syscall
*sc
)
1445 struct format_field
*field
;
1448 sc
->arg_scnprintf
= calloc(sc
->nr_args
, sizeof(void *));
1449 if (sc
->arg_scnprintf
== NULL
)
1453 sc
->arg_parm
= sc
->fmt
->arg_parm
;
1455 for (field
= sc
->args
; field
; field
= field
->next
) {
1456 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
1457 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
1458 else if (field
->flags
& FIELD_IS_POINTER
)
1459 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
1466 static int trace__read_syscall_info(struct trace
*trace
, int id
)
1470 const char *name
= audit_syscall_to_name(id
, trace
->audit
.machine
);
1475 if (id
> trace
->syscalls
.max
) {
1476 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
1478 if (nsyscalls
== NULL
)
1481 if (trace
->syscalls
.max
!= -1) {
1482 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
1483 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
1485 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
1488 trace
->syscalls
.table
= nsyscalls
;
1489 trace
->syscalls
.max
= id
;
1492 sc
= trace
->syscalls
.table
+ id
;
1495 if (trace
->ev_qualifier
) {
1496 bool in
= strlist__find(trace
->ev_qualifier
, name
) != NULL
;
1498 if (!(in
^ trace
->not_ev_qualifier
)) {
1499 sc
->filtered
= true;
1501 * No need to read tracepoint information since this will be
1508 sc
->fmt
= syscall_fmt__find(sc
->name
);
1510 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
1511 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1513 if (sc
->tp_format
== NULL
&& sc
->fmt
&& sc
->fmt
->alias
) {
1514 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
1515 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1518 if (sc
->tp_format
== NULL
)
1521 sc
->args
= sc
->tp_format
->format
.fields
;
1522 sc
->nr_args
= sc
->tp_format
->format
.nr_fields
;
1523 /* drop nr field - not relevant here; does not exist on older kernels */
1524 if (sc
->args
&& strcmp(sc
->args
->name
, "nr") == 0) {
1525 sc
->args
= sc
->args
->next
;
1529 sc
->is_exit
= !strcmp(name
, "exit_group") || !strcmp(name
, "exit");
1531 return syscall__set_arg_fmts(sc
);
1535 * args is to be interpreted as a series of longs but we need to handle
1536 * 8-byte unaligned accesses. args points to raw_data within the event
1537 * and raw_data is guaranteed to be 8-byte unaligned because it is
1538 * preceded by raw_size which is a u32. So we need to copy args to a temp
1539 * variable to read it. Most notably this avoids extended load instructions
1540 * on unaligned addresses
1543 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
1544 unsigned char *args
, struct trace
*trace
,
1545 struct thread
*thread
)
1551 if (sc
->args
!= NULL
) {
1552 struct format_field
*field
;
1554 struct syscall_arg arg
= {
1561 for (field
= sc
->args
; field
;
1562 field
= field
->next
, ++arg
.idx
, bit
<<= 1) {
1566 /* special care for unaligned accesses */
1567 p
= args
+ sizeof(unsigned long) * arg
.idx
;
1568 memcpy(&val
, p
, sizeof(val
));
1571 * Suppress this argument if its value is zero and
1572 * we don't have a string associated in an
1576 !(sc
->arg_scnprintf
&&
1577 sc
->arg_scnprintf
[arg
.idx
] == SCA_STRARRAY
&&
1578 sc
->arg_parm
[arg
.idx
]))
1581 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1582 "%s%s: ", printed
? ", " : "", field
->name
);
1583 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[arg
.idx
]) {
1586 arg
.parm
= sc
->arg_parm
[arg
.idx
];
1587 printed
+= sc
->arg_scnprintf
[arg
.idx
](bf
+ printed
,
1588 size
- printed
, &arg
);
1590 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1598 /* special care for unaligned accesses */
1599 p
= args
+ sizeof(unsigned long) * i
;
1600 memcpy(&val
, p
, sizeof(val
));
1601 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1603 printed
? ", " : "", i
, val
);
1611 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
1612 union perf_event
*event
,
1613 struct perf_sample
*sample
);
1615 static struct syscall
*trace__syscall_info(struct trace
*trace
,
1616 struct perf_evsel
*evsel
, int id
)
1622 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1623 * before that, leaving at a higher verbosity level till that is
1624 * explained. Reproduced with plain ftrace with:
1626 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1627 * grep "NR -1 " /t/trace_pipe
1629 * After generating some load on the machine.
1633 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
1634 id
, perf_evsel__name(evsel
), ++n
);
1639 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
1640 trace__read_syscall_info(trace
, id
))
1643 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
1646 return &trace
->syscalls
.table
[id
];
1650 fprintf(trace
->output
, "Problems reading syscall %d", id
);
1651 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
1652 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
1653 fputs(" information\n", trace
->output
);
1658 static void thread__update_stats(struct thread_trace
*ttrace
,
1659 int id
, struct perf_sample
*sample
)
1661 struct int_node
*inode
;
1662 struct stats
*stats
;
1665 inode
= intlist__findnew(ttrace
->syscall_stats
, id
);
1669 stats
= inode
->priv
;
1670 if (stats
== NULL
) {
1671 stats
= malloc(sizeof(struct stats
));
1675 inode
->priv
= stats
;
1678 if (ttrace
->entry_time
&& sample
->time
> ttrace
->entry_time
)
1679 duration
= sample
->time
- ttrace
->entry_time
;
1681 update_stats(stats
, duration
);
1684 static int trace__printf_interrupted_entry(struct trace
*trace
, struct perf_sample
*sample
)
1686 struct thread_trace
*ttrace
;
1690 if (trace
->current
== NULL
)
1693 ttrace
= thread__priv(trace
->current
);
1695 if (!ttrace
->entry_pending
)
1698 duration
= sample
->time
- ttrace
->entry_time
;
1700 printed
= trace__fprintf_entry_head(trace
, trace
->current
, duration
, sample
->time
, trace
->output
);
1701 printed
+= fprintf(trace
->output
, "%-70s) ...\n", ttrace
->entry_str
);
1702 ttrace
->entry_pending
= false;
1707 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
1708 union perf_event
*event __maybe_unused
,
1709 struct perf_sample
*sample
)
1714 struct thread
*thread
;
1715 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
);
1716 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
1717 struct thread_trace
*ttrace
;
1725 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
1726 ttrace
= thread__trace(thread
, trace
->output
);
1730 args
= perf_evsel__sc_tp_ptr(evsel
, args
, sample
);
1732 if (ttrace
->entry_str
== NULL
) {
1733 ttrace
->entry_str
= malloc(1024);
1734 if (!ttrace
->entry_str
)
1738 if (!trace
->summary_only
)
1739 trace__printf_interrupted_entry(trace
, sample
);
1741 ttrace
->entry_time
= sample
->time
;
1742 msg
= ttrace
->entry_str
;
1743 printed
+= scnprintf(msg
+ printed
, 1024 - printed
, "%s(", sc
->name
);
1745 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, 1024 - printed
,
1746 args
, trace
, thread
);
1749 if (!trace
->duration_filter
&& !trace
->summary_only
) {
1750 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
1751 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
1754 ttrace
->entry_pending
= true;
1756 if (trace
->current
!= thread
) {
1757 thread__put(trace
->current
);
1758 trace
->current
= thread__get(thread
);
1764 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
1765 union perf_event
*event __maybe_unused
,
1766 struct perf_sample
*sample
)
1770 struct thread
*thread
;
1771 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
);
1772 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
1773 struct thread_trace
*ttrace
;
1781 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
1782 ttrace
= thread__trace(thread
, trace
->output
);
1787 thread__update_stats(ttrace
, id
, sample
);
1789 ret
= perf_evsel__sc_tp_uint(evsel
, ret
, sample
);
1791 if (id
== trace
->audit
.open_id
&& ret
>= 0 && trace
->last_vfs_getname
) {
1792 trace__set_fd_pathname(thread
, ret
, trace
->last_vfs_getname
);
1793 trace
->last_vfs_getname
= NULL
;
1794 ++trace
->stats
.vfs_getname
;
1797 ttrace
->exit_time
= sample
->time
;
1799 if (ttrace
->entry_time
) {
1800 duration
= sample
->time
- ttrace
->entry_time
;
1801 if (trace__filter_duration(trace
, duration
))
1803 } else if (trace
->duration_filter
)
1806 if (trace
->summary_only
)
1809 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
1811 if (ttrace
->entry_pending
) {
1812 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
1814 fprintf(trace
->output
, " ... [");
1815 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
1816 fprintf(trace
->output
, "]: %s()", sc
->name
);
1819 if (sc
->fmt
== NULL
) {
1821 fprintf(trace
->output
, ") = %ld", ret
);
1822 } else if (ret
< 0 && sc
->fmt
->errmsg
) {
1823 char bf
[STRERR_BUFSIZE
];
1824 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
1825 *e
= audit_errno_to_name(-ret
);
1827 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
1828 } else if (ret
== 0 && sc
->fmt
->timeout
)
1829 fprintf(trace
->output
, ") = 0 Timeout");
1830 else if (sc
->fmt
->hexret
)
1831 fprintf(trace
->output
, ") = %#lx", ret
);
1835 fputc('\n', trace
->output
);
1837 ttrace
->entry_pending
= false;
1842 static int trace__vfs_getname(struct trace
*trace
, struct perf_evsel
*evsel
,
1843 union perf_event
*event __maybe_unused
,
1844 struct perf_sample
*sample
)
1846 trace
->last_vfs_getname
= perf_evsel__rawptr(evsel
, sample
, "pathname");
1850 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
1851 union perf_event
*event __maybe_unused
,
1852 struct perf_sample
*sample
)
1854 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
1855 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
1856 struct thread
*thread
= machine__findnew_thread(trace
->host
,
1859 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
1864 ttrace
->runtime_ms
+= runtime_ms
;
1865 trace
->runtime_ms
+= runtime_ms
;
1869 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
1871 perf_evsel__strval(evsel
, sample
, "comm"),
1872 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
1874 perf_evsel__intval(evsel
, sample
, "vruntime"));
1878 static int trace__event_handler(struct trace
*trace
, struct perf_evsel
*evsel
,
1879 union perf_event
*event __maybe_unused
,
1880 struct perf_sample
*sample
)
1882 trace__printf_interrupted_entry(trace
, sample
);
1883 trace__fprintf_tstamp(trace
, sample
->time
, trace
->output
);
1885 if (trace
->trace_syscalls
)
1886 fprintf(trace
->output
, "( ): ");
1888 fprintf(trace
->output
, "%s:", evsel
->name
);
1890 if (evsel
->tp_format
) {
1891 event_format__fprintf(evsel
->tp_format
, sample
->cpu
,
1892 sample
->raw_data
, sample
->raw_size
,
1896 fprintf(trace
->output
, ")\n");
1900 static void print_location(FILE *f
, struct perf_sample
*sample
,
1901 struct addr_location
*al
,
1902 bool print_dso
, bool print_sym
)
1905 if ((verbose
|| print_dso
) && al
->map
)
1906 fprintf(f
, "%s@", al
->map
->dso
->long_name
);
1908 if ((verbose
|| print_sym
) && al
->sym
)
1909 fprintf(f
, "%s+0x%" PRIx64
, al
->sym
->name
,
1910 al
->addr
- al
->sym
->start
);
1912 fprintf(f
, "0x%" PRIx64
, al
->addr
);
1914 fprintf(f
, "0x%" PRIx64
, sample
->addr
);
1917 static int trace__pgfault(struct trace
*trace
,
1918 struct perf_evsel
*evsel
,
1919 union perf_event
*event
,
1920 struct perf_sample
*sample
)
1922 struct thread
*thread
;
1923 u8 cpumode
= event
->header
.misc
& PERF_RECORD_MISC_CPUMODE_MASK
;
1924 struct addr_location al
;
1925 char map_type
= 'd';
1926 struct thread_trace
*ttrace
;
1928 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
1929 ttrace
= thread__trace(thread
, trace
->output
);
1933 if (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
)
1938 if (trace
->summary_only
)
1941 thread__find_addr_location(thread
, cpumode
, MAP__FUNCTION
,
1944 trace__fprintf_entry_head(trace
, thread
, 0, sample
->time
, trace
->output
);
1946 fprintf(trace
->output
, "%sfault [",
1947 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
?
1950 print_location(trace
->output
, sample
, &al
, false, true);
1952 fprintf(trace
->output
, "] => ");
1954 thread__find_addr_location(thread
, cpumode
, MAP__VARIABLE
,
1958 thread__find_addr_location(thread
, cpumode
,
1959 MAP__FUNCTION
, sample
->addr
, &al
);
1967 print_location(trace
->output
, sample
, &al
, true, false);
1969 fprintf(trace
->output
, " (%c%c)\n", map_type
, al
.level
);
1974 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
1976 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
1977 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
1980 if (trace
->pid_list
|| trace
->tid_list
)
1986 static int trace__process_sample(struct perf_tool
*tool
,
1987 union perf_event
*event
,
1988 struct perf_sample
*sample
,
1989 struct perf_evsel
*evsel
,
1990 struct machine
*machine __maybe_unused
)
1992 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
1995 tracepoint_handler handler
= evsel
->handler
;
1997 if (skip_sample(trace
, sample
))
2000 if (!trace
->full_time
&& trace
->base_time
== 0)
2001 trace
->base_time
= sample
->time
;
2005 handler(trace
, evsel
, event
, sample
);
2011 static int parse_target_str(struct trace
*trace
)
2013 if (trace
->opts
.target
.pid
) {
2014 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
2015 if (trace
->pid_list
== NULL
) {
2016 pr_err("Error parsing process id string\n");
2021 if (trace
->opts
.target
.tid
) {
2022 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
2023 if (trace
->tid_list
== NULL
) {
2024 pr_err("Error parsing thread id string\n");
2032 static int trace__record(struct trace
*trace
, int argc
, const char **argv
)
2034 unsigned int rec_argc
, i
, j
;
2035 const char **rec_argv
;
2036 const char * const record_args
[] = {
2043 const char * const sc_args
[] = { "-e", };
2044 unsigned int sc_args_nr
= ARRAY_SIZE(sc_args
);
2045 const char * const majpf_args
[] = { "-e", "major-faults" };
2046 unsigned int majpf_args_nr
= ARRAY_SIZE(majpf_args
);
2047 const char * const minpf_args
[] = { "-e", "minor-faults" };
2048 unsigned int minpf_args_nr
= ARRAY_SIZE(minpf_args
);
2050 /* +1 is for the event string below */
2051 rec_argc
= ARRAY_SIZE(record_args
) + sc_args_nr
+ 1 +
2052 majpf_args_nr
+ minpf_args_nr
+ argc
;
2053 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
2055 if (rec_argv
== NULL
)
2059 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
2060 rec_argv
[j
++] = record_args
[i
];
2062 if (trace
->trace_syscalls
) {
2063 for (i
= 0; i
< sc_args_nr
; i
++)
2064 rec_argv
[j
++] = sc_args
[i
];
2066 /* event string may be different for older kernels - e.g., RHEL6 */
2067 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2068 rec_argv
[j
++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2069 else if (is_valid_tracepoint("syscalls:sys_enter"))
2070 rec_argv
[j
++] = "syscalls:sys_enter,syscalls:sys_exit";
2072 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2077 if (trace
->trace_pgfaults
& TRACE_PFMAJ
)
2078 for (i
= 0; i
< majpf_args_nr
; i
++)
2079 rec_argv
[j
++] = majpf_args
[i
];
2081 if (trace
->trace_pgfaults
& TRACE_PFMIN
)
2082 for (i
= 0; i
< minpf_args_nr
; i
++)
2083 rec_argv
[j
++] = minpf_args
[i
];
2085 for (i
= 0; i
< (unsigned int)argc
; i
++)
2086 rec_argv
[j
++] = argv
[i
];
2088 return cmd_record(j
, rec_argv
, NULL
);
2091 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
);
2093 static void perf_evlist__add_vfs_getname(struct perf_evlist
*evlist
)
2095 struct perf_evsel
*evsel
= perf_evsel__newtp("probe", "vfs_getname");
2099 if (perf_evsel__field(evsel
, "pathname") == NULL
) {
2100 perf_evsel__delete(evsel
);
2104 evsel
->handler
= trace__vfs_getname
;
2105 perf_evlist__add(evlist
, evsel
);
2108 static int perf_evlist__add_pgfault(struct perf_evlist
*evlist
,
2111 struct perf_evsel
*evsel
;
2112 struct perf_event_attr attr
= {
2113 .type
= PERF_TYPE_SOFTWARE
,
2117 attr
.config
= config
;
2118 attr
.sample_period
= 1;
2120 event_attr_init(&attr
);
2122 evsel
= perf_evsel__new(&attr
);
2126 evsel
->handler
= trace__pgfault
;
2127 perf_evlist__add(evlist
, evsel
);
2132 static void trace__handle_event(struct trace
*trace
, union perf_event
*event
, struct perf_sample
*sample
)
2134 const u32 type
= event
->header
.type
;
2135 struct perf_evsel
*evsel
;
2137 if (!trace
->full_time
&& trace
->base_time
== 0)
2138 trace
->base_time
= sample
->time
;
2140 if (type
!= PERF_RECORD_SAMPLE
) {
2141 trace__process_event(trace
, trace
->host
, event
, sample
);
2145 evsel
= perf_evlist__id2evsel(trace
->evlist
, sample
->id
);
2146 if (evsel
== NULL
) {
2147 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
->id
);
2151 if (evsel
->attr
.type
== PERF_TYPE_TRACEPOINT
&&
2152 sample
->raw_data
== NULL
) {
2153 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2154 perf_evsel__name(evsel
), sample
->tid
,
2155 sample
->cpu
, sample
->raw_size
);
2157 tracepoint_handler handler
= evsel
->handler
;
2158 handler(trace
, evsel
, event
, sample
);
2162 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
2164 struct perf_evlist
*evlist
= trace
->evlist
;
2166 unsigned long before
;
2167 const bool forks
= argc
> 0;
2168 bool draining
= false;
2172 if (trace
->trace_syscalls
&&
2173 perf_evlist__add_syscall_newtp(evlist
, trace__sys_enter
,
2175 goto out_error_raw_syscalls
;
2177 if (trace
->trace_syscalls
)
2178 perf_evlist__add_vfs_getname(evlist
);
2180 if ((trace
->trace_pgfaults
& TRACE_PFMAJ
) &&
2181 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MAJ
)) {
2185 if ((trace
->trace_pgfaults
& TRACE_PFMIN
) &&
2186 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MIN
))
2190 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
2191 trace__sched_stat_runtime
))
2192 goto out_error_sched_stat_runtime
;
2194 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
2196 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
2197 goto out_delete_evlist
;
2200 err
= trace__symbols_init(trace
, evlist
);
2202 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
2203 goto out_delete_evlist
;
2206 perf_evlist__config(evlist
, &trace
->opts
);
2208 signal(SIGCHLD
, sig_handler
);
2209 signal(SIGINT
, sig_handler
);
2212 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
2215 fprintf(trace
->output
, "Couldn't run the workload!\n");
2216 goto out_delete_evlist
;
2220 err
= perf_evlist__open(evlist
);
2222 goto out_error_open
;
2225 * Better not use !target__has_task() here because we need to cover the
2226 * case where no threads were specified in the command line, but a
2227 * workload was, and in that case we will fill in the thread_map when
2228 * we fork the workload in perf_evlist__prepare_workload.
2230 if (trace
->filter_pids
.nr
> 0)
2231 err
= perf_evlist__set_filter_pids(evlist
, trace
->filter_pids
.nr
, trace
->filter_pids
.entries
);
2232 else if (evlist
->threads
->map
[0] == -1)
2233 err
= perf_evlist__set_filter_pid(evlist
, getpid());
2236 printf("err=%d,%s\n", -err
, strerror(-err
));
2240 err
= perf_evlist__mmap(evlist
, trace
->opts
.mmap_pages
, false);
2242 goto out_error_mmap
;
2244 if (!target__none(&trace
->opts
.target
))
2245 perf_evlist__enable(evlist
);
2248 perf_evlist__start_workload(evlist
);
2250 trace
->multiple_threads
= evlist
->threads
->map
[0] == -1 ||
2251 evlist
->threads
->nr
> 1 ||
2252 perf_evlist__first(evlist
)->attr
.inherit
;
2254 before
= trace
->nr_events
;
2256 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
2257 union perf_event
*event
;
2259 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
2260 struct perf_sample sample
;
2264 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
2266 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
2270 trace__handle_event(trace
, event
, &sample
);
2272 perf_evlist__mmap_consume(evlist
, i
);
2277 if (done
&& !draining
) {
2278 perf_evlist__disable(evlist
);
2284 if (trace
->nr_events
== before
) {
2285 int timeout
= done
? 100 : -1;
2287 if (!draining
&& perf_evlist__poll(evlist
, timeout
) > 0) {
2288 if (perf_evlist__filter_pollfd(evlist
, POLLERR
| POLLHUP
) == 0)
2298 thread__zput(trace
->current
);
2300 perf_evlist__disable(evlist
);
2304 trace__fprintf_thread_summary(trace
, trace
->output
);
2306 if (trace
->show_tool_stats
) {
2307 fprintf(trace
->output
, "Stats:\n "
2308 " vfs_getname : %" PRIu64
"\n"
2309 " proc_getname: %" PRIu64
"\n",
2310 trace
->stats
.vfs_getname
,
2311 trace
->stats
.proc_getname
);
2316 perf_evlist__delete(evlist
);
2317 trace
->evlist
= NULL
;
2318 trace
->live
= false;
2321 char errbuf
[BUFSIZ
];
2323 out_error_sched_stat_runtime
:
2324 debugfs__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "sched", "sched_stat_runtime");
2327 out_error_raw_syscalls
:
2328 debugfs__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "raw_syscalls", "sys_(enter|exit)");
2332 perf_evlist__strerror_mmap(evlist
, errno
, errbuf
, sizeof(errbuf
));
2336 perf_evlist__strerror_open(evlist
, errno
, errbuf
, sizeof(errbuf
));
2339 fprintf(trace
->output
, "%s\n", errbuf
);
2340 goto out_delete_evlist
;
2343 fprintf(trace
->output
, "Not enough memory to run!\n");
2344 goto out_delete_evlist
;
2347 static int trace__replay(struct trace
*trace
)
2349 const struct perf_evsel_str_handler handlers
[] = {
2350 { "probe:vfs_getname", trace__vfs_getname
, },
2352 struct perf_data_file file
= {
2354 .mode
= PERF_DATA_MODE_READ
,
2355 .force
= trace
->force
,
2357 struct perf_session
*session
;
2358 struct perf_evsel
*evsel
;
2361 trace
->tool
.sample
= trace__process_sample
;
2362 trace
->tool
.mmap
= perf_event__process_mmap
;
2363 trace
->tool
.mmap2
= perf_event__process_mmap2
;
2364 trace
->tool
.comm
= perf_event__process_comm
;
2365 trace
->tool
.exit
= perf_event__process_exit
;
2366 trace
->tool
.fork
= perf_event__process_fork
;
2367 trace
->tool
.attr
= perf_event__process_attr
;
2368 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
2369 trace
->tool
.build_id
= perf_event__process_build_id
;
2371 trace
->tool
.ordered_events
= true;
2372 trace
->tool
.ordering_requires_timestamps
= true;
2374 /* add tid to output */
2375 trace
->multiple_threads
= true;
2377 session
= perf_session__new(&file
, false, &trace
->tool
);
2378 if (session
== NULL
)
2381 if (symbol__init(&session
->header
.env
) < 0)
2384 trace
->host
= &session
->machines
.host
;
2386 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
2390 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2391 "raw_syscalls:sys_enter");
2392 /* older kernels have syscalls tp versus raw_syscalls */
2394 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2395 "syscalls:sys_enter");
2398 (perf_evsel__init_syscall_tp(evsel
, trace__sys_enter
) < 0 ||
2399 perf_evsel__init_sc_tp_ptr_field(evsel
, args
))) {
2400 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2404 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2405 "raw_syscalls:sys_exit");
2407 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2408 "syscalls:sys_exit");
2410 (perf_evsel__init_syscall_tp(evsel
, trace__sys_exit
) < 0 ||
2411 perf_evsel__init_sc_tp_uint_field(evsel
, ret
))) {
2412 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2416 evlist__for_each(session
->evlist
, evsel
) {
2417 if (evsel
->attr
.type
== PERF_TYPE_SOFTWARE
&&
2418 (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
||
2419 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MIN
||
2420 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS
))
2421 evsel
->handler
= trace__pgfault
;
2424 err
= parse_target_str(trace
);
2430 err
= perf_session__process_events(session
);
2432 pr_err("Failed to process events, error %d", err
);
2434 else if (trace
->summary
)
2435 trace__fprintf_thread_summary(trace
, trace
->output
);
2438 perf_session__delete(session
);
2443 static size_t trace__fprintf_threads_header(FILE *fp
)
2447 printed
= fprintf(fp
, "\n Summary of events:\n\n");
2452 static size_t thread__dump_stats(struct thread_trace
*ttrace
,
2453 struct trace
*trace
, FILE *fp
)
2455 struct stats
*stats
;
2458 struct int_node
*inode
= intlist__first(ttrace
->syscall_stats
);
2463 printed
+= fprintf(fp
, "\n");
2465 printed
+= fprintf(fp
, " syscall calls min avg max stddev\n");
2466 printed
+= fprintf(fp
, " (msec) (msec) (msec) (%%)\n");
2467 printed
+= fprintf(fp
, " --------------- -------- --------- --------- --------- ------\n");
2469 /* each int_node is a syscall */
2471 stats
= inode
->priv
;
2473 double min
= (double)(stats
->min
) / NSEC_PER_MSEC
;
2474 double max
= (double)(stats
->max
) / NSEC_PER_MSEC
;
2475 double avg
= avg_stats(stats
);
2477 u64 n
= (u64
) stats
->n
;
2479 pct
= avg
? 100.0 * stddev_stats(stats
)/avg
: 0.0;
2480 avg
/= NSEC_PER_MSEC
;
2482 sc
= &trace
->syscalls
.table
[inode
->i
];
2483 printed
+= fprintf(fp
, " %-15s", sc
->name
);
2484 printed
+= fprintf(fp
, " %8" PRIu64
" %9.3f %9.3f",
2486 printed
+= fprintf(fp
, " %9.3f %9.2f%%\n", max
, pct
);
2489 inode
= intlist__next(inode
);
2492 printed
+= fprintf(fp
, "\n\n");
2497 /* struct used to pass data to per-thread function */
2498 struct summary_data
{
2500 struct trace
*trace
;
2504 static int trace__fprintf_one_thread(struct thread
*thread
, void *priv
)
2506 struct summary_data
*data
= priv
;
2507 FILE *fp
= data
->fp
;
2508 size_t printed
= data
->printed
;
2509 struct trace
*trace
= data
->trace
;
2510 struct thread_trace
*ttrace
= thread__priv(thread
);
2516 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
2518 printed
+= fprintf(fp
, " %s (%d), ", thread__comm_str(thread
), thread
->tid
);
2519 printed
+= fprintf(fp
, "%lu events, ", ttrace
->nr_events
);
2520 printed
+= fprintf(fp
, "%.1f%%", ratio
);
2522 printed
+= fprintf(fp
, ", %lu majfaults", ttrace
->pfmaj
);
2524 printed
+= fprintf(fp
, ", %lu minfaults", ttrace
->pfmin
);
2525 printed
+= fprintf(fp
, ", %.3f msec\n", ttrace
->runtime_ms
);
2526 printed
+= thread__dump_stats(ttrace
, trace
, fp
);
2528 data
->printed
+= printed
;
2533 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
2535 struct summary_data data
= {
2539 data
.printed
= trace__fprintf_threads_header(fp
);
2541 machine__for_each_thread(trace
->host
, trace__fprintf_one_thread
, &data
);
2543 return data
.printed
;
2546 static int trace__set_duration(const struct option
*opt
, const char *str
,
2547 int unset __maybe_unused
)
2549 struct trace
*trace
= opt
->value
;
2551 trace
->duration_filter
= atof(str
);
2555 static int trace__set_filter_pids(const struct option
*opt
, const char *str
,
2556 int unset __maybe_unused
)
2560 struct trace
*trace
= opt
->value
;
2562 * FIXME: introduce an intarray class, plain parse csv and create a
2563 * { int nr, int entries[] } struct...
2565 struct intlist
*list
= intlist__new(str
);
2570 i
= trace
->filter_pids
.nr
= intlist__nr_entries(list
) + 1;
2571 trace
->filter_pids
.entries
= calloc(i
, sizeof(pid_t
));
2573 if (trace
->filter_pids
.entries
== NULL
)
2576 trace
->filter_pids
.entries
[0] = getpid();
2578 for (i
= 1; i
< trace
->filter_pids
.nr
; ++i
)
2579 trace
->filter_pids
.entries
[i
] = intlist__entry(list
, i
- 1)->i
;
2581 intlist__delete(list
);
2587 static int trace__open_output(struct trace
*trace
, const char *filename
)
2591 if (!stat(filename
, &st
) && st
.st_size
) {
2592 char oldname
[PATH_MAX
];
2594 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
2596 rename(filename
, oldname
);
2599 trace
->output
= fopen(filename
, "w");
2601 return trace
->output
== NULL
? -errno
: 0;
2604 static int parse_pagefaults(const struct option
*opt
, const char *str
,
2605 int unset __maybe_unused
)
2607 int *trace_pgfaults
= opt
->value
;
2609 if (strcmp(str
, "all") == 0)
2610 *trace_pgfaults
|= TRACE_PFMAJ
| TRACE_PFMIN
;
2611 else if (strcmp(str
, "maj") == 0)
2612 *trace_pgfaults
|= TRACE_PFMAJ
;
2613 else if (strcmp(str
, "min") == 0)
2614 *trace_pgfaults
|= TRACE_PFMIN
;
2621 static void evlist__set_evsel_handler(struct perf_evlist
*evlist
, void *handler
)
2623 struct perf_evsel
*evsel
;
2625 evlist__for_each(evlist
, evsel
)
2626 evsel
->handler
= handler
;
2629 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
2631 const char *trace_usage
[] = {
2632 "perf trace [<options>] [<command>]",
2633 "perf trace [<options>] -- <command> [<options>]",
2634 "perf trace record [<options>] [<command>]",
2635 "perf trace record [<options>] -- <command> [<options>]",
2638 struct trace trace
= {
2640 .machine
= audit_detect_machine(),
2641 .open_id
= audit_name_to_syscall("open", trace
.audit
.machine
),
2651 .user_freq
= UINT_MAX
,
2652 .user_interval
= ULLONG_MAX
,
2653 .no_buffering
= true,
2654 .mmap_pages
= UINT_MAX
,
2658 .trace_syscalls
= true,
2660 const char *output_name
= NULL
;
2661 const char *ev_qualifier_str
= NULL
;
2662 const struct option trace_options
[] = {
2663 OPT_CALLBACK(0, "event", &trace
.evlist
, "event",
2664 "event selector. use 'perf list' to list available events",
2665 parse_events_option
),
2666 OPT_BOOLEAN(0, "comm", &trace
.show_comm
,
2667 "show the thread COMM next to its id"),
2668 OPT_BOOLEAN(0, "tool_stats", &trace
.show_tool_stats
, "show tool stats"),
2669 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr",
2670 "list of events to trace"),
2671 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
2672 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
2673 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
2674 "trace events on existing process id"),
2675 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
2676 "trace events on existing thread id"),
2677 OPT_CALLBACK(0, "filter-pids", &trace
, "float",
2678 "show only events with duration > N.M ms", trace__set_filter_pids
),
2679 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
2680 "system-wide collection from all CPUs"),
2681 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
2682 "list of cpus to monitor"),
2683 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
2684 "child tasks do not inherit counters"),
2685 OPT_CALLBACK('m', "mmap-pages", &trace
.opts
.mmap_pages
, "pages",
2686 "number of mmap data pages",
2687 perf_evlist__parse_mmap_pages
),
2688 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
2690 OPT_CALLBACK(0, "duration", &trace
, "float",
2691 "show only events with duration > N.M ms",
2692 trace__set_duration
),
2693 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
2694 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
2695 OPT_BOOLEAN('T', "time", &trace
.full_time
,
2696 "Show full timestamp, not time relative to first start"),
2697 OPT_BOOLEAN('s', "summary", &trace
.summary_only
,
2698 "Show only syscall summary with statistics"),
2699 OPT_BOOLEAN('S', "with-summary", &trace
.summary
,
2700 "Show all syscalls and summary with statistics"),
2701 OPT_CALLBACK_DEFAULT('F', "pf", &trace
.trace_pgfaults
, "all|maj|min",
2702 "Trace pagefaults", parse_pagefaults
, "maj"),
2703 OPT_BOOLEAN(0, "syscalls", &trace
.trace_syscalls
, "Trace syscalls"),
2704 OPT_BOOLEAN('f', "force", &trace
.force
, "don't complain, do it"),
2707 const char * const trace_subcommands
[] = { "record", NULL
};
2711 signal(SIGSEGV
, sighandler_dump_stack
);
2712 signal(SIGFPE
, sighandler_dump_stack
);
2714 trace
.evlist
= perf_evlist__new();
2715 if (trace
.evlist
== NULL
)
2718 if (trace
.evlist
== NULL
) {
2719 pr_err("Not enough memory to run!\n");
2723 argc
= parse_options_subcommand(argc
, argv
, trace_options
, trace_subcommands
,
2724 trace_usage
, PARSE_OPT_STOP_AT_NON_OPTION
);
2726 if (trace
.trace_pgfaults
) {
2727 trace
.opts
.sample_address
= true;
2728 trace
.opts
.sample_time
= true;
2731 if (trace
.evlist
->nr_entries
> 0)
2732 evlist__set_evsel_handler(trace
.evlist
, trace__event_handler
);
2734 if ((argc
>= 1) && (strcmp(argv
[0], "record") == 0))
2735 return trace__record(&trace
, argc
-1, &argv
[1]);
2737 /* summary_only implies summary option, but don't overwrite summary if set */
2738 if (trace
.summary_only
)
2739 trace
.summary
= trace
.summary_only
;
2741 if (!trace
.trace_syscalls
&& !trace
.trace_pgfaults
&&
2742 trace
.evlist
->nr_entries
== 0 /* Was --events used? */) {
2743 pr_err("Please specify something to trace.\n");
2747 if (output_name
!= NULL
) {
2748 err
= trace__open_output(&trace
, output_name
);
2750 perror("failed to create output file");
2755 if (ev_qualifier_str
!= NULL
) {
2756 const char *s
= ev_qualifier_str
;
2758 trace
.not_ev_qualifier
= *s
== '!';
2759 if (trace
.not_ev_qualifier
)
2761 trace
.ev_qualifier
= strlist__new(true, s
);
2762 if (trace
.ev_qualifier
== NULL
) {
2763 fputs("Not enough memory to parse event qualifier",
2770 err
= target__validate(&trace
.opts
.target
);
2772 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
2773 fprintf(trace
.output
, "%s", bf
);
2777 err
= target__parse_uid(&trace
.opts
.target
);
2779 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
2780 fprintf(trace
.output
, "%s", bf
);
2784 if (!argc
&& target__none(&trace
.opts
.target
))
2785 trace
.opts
.target
.system_wide
= true;
2788 err
= trace__replay(&trace
);
2790 err
= trace__run(&trace
, argc
, argv
);
2793 if (output_name
!= NULL
)
2794 fclose(trace
.output
);