// SPDX-License-Identifier: GPL-2.0
#include "util/evlist.h"
#include "util/cache.h"
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/cloexec.h"
#include "util/thread_map.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/callchain.h"
#include "util/time-utils.h"

#include <subcmd/parse-options.h>
#include "util/trace-event.h"

#include "util/debug.h"

#include <linux/kernel.h>
#include <linux/log2.h>
#include <sys/prctl.h>
#include <sys/resource.h>

#include <semaphore.h>

#include <api/fs/fs.h>
#include <linux/time64.h>

#include "sane_ctype.h"

#define PR_SET_NAME		15	/* Set process name */
#define MAX_PID			1024000
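/*
 * Editorial note, kept as a comment: the members below are the per-task
 * bookkeeping used by 'perf sched replay' - how many scheduler atoms a task
 * produced, which one is being replayed, and the array that holds them.
 */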
	unsigned long		nr_events;
	unsigned long		curr_event;
	struct sched_atom	**atoms;

enum sched_event_type {
	SCHED_EVENT_MIGRATION,
};

	enum sched_event_type	type;
	struct task_desc	*wakee;

#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"

/* task state bitmask, copied from include/linux/sched.h */
#define TASK_RUNNING		0
#define TASK_INTERRUPTIBLE	1
#define TASK_UNINTERRUPTIBLE	2
#define __TASK_STOPPED		4
#define __TASK_TRACED		8
/* in tsk->exit_state */
#define EXIT_ZOMBIE		32
#define EXIT_TRACE		(EXIT_ZOMBIE | EXIT_DEAD)
/* in tsk->state again */
#define TASK_WAKEKILL		128
#define TASK_WAKING		256
#define TASK_PARKED		512

	struct list_head	list;
	enum thread_state	state;

	struct list_head	work_list;
	struct thread		*thread;

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

struct trace_sched_handler {
	int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel,
			    struct perf_sample *sample, struct machine *machine);

	int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
			     struct perf_sample *sample, struct machine *machine);

	int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
			    struct perf_sample *sample, struct machine *machine);

	/* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
	int (*fork_event)(struct perf_sched *sched, union perf_event *event,
			  struct machine *machine);

	int (*migrate_task_event)(struct perf_sched *sched,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample,
				  struct machine *machine);
};
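/*
 * Each 'perf sched' sub-command (replay, latency, map) installs its own
 * trace_sched_handler; the tracepoint dispatchers further down only forward
 * the parsed sample to whichever callback set is currently active.
 */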
#define COLOR_PIDS PERF_COLOR_BLUE
#define COLOR_CPUS PERF_COLOR_BG_RED

struct perf_sched_map {
	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
	struct thread_map	*color_pids;
	const char		*color_pids_str;
	struct cpu_map		*color_cpus;
	const char		*color_cpus_str;
	struct cpu_map		*cpus;
	const char		*cpus_str;
};

struct perf_sched {
	struct perf_tool tool;
	const char	 *sort_order;
	unsigned long	 nr_tasks;
	struct task_desc **pid_to_task;
	struct task_desc **tasks;
	const struct trace_sched_handler *tp_handler;
	pthread_mutex_t	 start_work_mutex;
	pthread_mutex_t	 work_done_wait_mutex;
	/*
	 * Track the current task - that way we can know whether there's any
	 * weird events, such as a task being switched away that is not current.
	 */
	u32		 curr_pid[MAX_CPUS];
	struct thread	 *curr_thread[MAX_CPUS];
	char		 next_shortname1;
	char		 next_shortname2;
	unsigned int	 replay_repeat;
	unsigned long	 nr_run_events;
	unsigned long	 nr_sleep_events;
	unsigned long	 nr_wakeup_events;
	unsigned long	 nr_sleep_corrections;
	unsigned long	 nr_run_events_optimized;
	unsigned long	 targetless_wakeups;
	unsigned long	 multitarget_wakeups;
	unsigned long	 nr_runs;
	unsigned long	 nr_timestamps;
	unsigned long	 nr_unordered_timestamps;
	unsigned long	 nr_context_switch_bugs;
	unsigned long	 nr_events;
	unsigned long	 nr_lost_chunks;
	unsigned long	 nr_lost_events;
	u64		 run_measurement_overhead;
	u64		 sleep_measurement_overhead;
	u64		 runavg_cpu_usage;
	u64		 parent_cpu_usage;
	u64		 runavg_parent_cpu_usage;
	u64		 cpu_last_switched[MAX_CPUS];
	struct rb_root	 atom_root, sorted_atom_root, merged_atom_root;
	struct list_head sort_list, cmp_pid;
	struct perf_sched_map map;

	/* options for timehist command */
	unsigned int	max_stack;
	bool		show_cpu_visual;
	bool		show_migrations;
	const char	*time_str;
	struct perf_time_interval ptime;
	struct perf_time_interval hist_time;
};
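/*
 * struct perf_sched is the aggregate state shared by all sub-commands:
 * replay bookkeeping, the latency rbtrees, the map view and the timehist
 * options all hang off this one structure. The embedded perf_tool comes
 * first so the event callbacks can recover it with container_of().
 */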
/* per thread run time data */
struct thread_runtime {
	u64 last_time;      /* time of previous sched in/out event */
	u64 dt_run;         /* run time */
	u64 dt_sleep;       /* time between CPU access by sleep (off cpu) */
	u64 dt_iowait;      /* time between CPU access by iowait (off cpu) */
	u64 dt_preempt;     /* time between CPU access by preempt (off cpu) */
	u64 dt_delay;       /* time between wakeup and sched-in */
	u64 ready_to_run;   /* time of wakeup */

	struct stats run_stats;
	u64 total_run_time;
	u64 total_sleep_time;
	u64 total_iowait_time;
	u64 total_preempt_time;
	u64 total_delay_time;

	int last_state;
	u64 migrations;
};

/* per event run time data */
struct evsel_runtime {
	u64 *last_time; /* time this event was last seen per cpu */
	u32 ncpu;       /* highest cpu slot allocated */
};

/* per cpu idle time data */
struct idle_thread_runtime {
	struct thread_runtime	tr;
	struct thread		*last_thread;
	struct rb_root		sorted_root;
	struct callchain_root	callchain;
	struct callchain_cursor	cursor;
};

/* track idle times per cpu */
static struct thread **idle_threads;
static int idle_max_cpu;
static char idle_comm[] = "<idle>";

static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + sched->run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(struct perf_sched *sched)
{
	u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(sched, 0);
		T1 = get_nsecs();
		delta = T1 - T0;
		min_delta = min(min_delta, delta);
	}
	sched->run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
{
	u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1 - T0;
		min_delta = min(min_delta, delta);
	}
	sched->sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}
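/*
 * The measured overhead is folded back into the replay loops: burn_nsecs()
 * adds run_measurement_overhead to its target so the busy-wait does not
 * over-account the cost of the timing calls themselves.
 */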
static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
				u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * with it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		sched->nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	sched->nr_run_events++;
}

static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
				   u64 timestamp, struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		sched->targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		sched->multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	sched->nr_wakeup_events++;
}
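/*
 * A wakeup atom is tied to the target's pending SLEEP atom through a shared
 * semaphore: during replay the waker posts it and the sleeping thread's
 * sem_wait() returns, reproducing the recorded wakeup dependency.
 */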
static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
				  u64 timestamp, u64 task_state __maybe_unused)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	sched->nr_sleep_events++;
}

static struct task_desc *register_pid(struct perf_sched *sched,
				      unsigned long pid, const char *comm)
{
	struct task_desc *task;
	static int pid_max;

	if (sched->pid_to_task == NULL) {
		if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
			pid_max = MAX_PID;
		BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
	}
	if (pid >= (unsigned long)pid_max) {
		BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
			sizeof(struct task_desc *))) == NULL);
		while (pid >= (unsigned long)pid_max)
			sched->pid_to_task[pid_max++] = NULL;
	}

	task = sched->pid_to_task[pid];
	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = sched->nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(sched, task, 0, 0);

	sched->pid_to_task[pid] = task;
	sched->nr_tasks++;
	sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
	BUG_ON(!sched->tasks);
	sched->tasks[task->nr] = task;

	if (verbose > 0)
		printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);

	return task;
}

static void print_task_traces(struct perf_sched *sched)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(struct perf_sched *sched)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < sched->nr_tasks; i++) {
		task1 = sched->tasks[i];
		j = i + 1;
		if (j == sched->nr_tasks)
			j = 0;
		task2 = sched->tasks[j];
		add_sched_event_wakeup(sched, task1, 0, task2);
	}
}

static void perf_sched__process_event(struct perf_sched *sched,
				      struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
		case SCHED_EVENT_RUN:
			burn_nsecs(sched, atom->duration);
			break;
		case SCHED_EVENT_SLEEP:
			if (atom->wait_sem)
				ret = sem_wait(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_WAKEUP:
			if (atom->wait_sem)
				ret = sem_post(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_MIGRATION:
			break;
		default:
			BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum  = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC;
	sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC;

	return sum;
}

static int self_open_counters(struct perf_sched *sched, unsigned long cur_task)
{
	struct perf_event_attr attr;
	char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE];
	int fd;
	struct rlimit limit;
	bool need_privilege = false;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

force_again:
	fd = sys_perf_event_open(&attr, 0, -1, -1,
				 perf_event_open_cloexec_flag());

	if (fd < 0) {
		if (errno == EMFILE) {
			if (sched->force) {
				BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1);
				limit.rlim_cur += sched->nr_tasks - cur_task;
				if (limit.rlim_cur > limit.rlim_max) {
					limit.rlim_max = limit.rlim_cur;
					need_privilege = true;
				}
				if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
					if (need_privilege && errno == EPERM)
						strcpy(info, "Need privilege\n");
				} else
					goto force_again;
			} else
				strcpy(info, "Have a try with -f option\n");
		}
		pr_err("Error: sys_perf_event_open() syscall returned "
		       "with %d (%s)\n%s", fd,
		       str_error_r(errno, sbuf, sizeof(sbuf)), info);
		exit(EXIT_FAILURE);
	}
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}

struct sched_thread_parms {
	struct task_desc  *task;
	struct perf_sched *sched;
	int fd;
};

static void *thread_func(void *ctx)
{
	struct sched_thread_parms *parms = ctx;
	struct task_desc *this_task = parms->task;
	struct perf_sched *sched = parms->sched;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd = parms->fd;

	free(parms);

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	if (fd < 0)
		return NULL;
again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&sched->start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&sched->start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		perf_sched__process_event(sched, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}

static void create_tasks(struct perf_sched *sched)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&sched->start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&sched->work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < sched->nr_tasks; i++) {
		struct sched_thread_parms *parms = malloc(sizeof(*parms));
		BUG_ON(parms == NULL);
		parms->task = task = sched->tasks[i];
		parms->sched = sched;
		parms->fd = self_open_counters(sched, i);
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, parms);
		BUG_ON(err);
	}
}
static void wait_for_tasks(struct perf_sched *sched)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	sched->start_time = get_nsecs();
	sched->cpu_usage = 0;
	pthread_mutex_unlock(&sched->work_done_wait_mutex);

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&sched->start_work_mutex);

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		sched->cpu_usage += task->cpu_usage;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!sched->runavg_cpu_usage)
		sched->runavg_cpu_usage = sched->cpu_usage;
	sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat;

	sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!sched->runavg_parent_cpu_usage)
		sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
	sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) +
					  sched->parent_cpu_usage) / sched->replay_repeat;

	ret = pthread_mutex_lock(&sched->start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(struct perf_sched *sched)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks(sched);
	T1 = get_nsecs();

	delta = T1 - T0;
	sched->sum_runtime += delta;
	sched->nr_runs++;

	avg_delta = sched->sum_runtime / sched->nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sched->sum_fluct += fluct;
	if (!sched->run_avg)
		sched->run_avg = delta;
	sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;

	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);

	printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);

	printf("cpu: %0.2f / %0.2f",
		(double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);

	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sched->sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)sched->parent_cpu_usage / NSEC_PER_MSEC,
		(double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);

	printf("\n");

	if (sched->nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
	sched->nr_sleep_corrections = 0;
}

static void test_calibrations(struct perf_sched *sched)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(sched, NSEC_PER_MSEC);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(NSEC_PER_MSEC);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}
static int
replay_wakeup_event(struct perf_sched *sched,
		    struct perf_evsel *evsel, struct perf_sample *sample,
		    struct machine *machine __maybe_unused)
{
	const char *comm = perf_evsel__strval(evsel, sample, "comm");
	const u32 pid	 = perf_evsel__intval(evsel, sample, "pid");
	struct task_desc *waker, *wakee;

	if (verbose > 0) {
		printf("sched_wakeup event %p\n", evsel);

		printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
	}

	waker = register_pid(sched, sample->tid, "<unknown>");
	wakee = register_pid(sched, pid, comm);

	add_sched_event_wakeup(sched, waker, sample->time, wakee);
	return 0;
}

static int replay_switch_event(struct perf_sched *sched,
			       struct perf_evsel *evsel,
			       struct perf_sample *sample,
			       struct machine *machine __maybe_unused)
{
	const char *prev_comm  = perf_evsel__strval(evsel, sample, "prev_comm"),
		   *next_comm  = perf_evsel__strval(evsel, sample, "next_comm");
	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
	struct task_desc *prev, __maybe_unused *next;
	u64 timestamp0, timestamp = sample->time;
	int cpu = sample->cpu;
	s64 delta;

	if (verbose > 0)
		printf("sched_switch event %p\n", evsel);

	if (cpu >= MAX_CPUS || cpu < 0)
		return 0;

	timestamp0 = sched->cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0) {
		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
		return -1;
	}

	pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
		 prev_comm, prev_pid, next_comm, next_pid, delta);

	prev = register_pid(sched, prev_pid, prev_comm);
	next = register_pid(sched, next_pid, next_comm);

	sched->cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(sched, prev, timestamp, delta);
	add_sched_event_sleep(sched, prev, timestamp, prev_state);

	return 0;
}

static int replay_fork_event(struct perf_sched *sched,
			     union perf_event *event,
			     struct machine *machine)
{
	struct thread *child, *parent;

	child = machine__findnew_thread(machine, event->fork.pid,
					event->fork.tid);
	parent = machine__findnew_thread(machine, event->fork.ppid,
					 event->fork.ptid);

	if (child == NULL || parent == NULL) {
		pr_debug("thread does not exist on fork event: child %p, parent %p\n",
			 child, parent);
		goto out_put;
	}

	if (verbose > 0) {
		printf("fork event\n");
		printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
		printf("...  child: %s/%d\n", thread__comm_str(child), child->tid);
	}

	register_pid(sched, parent->tid, thread__comm_str(parent));
	register_pid(sched, child->tid, thread__comm_str(child));
out_put:
	thread__put(child);
	thread__put(parent);
	return 0;
}
{
897 struct list_head list
;
901 thread_lat_cmp(struct list_head
*list
, struct work_atoms
*l
, struct work_atoms
*r
)
903 struct sort_dimension
*sort
;
906 BUG_ON(list_empty(list
));
908 list_for_each_entry(sort
, list
, list
) {
909 ret
= sort
->cmp(l
, r
);
917 static struct work_atoms
*
918 thread_atoms_search(struct rb_root
*root
, struct thread
*thread
,
919 struct list_head
*sort_list
)
921 struct rb_node
*node
= root
->rb_node
;
922 struct work_atoms key
= { .thread
= thread
};
925 struct work_atoms
*atoms
;
928 atoms
= container_of(node
, struct work_atoms
, node
);
930 cmp
= thread_lat_cmp(sort_list
, &key
, atoms
);
932 node
= node
->rb_left
;
934 node
= node
->rb_right
;
936 BUG_ON(thread
!= atoms
->thread
);
944 __thread_latency_insert(struct rb_root
*root
, struct work_atoms
*data
,
945 struct list_head
*sort_list
)
947 struct rb_node
**new = &(root
->rb_node
), *parent
= NULL
;
950 struct work_atoms
*this;
953 this = container_of(*new, struct work_atoms
, node
);
956 cmp
= thread_lat_cmp(sort_list
, data
, this);
959 new = &((*new)->rb_left
);
961 new = &((*new)->rb_right
);
964 rb_link_node(&data
->node
, parent
, new);
965 rb_insert_color(&data
->node
, root
);
static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms) {
		pr_err("No memory at %s\n", __func__);
		return -1;
	}

	atoms->thread = thread__get(thread);
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
	return 0;
}

static char sched_out_state(u64 prev_state)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[prev_state];
}

static int
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom) {
		pr_err("No memory at %s", __func__);
		return -1;
	}

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
	return 0;
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta,
		  u64 timestamp __maybe_unused)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}
static int latency_switch_event(struct perf_sched *sched,
				struct perf_evsel *evsel,
				struct perf_sample *sample,
				struct machine *machine)
{
	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0, timestamp = sample->time;
	int cpu = sample->cpu, err = -1;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = sched->cpu_last_switched[cpu];
	sched->cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0) {
		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
		return -1;
	}

	sched_out = machine__findnew_thread(machine, -1, prev_pid);
	sched_in = machine__findnew_thread(machine, -1, next_pid);
	if (sched_out == NULL || sched_in == NULL)
		goto out_put;

	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
	if (!out_events) {
		if (thread_atoms_insert(sched, sched_out))
			goto out_put;
		out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
		if (!out_events) {
			pr_err("out-event: Internal tree error");
			goto out_put;
		}
	}
	if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
		return -1;

	in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
	if (!in_events) {
		if (thread_atoms_insert(sched, sched_in))
			goto out_put;
		in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
		if (!in_events) {
			pr_err("in-event: Internal tree error");
			goto out_put;
		}
		/*
		 * Task came in we have not heard about yet,
		 * add in an initial atom in runnable state:
		 */
		if (add_sched_out_event(in_events, 'R', timestamp))
			goto out_put;
	}
	add_sched_in_event(in_events, timestamp);
	err = 0;
out_put:
	thread__put(sched_out);
	thread__put(sched_in);
	return err;
}

static int latency_runtime_event(struct perf_sched *sched,
				 struct perf_evsel *evsel,
				 struct perf_sample *sample,
				 struct machine *machine)
{
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
	const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	struct thread *thread = machine__findnew_thread(machine, -1, pid);
	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
	u64 timestamp = sample->time;
	int cpu = sample->cpu, err = -1;

	if (thread == NULL)
		return -1;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		if (thread_atoms_insert(sched, thread))
			goto out_put;
		atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
		if (!atoms) {
			pr_err("in-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'R', timestamp))
			goto out_put;
	}

	add_runtime_event(atoms, runtime, timestamp);
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int latency_wakeup_event(struct perf_sched *sched,
				struct perf_evsel *evsel,
				struct perf_sample *sample,
				struct machine *machine)
{
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;
	u64 timestamp = sample->time;
	int err = -1;

	wakee = machine__findnew_thread(machine, -1, pid);
	if (wakee == NULL)
		return err;
	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
	if (!atoms) {
		if (thread_atoms_insert(sched, wakee))
			goto out_put;
		atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
		if (!atoms) {
			pr_err("wakeup-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'S', timestamp))
			goto out_put;
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * A wakeup event is not guaranteed to arrive while the task is off
	 * the run queue; it may also fire while the task is already runnable
	 * and only flips ->state to TASK_RUNNING, in which case we should not
	 * set ->wake_up_time.
	 *
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are only looking at only one, so don't
	 * skip in this case.
	 */
	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		goto out_ok;

	sched->nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		sched->nr_unordered_timestamps++;
		goto out_ok;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
out_ok:
	err = 0;
out_put:
	thread__put(wakee);
	return err;
}

static int latency_migrate_task_event(struct perf_sched *sched,
				      struct perf_evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
	u64 timestamp = sample->time;
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;
	int err = -1;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (sched->profile_cpu == -1)
		return 0;

	migrant = machine__findnew_thread(machine, -1, pid);
	if (migrant == NULL)
		return -1;
	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
	if (!atoms) {
		if (thread_atoms_insert(sched, migrant))
			goto out_put;
		register_pid(sched, migrant->tid, thread__comm_str(migrant));
		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
		if (!atoms) {
			pr_err("migration-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'R', timestamp))
			goto out_put;
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	sched->nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		sched->nr_unordered_timestamps++;
	err = 0;
out_put:
	thread__put(migrant);
	return err;
}

static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;
	char max_lat_at[32];

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
		return;

	sched->all_runtime += work_list->total_runtime;
	sched->all_count   += work_list->nb_atoms;

	if (work_list->num_merged > 1)
		ret = printf("  %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
	else
		ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;
	timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
	       (double)work_list->total_runtime / NSEC_PER_MSEC,
	       work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
	       (double)work_list->max_lat / NSEC_PER_MSEC,
	       max_lat_at);
}
*l
, struct work_atoms
*r
)
1310 if (l
->thread
== r
->thread
)
1312 if (l
->thread
->tid
< r
->thread
->tid
)
1314 if (l
->thread
->tid
> r
->thread
->tid
)
1316 return (int)(l
->thread
- r
->thread
);
1319 static int avg_cmp(struct work_atoms
*l
, struct work_atoms
*r
)
1329 avgl
= l
->total_lat
/ l
->nb_atoms
;
1330 avgr
= r
->total_lat
/ r
->nb_atoms
;
1340 static int max_cmp(struct work_atoms
*l
, struct work_atoms
*r
)
1342 if (l
->max_lat
< r
->max_lat
)
1344 if (l
->max_lat
> r
->max_lat
)
1350 static int switch_cmp(struct work_atoms
*l
, struct work_atoms
*r
)
1352 if (l
->nb_atoms
< r
->nb_atoms
)
1354 if (l
->nb_atoms
> r
->nb_atoms
)
1360 static int runtime_cmp(struct work_atoms
*l
, struct work_atoms
*r
)
1362 if (l
->total_runtime
< r
->total_runtime
)
1364 if (l
->total_runtime
> r
->total_runtime
)
1370 static int sort_dimension__add(const char *tok
, struct list_head
*list
)
1373 static struct sort_dimension avg_sort_dimension
= {
1377 static struct sort_dimension max_sort_dimension
= {
1381 static struct sort_dimension pid_sort_dimension
= {
1385 static struct sort_dimension runtime_sort_dimension
= {
1389 static struct sort_dimension switch_sort_dimension
= {
1393 struct sort_dimension
*available_sorts
[] = {
1394 &pid_sort_dimension
,
1395 &avg_sort_dimension
,
1396 &max_sort_dimension
,
1397 &switch_sort_dimension
,
1398 &runtime_sort_dimension
,
1401 for (i
= 0; i
< ARRAY_SIZE(available_sorts
); i
++) {
1402 if (!strcmp(available_sorts
[i
]->name
, tok
)) {
1403 list_add_tail(&available_sorts
[i
]->list
, list
);
1412 static void perf_sched__sort_lat(struct perf_sched
*sched
)
1414 struct rb_node
*node
;
1415 struct rb_root
*root
= &sched
->atom_root
;
1418 struct work_atoms
*data
;
1419 node
= rb_first(root
);
1423 rb_erase(node
, root
);
1424 data
= rb_entry(node
, struct work_atoms
, node
);
1425 __thread_latency_insert(&sched
->sorted_atom_root
, data
, &sched
->sort_list
);
1427 if (root
== &sched
->atom_root
) {
1428 root
= &sched
->merged_atom_root
;
1433 static int process_sched_wakeup_event(struct perf_tool
*tool
,
1434 struct perf_evsel
*evsel
,
1435 struct perf_sample
*sample
,
1436 struct machine
*machine
)
1438 struct perf_sched
*sched
= container_of(tool
, struct perf_sched
, tool
);
1440 if (sched
->tp_handler
->wakeup_event
)
1441 return sched
->tp_handler
->wakeup_event(sched
, evsel
, sample
, machine
);
1451 static bool thread__has_color(struct thread
*thread
)
1453 union map_priv priv
= {
1454 .ptr
= thread__priv(thread
),
1460 static struct thread
*
1461 map__findnew_thread(struct perf_sched
*sched
, struct machine
*machine
, pid_t pid
, pid_t tid
)
1463 struct thread
*thread
= machine__findnew_thread(machine
, pid
, tid
);
1464 union map_priv priv
= {
1468 if (!sched
->map
.color_pids
|| !thread
|| thread__priv(thread
))
1471 if (thread_map__has(sched
->map
.color_pids
, tid
))
1474 thread__set_priv(thread
, priv
.ptr
);
1478 static int map_switch_event(struct perf_sched
*sched
, struct perf_evsel
*evsel
,
1479 struct perf_sample
*sample
, struct machine
*machine
)
1481 const u32 next_pid
= perf_evsel__intval(evsel
, sample
, "next_pid");
1482 struct thread
*sched_in
;
1484 u64 timestamp0
, timestamp
= sample
->time
;
1486 int i
, this_cpu
= sample
->cpu
;
1488 bool new_cpu
= false;
1489 const char *color
= PERF_COLOR_NORMAL
;
1490 char stimestamp
[32];
1492 BUG_ON(this_cpu
>= MAX_CPUS
|| this_cpu
< 0);
1494 if (this_cpu
> sched
->max_cpu
)
1495 sched
->max_cpu
= this_cpu
;
1497 if (sched
->map
.comp
) {
1498 cpus_nr
= bitmap_weight(sched
->map
.comp_cpus_mask
, MAX_CPUS
);
1499 if (!test_and_set_bit(this_cpu
, sched
->map
.comp_cpus_mask
)) {
1500 sched
->map
.comp_cpus
[cpus_nr
++] = this_cpu
;
1504 cpus_nr
= sched
->max_cpu
;
1506 timestamp0
= sched
->cpu_last_switched
[this_cpu
];
1507 sched
->cpu_last_switched
[this_cpu
] = timestamp
;
1509 delta
= timestamp
- timestamp0
;
1514 pr_err("hm, delta: %" PRIu64
" < 0 ?\n", delta
);
1518 sched_in
= map__findnew_thread(sched
, machine
, -1, next_pid
);
1519 if (sched_in
== NULL
)
1522 sched
->curr_thread
[this_cpu
] = thread__get(sched_in
);
1527 if (!sched_in
->shortname
[0]) {
1528 if (!strcmp(thread__comm_str(sched_in
), "swapper")) {
1530 * Don't allocate a letter-number for swapper:0
1531 * as a shortname. Instead, we use '.' for it.
1533 sched_in
->shortname
[0] = '.';
1534 sched_in
->shortname
[1] = ' ';
1536 sched_in
->shortname
[0] = sched
->next_shortname1
;
1537 sched_in
->shortname
[1] = sched
->next_shortname2
;
1539 if (sched
->next_shortname1
< 'Z') {
1540 sched
->next_shortname1
++;
1542 sched
->next_shortname1
= 'A';
1543 if (sched
->next_shortname2
< '9')
1544 sched
->next_shortname2
++;
1546 sched
->next_shortname2
= '0';
1552 for (i
= 0; i
< cpus_nr
; i
++) {
1553 int cpu
= sched
->map
.comp
? sched
->map
.comp_cpus
[i
] : i
;
1554 struct thread
*curr_thread
= sched
->curr_thread
[cpu
];
1555 const char *pid_color
= color
;
1556 const char *cpu_color
= color
;
1558 if (curr_thread
&& thread__has_color(curr_thread
))
1559 pid_color
= COLOR_PIDS
;
1561 if (sched
->map
.cpus
&& !cpu_map__has(sched
->map
.cpus
, cpu
))
1564 if (sched
->map
.color_cpus
&& cpu_map__has(sched
->map
.color_cpus
, cpu
))
1565 cpu_color
= COLOR_CPUS
;
1567 if (cpu
!= this_cpu
)
1568 color_fprintf(stdout
, color
, " ");
1570 color_fprintf(stdout
, cpu_color
, "*");
1572 if (sched
->curr_thread
[cpu
])
1573 color_fprintf(stdout
, pid_color
, "%2s ", sched
->curr_thread
[cpu
]->shortname
);
1575 color_fprintf(stdout
, color
, " ");
1578 if (sched
->map
.cpus
&& !cpu_map__has(sched
->map
.cpus
, this_cpu
))
1581 timestamp__scnprintf_usec(timestamp
, stimestamp
, sizeof(stimestamp
));
1582 color_fprintf(stdout
, color
, " %12s secs ", stimestamp
);
1583 if (new_shortname
|| (verbose
> 0 && sched_in
->tid
)) {
1584 const char *pid_color
= color
;
1586 if (thread__has_color(sched_in
))
1587 pid_color
= COLOR_PIDS
;
1589 color_fprintf(stdout
, pid_color
, "%s => %s:%d",
1590 sched_in
->shortname
, thread__comm_str(sched_in
), sched_in
->tid
);
1593 if (sched
->map
.comp
&& new_cpu
)
1594 color_fprintf(stdout
, color
, " (CPU %d)", this_cpu
);
1597 color_fprintf(stdout
, color
, "\n");
1599 thread__put(sched_in
);
1604 static int process_sched_switch_event(struct perf_tool
*tool
,
1605 struct perf_evsel
*evsel
,
1606 struct perf_sample
*sample
,
1607 struct machine
*machine
)
1609 struct perf_sched
*sched
= container_of(tool
, struct perf_sched
, tool
);
1610 int this_cpu
= sample
->cpu
, err
= 0;
1611 u32 prev_pid
= perf_evsel__intval(evsel
, sample
, "prev_pid"),
1612 next_pid
= perf_evsel__intval(evsel
, sample
, "next_pid");
1614 if (sched
->curr_pid
[this_cpu
] != (u32
)-1) {
1616 * Are we trying to switch away a PID that is
1619 if (sched
->curr_pid
[this_cpu
] != prev_pid
)
1620 sched
->nr_context_switch_bugs
++;
1623 if (sched
->tp_handler
->switch_event
)
1624 err
= sched
->tp_handler
->switch_event(sched
, evsel
, sample
, machine
);
1626 sched
->curr_pid
[this_cpu
] = next_pid
;
1630 static int process_sched_runtime_event(struct perf_tool
*tool
,
1631 struct perf_evsel
*evsel
,
1632 struct perf_sample
*sample
,
1633 struct machine
*machine
)
1635 struct perf_sched
*sched
= container_of(tool
, struct perf_sched
, tool
);
1637 if (sched
->tp_handler
->runtime_event
)
1638 return sched
->tp_handler
->runtime_event(sched
, evsel
, sample
, machine
);
1643 static int perf_sched__process_fork_event(struct perf_tool
*tool
,
1644 union perf_event
*event
,
1645 struct perf_sample
*sample
,
1646 struct machine
*machine
)
1648 struct perf_sched
*sched
= container_of(tool
, struct perf_sched
, tool
);
1650 /* run the fork event through the perf machineruy */
1651 perf_event__process_fork(tool
, event
, sample
, machine
);
1653 /* and then run additional processing needed for this command */
1654 if (sched
->tp_handler
->fork_event
)
1655 return sched
->tp_handler
->fork_event(sched
, event
, machine
);
1660 static int process_sched_migrate_task_event(struct perf_tool
*tool
,
1661 struct perf_evsel
*evsel
,
1662 struct perf_sample
*sample
,
1663 struct machine
*machine
)
1665 struct perf_sched
*sched
= container_of(tool
, struct perf_sched
, tool
);
1667 if (sched
->tp_handler
->migrate_task_event
)
1668 return sched
->tp_handler
->migrate_task_event(sched
, evsel
, sample
, machine
);
1673 typedef int (*tracepoint_handler
)(struct perf_tool
*tool
,
1674 struct perf_evsel
*evsel
,
1675 struct perf_sample
*sample
,
1676 struct machine
*machine
);
1678 static int perf_sched__process_tracepoint_sample(struct perf_tool
*tool __maybe_unused
,
1679 union perf_event
*event __maybe_unused
,
1680 struct perf_sample
*sample
,
1681 struct perf_evsel
*evsel
,
1682 struct machine
*machine
)
1686 if (evsel
->handler
!= NULL
) {
1687 tracepoint_handler f
= evsel
->handler
;
1688 err
= f(tool
, evsel
, sample
, machine
);
1694 static int perf_sched__read_events(struct perf_sched
*sched
)
1696 const struct perf_evsel_str_handler handlers
[] = {
1697 { "sched:sched_switch", process_sched_switch_event
, },
1698 { "sched:sched_stat_runtime", process_sched_runtime_event
, },
1699 { "sched:sched_wakeup", process_sched_wakeup_event
, },
1700 { "sched:sched_wakeup_new", process_sched_wakeup_event
, },
1701 { "sched:sched_migrate_task", process_sched_migrate_task_event
, },
1703 struct perf_session
*session
;
1704 struct perf_data data
= {
1708 .mode
= PERF_DATA_MODE_READ
,
1709 .force
= sched
->force
,
1713 session
= perf_session__new(&data
, false, &sched
->tool
);
1714 if (session
== NULL
) {
1715 pr_debug("No Memory for session\n");
1719 symbol__init(&session
->header
.env
);
1721 if (perf_session__set_tracepoints_handlers(session
, handlers
))
1724 if (perf_session__has_traces(session
, "record -R")) {
1725 int err
= perf_session__process_events(session
);
1727 pr_err("Failed to process events, error %d", err
);
1731 sched
->nr_events
= session
->evlist
->stats
.nr_events
[0];
1732 sched
->nr_lost_events
= session
->evlist
->stats
.total_lost
;
1733 sched
->nr_lost_chunks
= session
->evlist
->stats
.nr_events
[PERF_RECORD_LOST
];
1738 perf_session__delete(session
);
/*
 * scheduling times are printed as msec.usec
 */
static inline void print_sched_time(unsigned long long nsecs, int width)
{
	unsigned long msecs;
	unsigned long usecs;

	msecs  = nsecs / NSEC_PER_MSEC;
	nsecs -= msecs * NSEC_PER_MSEC;
	usecs  = nsecs / NSEC_PER_USEC;
	printf("%*lu.%03lu ", width, msecs, usecs);
}

/*
 * returns runtime data for event, allocating memory for it the
 * first time it is used.
 */
static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel)
{
	struct evsel_runtime *r = evsel->priv;

	if (r == NULL) {
		r = zalloc(sizeof(struct evsel_runtime));
		evsel->priv = r;
	}

	return r;
}

/*
 * save last time event was seen per cpu
 */
static void perf_evsel__save_time(struct perf_evsel *evsel,
				  u64 timestamp, u32 cpu)
{
	struct evsel_runtime *r = perf_evsel__get_runtime(evsel);

	if (r == NULL)
		return;

	if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
		int i, n = __roundup_pow_of_two(cpu+1);
		void *p = r->last_time;

		p = realloc(r->last_time, n * sizeof(u64));
		if (!p)
			return;

		r->last_time = p;
		for (i = r->ncpu; i < n; ++i)
			r->last_time[i] = (u64) 0;

		r->ncpu = n;
	}

	r->last_time[cpu] = timestamp;
}

/* returns last time this event was seen on the given cpu */
static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
{
	struct evsel_runtime *r = perf_evsel__get_runtime(evsel);

	if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
		return 0;

	return r->last_time[cpu];
}
static int comm_width = 30;

static char *timehist_get_commstr(struct thread *thread)
{
	static char str[32];
	const char *comm = thread__comm_str(thread);
	pid_t tid = thread->tid;
	pid_t pid = thread->pid_;
	int n;

	if (pid == 0)
		n = scnprintf(str, sizeof(str), "%s", comm);

	else if (tid != pid)
		n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);

	else
		n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);

	if (n > comm_width)
		comm_width = n;

	return str;
}

static void timehist_header(struct perf_sched *sched)
{
	u32 ncpus = sched->max_cpu + 1;
	u32 i, j;

	printf("%15s %6s ", "time", "cpu");

	if (sched->show_cpu_visual) {
		printf(" ");
		for (i = 0, j = 0; i < ncpus; ++i) {
			printf("%x", j++);
			if (j > 15)
				j = 0;
		}
		printf(" ");
	}

	printf(" %-*s  %9s  %9s  %9s", comm_width,
		"task name", "wait time", "sch delay", "run time");

	if (sched->show_state)
		printf("  %s", "state");

	printf("\n");

	/* units row */
	printf("%15s %-6s ", "", "");

	if (sched->show_cpu_visual)
		printf(" %*s ", ncpus, "");

	printf(" %-*s  %9s  %9s  %9s", comm_width,
	       "[tid/pid]", "(msec)", "(msec)", "(msec)");

	if (sched->show_state)
		printf("  %5s", "");

	printf("\n");

	/* separator row */
	printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);

	if (sched->show_cpu_visual)
		printf(" %.*s ", ncpus, graph_dotted_line);

	printf(" %.*s  %.9s  %.9s  %.9s", comm_width,
		graph_dotted_line, graph_dotted_line, graph_dotted_line,
		graph_dotted_line);

	if (sched->show_state)
		printf("  %.5s", graph_dotted_line);

	printf("\n");
}

static char task_state_char(struct thread *thread, int state)
{
	static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
	unsigned bit = state ? ffs(state) : 0;

	/* 'I' for the idle task */
	if (thread->tid == 0)
		return 'I';

	return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
}

static void timehist_print_sample(struct perf_sched *sched,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample,
				  struct addr_location *al,
				  struct thread *thread,
				  u64 t, int state)
{
	struct thread_runtime *tr = thread__priv(thread);
	const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
	u32 max_cpus = sched->max_cpu + 1;
	char tstr[64];
	char nstr[30];
	u64 wait_time;

	timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			/* flag idle times with 'i'; others are sched events */
			if (i == sample->cpu)
				c = (thread->tid == 0) ? 'i' : 's';
			else
				c = ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
	print_sched_time(wait_time, 6);

	print_sched_time(tr->dt_delay, 6);
	print_sched_time(tr->dt_run, 6);

	if (sched->show_state)
		printf(" %5c ", task_state_char(thread, state));

	if (sched->show_next) {
		snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
		printf(" %-*s", comm_width, nstr);
	}

	if (sched->show_wakeups && !sched->show_next)
		printf("  %-*s", comm_width, "");

	if (thread->tid == 0)
		goto out;

	if (sched->show_callchain)
		printf("  ");

	sample__fprintf_sym(sample, al, 0,
			    EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
			    EVSEL__PRINT_CALLCHAIN_ARROW |
			    EVSEL__PRINT_SKIP_IGNORED,
			    &callchain_cursor, stdout);

out:
	printf("\n");
}
/*
 * Explanation of delta-time stats:
 *
 *            t = time of current schedule out event
 *        tprev = time of previous sched out event
 *                also time of schedule-in event for current task
 *    last_time = time of last sched change event for current task
 *                (i.e, time process was last scheduled out)
 * ready_to_run = time of wakeup for current task
 *
 * -----|------------|------------|------------|------
 *    last         ready        tprev          t
 *    time         to run
 *
 *      |-------- dt_wait --------|
 *                   |- dt_delay -|-- dt_run --|
 *
 *   dt_run = run time of current task
 *  dt_wait = time between last schedule out event for task and tprev
 *            represents time spent off the cpu
 * dt_delay = time between wakeup and schedule-in of task
 */

static void timehist_update_runtime_stats(struct thread_runtime *r,
					  u64 t, u64 tprev)
{
	r->dt_delay   = 0;
	r->dt_sleep   = 0;
	r->dt_iowait  = 0;
	r->dt_preempt = 0;
	r->dt_run     = 0;

	if (tprev) {
		r->dt_run = t - tprev;
		if (r->ready_to_run) {
			if (r->ready_to_run > tprev)
				pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
			else
				r->dt_delay = tprev - r->ready_to_run;
		}

		if (r->last_time > tprev)
			pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
		else if (r->last_time) {
			u64 dt_wait = tprev - r->last_time;

			if (r->last_state == TASK_RUNNING)
				r->dt_preempt = dt_wait;
			else if (r->last_state == TASK_UNINTERRUPTIBLE)
				r->dt_iowait = dt_wait;
			else
				r->dt_sleep = dt_wait;
		}
	}

	update_stats(&r->run_stats, r->dt_run);

	r->total_run_time     += r->dt_run;
	r->total_delay_time   += r->dt_delay;
	r->total_sleep_time   += r->dt_sleep;
	r->total_iowait_time  += r->dt_iowait;
	r->total_preempt_time += r->dt_preempt;
}
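/*
 * Off-cpu time between the last sched-out and this sched-in is classified
 * by the state the task was in when it left the CPU: TASK_RUNNING means it
 * was preempted, TASK_UNINTERRUPTIBLE is counted as iowait, anything else
 * as sleep.
 */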
static bool is_idle_sample(struct perf_sample *sample,
			   struct perf_evsel *evsel)
{
	/* pid 0 == swapper == idle task */
	if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
		return perf_evsel__intval(evsel, sample, "prev_pid") == 0;

	return sample->pid == 0;
}

static void save_task_callchain(struct perf_sched *sched,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct callchain_cursor *cursor = &callchain_cursor;
	struct thread *thread;

	/* want main thread for process - has maps */
	thread = machine__findnew_thread(machine, sample->pid, sample->pid);
	if (thread == NULL) {
		pr_debug("Failed to get thread for pid %d.\n", sample->pid);
		return;
	}

	if (!symbol_conf.use_callchain || sample->callchain == NULL)
		return;

	if (thread__resolve_callchain(thread, cursor, evsel, sample,
				      NULL, NULL, sched->max_stack + 2) != 0) {
		if (verbose > 0)
			pr_err("Failed to resolve callchain. Skipping\n");

		return;
	}

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;
		struct symbol *sym;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		sym = node->sym;
		if (sym) {
			if (!strcmp(sym->name, "schedule") ||
			    !strcmp(sym->name, "__schedule") ||
			    !strcmp(sym->name, "preempt_schedule"))
				sym->ignore = 1;
		}

		callchain_cursor_advance(cursor);
	}
}

static int init_idle_thread(struct thread *thread)
{
	struct idle_thread_runtime *itr;

	thread__set_comm(thread, idle_comm, 0);
	itr = zalloc(sizeof(*itr));
	if (itr == NULL)
		return -ENOMEM;

	init_stats(&itr->tr.run_stats);
	callchain_init(&itr->callchain);
	callchain_cursor_reset(&itr->cursor);
	thread__set_priv(thread, itr);

	return 0;
}

/*
 * Track idle stats per cpu by maintaining a local thread
 * struct for the idle task on each cpu.
 */
static int init_idle_threads(int ncpu)
{
	int i, ret;

	idle_threads = zalloc(ncpu * sizeof(struct thread *));
	if (!idle_threads)
		return -ENOMEM;

	idle_max_cpu = ncpu;

	/* allocate the actual thread struct if needed */
	for (i = 0; i < ncpu; ++i) {
		idle_threads[i] = thread__new(0, 0);
		if (idle_threads[i] == NULL)
			return -ENOMEM;

		ret = init_idle_thread(idle_threads[i]);
		if (ret < 0)
			return ret;
	}

	return 0;
}

static void free_idle_threads(void)
{
	int i;

	if (idle_threads == NULL)
		return;

	for (i = 0; i < idle_max_cpu; ++i) {
		if ((idle_threads[i]))
			thread__delete(idle_threads[i]);
	}

	free(idle_threads);
}

static struct thread *get_idle_thread(int cpu)
{
	/*
	 * expand/allocate array of pointers to local thread
	 * structs if needed
	 */
	if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
		int i, j = __roundup_pow_of_two(cpu+1);
		void *p;

		p = realloc(idle_threads, j * sizeof(struct thread *));
		if (!p)
			return NULL;

		idle_threads = (struct thread **) p;
		for (i = idle_max_cpu; i < j; ++i)
			idle_threads[i] = NULL;

		idle_max_cpu = j;
	}

	/* allocate a new thread struct if needed */
	if (idle_threads[cpu] == NULL) {
		idle_threads[cpu] = thread__new(0, 0);

		if (idle_threads[cpu]) {
			if (init_idle_thread(idle_threads[cpu]) < 0)
				return NULL;
		}
	}

	return idle_threads[cpu];
}

static void save_idle_callchain(struct idle_thread_runtime *itr,
				struct perf_sample *sample)
{
	if (!symbol_conf.use_callchain || sample->callchain == NULL)
		return;

	callchain_cursor__copy(&itr->cursor, &callchain_cursor);
}

/*
 * handle runtime stats saved per thread
 */
static struct thread_runtime *thread__init_runtime(struct thread *thread)
{
	struct thread_runtime *r;

	r = zalloc(sizeof(struct thread_runtime));
	if (!r)
		return NULL;

	init_stats(&r->run_stats);
	thread__set_priv(thread, r);

	return r;
}

static struct thread_runtime *thread__get_runtime(struct thread *thread)
{
	struct thread_runtime *tr;

	tr = thread__priv(thread);
	if (tr == NULL) {
		tr = thread__init_runtime(thread);
		if (tr == NULL)
			pr_debug("Failed to malloc memory for runtime data.\n");
	}

	return tr;
}

static struct thread *timehist_get_thread(struct perf_sched *sched,
					  struct perf_sample *sample,
					  struct machine *machine,
					  struct perf_evsel *evsel)
{
	struct thread *thread;

	if (is_idle_sample(sample, evsel)) {
		thread = get_idle_thread(sample->cpu);
		if (thread == NULL)
			pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);

	} else {
		/* there were samples with tid 0 but non-zero pid */
		thread = machine__findnew_thread(machine, sample->pid,
						 sample->tid ?: sample->pid);
		if (thread == NULL) {
			pr_debug("Failed to get thread for tid %d. skipping sample.\n",
				 sample->tid);
		}

		save_task_callchain(sched, sample, evsel, machine);
		if (sched->idle_hist) {
			struct thread *idle;
			struct idle_thread_runtime *itr;

			idle = get_idle_thread(sample->cpu);
			if (idle == NULL) {
				pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
				return NULL;
			}

			itr = thread__priv(idle);
			if (itr == NULL)
				return NULL;

			itr->last_thread = thread;

			/* copy task callchain when entering to idle */
			if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
				save_idle_callchain(itr, sample);
		}
	}

	return thread;
}
static bool timehist_skip_sample(struct perf_sched *sched,
				 struct thread *thread,
				 struct perf_evsel *evsel,
				 struct perf_sample *sample)
{
	bool rc = false;

	if (thread__is_filtered(thread)) {
		rc = true;
		sched->skipped_samples++;
	}

	if (sched->idle_hist) {
		if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
			rc = true;
		else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
			 perf_evsel__intval(evsel, sample, "next_pid") != 0)
			rc = true;
	}

	return rc;
}

static void timehist_print_wakeup_event(struct perf_sched *sched,
					struct perf_evsel *evsel,
					struct perf_sample *sample,
					struct machine *machine,
					struct thread *awakened)
{
	struct thread *thread;
	char tstr[64];

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	/* show wakeup unless both awakee and awaker are filtered */
	if (timehist_skip_sample(sched, thread, evsel, sample) &&
	    timehist_skip_sample(sched, awakened, evsel, sample)) {
		return;
	}

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);
	if (sched->show_cpu_visual)
		printf(" %*s ", sched->max_cpu + 1, "");

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf("  %9s  %9s  %9s ", "", "", "");

	printf("awakened: %s", timehist_get_commstr(awakened));

	printf("\n");
}

static int timehist_sched_wakeup_event(struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of awakened task not pid in sample */
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	if (tr->ready_to_run == 0)
		tr->ready_to_run = sample->time;

	/* show wakeups if requested */
	if (sched->show_wakeups &&
	    !perf_time__skip_sample(&sched->ptime, sample->time))
		timehist_print_wakeup_event(sched, evsel, sample, machine, thread);

	return 0;
}

static void timehist_print_migration_event(struct perf_sched *sched,
					   struct perf_evsel *evsel,
					   struct perf_sample *sample,
					   struct machine *machine,
					   struct thread *migrated)
{
	struct thread *thread;
	char tstr[64];
	u32 max_cpus = sched->max_cpu + 1;
	u32 ocpu, dcpu;

	if (sched->summary_only)
		return;

	max_cpus = sched->max_cpu + 1;
	ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
	dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	if (timehist_skip_sample(sched, thread, evsel, sample) &&
	    timehist_skip_sample(sched, migrated, evsel, sample)) {
		return;
	}

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf("  ");
		for (i = 0; i < max_cpus; ++i) {
			c = (i == sample->cpu) ? 'm' : ' ';
			printf("%c", c);
		}
		printf("  ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf("  %9s  %9s  %9s ", "", "", "");

	printf("migrated: %s", timehist_get_commstr(migrated));
	printf(" cpu %d => %d", ocpu, dcpu);

	printf("\n");
}

static int timehist_migrate_task_event(struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of migrated task not pid in sample */
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	tr->migrations++;

	/* show migrations if requested */
	timehist_print_migration_event(sched, evsel, sample, machine, thread);

	return 0;
}
static int timehist_sched_change_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct perf_time_interval *ptime = &sched->ptime;
	struct addr_location al;
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	u64 tprev, t = sample->time;
	int rc = 0;
	int state = perf_evsel__intval(evsel, sample, "prev_state");

	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("problem processing %d event. skipping it\n",
		       event->header.type);
		rc = -1;
		goto out;
	}

	thread = timehist_get_thread(sched, sample, machine, evsel);
	if (thread == NULL) {
		rc = -1;
		goto out;
	}

	if (timehist_skip_sample(sched, thread, evsel, sample))
		goto out;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		rc = -1;
		goto out;
	}

	tprev = perf_evsel__get_time(evsel, sample->cpu);

	/*
	 * If a start time is given:
	 * - sample time is under the window the user cares about - skip sample
	 * - tprev is under the window the user cares about - reset it to the
	 *   start of the window
	 */
	if (ptime->start && ptime->start > t)
		goto out;

	if (tprev && ptime->start > tprev)
		tprev = ptime->start;

	/*
	 * If an end time is given:
	 * - previous sched event is out of the window - we are done
	 * - sample time is beyond the window the user cares about - reset it
	 *   to close out stats for the time window of interest
	 */
	if (ptime->end) {
		if (tprev > ptime->end)
			goto out;

		if (t > ptime->end)
			t = ptime->end;
	}

	if (!sched->idle_hist || thread->tid == 0) {
		timehist_update_runtime_stats(tr, t, tprev);

		if (sched->idle_hist) {
			struct idle_thread_runtime *itr = (void *)tr;
			struct thread_runtime *last_tr;

			BUG_ON(thread->tid != 0);

			if (itr->last_thread == NULL)
				goto out;

			/* add current idle time as last thread's runtime */
			last_tr = thread__get_runtime(itr->last_thread);
			if (last_tr == NULL)
				goto out;

			timehist_update_runtime_stats(last_tr, t, tprev);
			/*
			 * Clear the delta times of the last thread: they are
			 * not updated here and would otherwise show invalid
			 * values next time.  Only the total run time and the
			 * run stats matter for the idle histogram.
			 */
			last_tr->dt_run = 0;
			last_tr->dt_delay = 0;
			last_tr->dt_sleep = 0;
			last_tr->dt_iowait = 0;
			last_tr->dt_preempt = 0;

			callchain_append(&itr->callchain, &itr->cursor, t - tprev);

			itr->last_thread = NULL;
		}
	}

	if (!sched->summary_only)
		timehist_print_sample(sched, evsel, sample, &al, thread, t, state);

out:
	if (sched->hist_time.start == 0 && t >= ptime->start)
		sched->hist_time.start = t;
	if (ptime->end == 0 || t <= ptime->end)
		sched->hist_time.end = t;

	if (tr) {
		/* the time of this sched_switch event becomes the last time the task was seen */
		tr->last_time = sample->time;

		/* the last state is used to determine where to account wait time */
		tr->last_state = state;

		/* sched-out event for the task, so reset its ready-to-run time */
		tr->ready_to_run = 0;
	}

	perf_evsel__save_time(evsel, sample->time, sample->cpu);

	return rc;
}

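/*
 * Sketch of the time-window clamping above (values are only an example):
 * with --time 2.0,3.0, a task scheduled in at t=1.8 and switched out at
 * t=2.4 is accounted from tprev=2.0 (clamped to the window start) to t=2.4,
 * so only the portion inside the window contributes to its run time.
 */
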
static int timehist_sched_switch_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine __maybe_unused)
{
	return timehist_sched_change_event(tool, event, evsel, sample, machine);
}

static int process_lost(struct perf_tool *tool __maybe_unused,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine __maybe_unused)
{
	char tstr[64];

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s ", tstr);
	printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);

	return 0;
}

static void print_thread_runtime(struct thread *t,
				 struct thread_runtime *r)
{
	double mean = avg_stats(&r->run_stats);
	float stddev;

	printf("%*s %5d %9" PRIu64 " ",
	       comm_width, timehist_get_commstr(t), t->ppid,
	       (u64) r->run_stats.n);

	print_sched_time(r->total_run_time, 8);
	stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
	print_sched_time(r->run_stats.min, 6);
	print_sched_time((u64) mean, 6);
	print_sched_time(r->run_stats.max, 6);
	printf("%5.2f", stddev);
	printf(" %5" PRIu64, r->migrations);
	printf("\n");
}

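/*
 * Note: the stddev column printed above is a relative standard deviation,
 * i.e. stddev_stats() scaled by rel_stddev_stats() to a percentage of the
 * mean run time, which is why the summary header labels this column "%"
 * rather than "(msec)".
 */
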
static void print_thread_waittime(struct thread *t,
				  struct thread_runtime *r)
{
	printf("%*s %5d %9" PRIu64 " ",
	       comm_width, timehist_get_commstr(t), t->ppid,
	       (u64) r->run_stats.n);

	print_sched_time(r->total_run_time, 8);
	print_sched_time(r->total_sleep_time, 6);
	print_sched_time(r->total_iowait_time, 6);
	print_sched_time(r->total_preempt_time, 6);
	print_sched_time(r->total_delay_time, 6);
	printf("\n");
}

struct total_run_stats {
	struct perf_sched *sched;
	u64  task_count;
	u64  sched_count;
	u64  total_run_time;
};

static int __show_thread_runtime(struct thread *t, void *priv)
{
	struct total_run_stats *stats = priv;
	struct thread_runtime *r;

	if (thread__is_filtered(t))
		return 0;

	r = thread__priv(t);
	if (r && r->run_stats.n) {
		stats->task_count++;
		stats->sched_count += r->run_stats.n;
		stats->total_run_time += r->total_run_time;

		if (stats->sched->show_state)
			print_thread_waittime(t, r);
		else
			print_thread_runtime(t, r);
	}

	return 0;
}

static int show_thread_runtime(struct thread *t, void *priv)
{
	if (t->dead)
		return 0;

	return __show_thread_runtime(t, priv);
}

static int show_deadthread_runtime(struct thread *t, void *priv)
{
	if (!t->dead)
		return 0;

	return __show_thread_runtime(t, priv);
}

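/*
 * The summary walks all threads twice with the two wrappers above: the first
 * pass prints threads that are still alive, the second only those already
 * marked dead, so "Terminated tasks:" can be reported as a separate block
 * while both passes accumulate into the same totals.
 */
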
static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
	const char *sep = " <- ";
	struct callchain_list *chain;
	size_t ret = 0;
	char bf[1024];
	bool first;

	if (node == NULL)
		return 0;

	ret = callchain__fprintf_folded(fp, node->parent);
	first = (ret == 0);

	list_for_each_entry(chain, &node->val, list) {
		if (chain->ip >= PERF_CONTEXT_MAX)
			continue;
		if (chain->ms.sym && chain->ms.sym->ignore)
			continue;
		ret += fprintf(fp, "%s%s", first ? "" : sep,
			       callchain_list__sym_name(chain, bf, sizeof(bf),
							false));
		first = false;
	}

	return ret;
}

static size_t timehist_print_idlehist_callchain(struct rb_root *root)
{
	size_t ret = 0;
	FILE *fp = stdout;
	struct callchain_node *chain;
	struct rb_node *rb_node = rb_first(root);

	printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
	printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
	       graph_dotted_line);

	while (rb_node) {
		chain = rb_entry(rb_node, struct callchain_node, rb_node);
		rb_node = rb_next(rb_node);

		ret += fprintf(fp, " ");
		print_sched_time(chain->hit, 12);
		ret += 16;	/* print_sched_time returns 2nd arg + 4 */
		ret += fprintf(fp, " %8d ", chain->count);
		ret += callchain__fprintf_folded(fp, chain);
		ret += fprintf(fp, "\n");
	}

	return ret;
}

static void timehist_print_summary(struct perf_sched *sched,
				   struct perf_session *session)
{
	struct machine *m = &session->machines.host;
	struct total_run_stats totals;
	u64 task_count;
	struct thread *t;
	struct thread_runtime *r;
	int i;
	u64 hist_time = sched->hist_time.end - sched->hist_time.start;

	memset(&totals, 0, sizeof(totals));
	totals.sched = sched;

	if (sched->idle_hist) {
		printf("\nIdle-time summary\n");
		printf("%*s parent sched-out ", comm_width, "comm");
		printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
	} else if (sched->show_state) {
		printf("\nWait-time summary\n");
		printf("%*s parent sched-in ", comm_width, "comm");
		printf(" run-time sleep iowait preempt delay\n");
	} else {
		printf("\nRuntime summary\n");
		printf("%*s parent sched-in ", comm_width, "comm");
		printf(" run-time min-run avg-run max-run stddev migrations\n");
	}
	printf("%*s (count) ", comm_width, "");
	printf(" (msec) (msec) (msec) (msec) %s\n",
	       sched->show_state ? "(msec)" : "%");
	printf("%.117s\n", graph_dotted_line);

	machine__for_each_thread(m, show_thread_runtime, &totals);
	task_count = totals.task_count;
	if (!task_count)
		printf("<no still running tasks>\n");

	printf("\nTerminated tasks:\n");
	machine__for_each_thread(m, show_deadthread_runtime, &totals);
	if (task_count == totals.task_count)
		printf("<no terminated tasks>\n");

	/* CPU idle stats are not tracked when samples were skipped */
	if (sched->skipped_samples && !sched->idle_hist)
		return;

	printf("\nIdle stats:\n");
	for (i = 0; i < idle_max_cpu; ++i) {
		t = idle_threads[i];
		if (!t)
			continue;

		r = thread__priv(t);
		if (r && r->run_stats.n) {
			totals.sched_count += r->run_stats.n;
			printf("    CPU %2d idle for ", i);
			print_sched_time(r->total_run_time, 6);
			printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
		} else
			printf("    CPU %2d idle entire time window\n", i);
	}

	if (sched->idle_hist && symbol_conf.use_callchain) {
		callchain_param.mode  = CHAIN_FOLDED;
		callchain_param.value = CCVAL_PERIOD;

		callchain_register_param(&callchain_param);

		printf("\nIdle stats by callchain:\n");
		for (i = 0; i < idle_max_cpu; ++i) {
			struct idle_thread_runtime *itr;

			t = idle_threads[i];
			if (!t)
				continue;

			itr = thread__priv(t);
			if (itr == NULL)
				continue;

			callchain_param.sort(&itr->sorted_root, &itr->callchain,
					     0, &callchain_param);

			printf("  CPU %2d:", i);
			print_sched_time(itr->tr.total_run_time, 6);
			printf(" msec\n");
			timehist_print_idlehist_callchain(&itr->sorted_root);
		}
	}

	printf("\n"
	       "    Total number of unique tasks: %" PRIu64 "\n"
	       "Total number of context switches: %" PRIu64 "\n",
	       totals.task_count, totals.sched_count);

	printf("           Total run time (msec): ");
	print_sched_time(totals.total_run_time, 2);
	printf("\n");

	printf("    Total scheduling time (msec): ");
	print_sched_time(hist_time, 2);
	printf(" (x %d)\n", sched->max_cpu);
}

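/*
 * Note: "Total scheduling time" is the wall-clock span covered by the
 * analysis window (hist_time = hist_time.end - hist_time.start); the trailing
 * "(x <max_cpu>)" is a reminder that this span is available on every CPU.
 * The per-CPU idle percentages above are computed against this same span.
 */
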
typedef int (*sched_handler)(struct perf_tool *tool,
			     union perf_event *event,
			     struct perf_evsel *evsel,
			     struct perf_sample *sample,
			     struct machine *machine);

static int perf_timehist__process_sample(struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct perf_evsel *evsel,
					 struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	int err = 0;
	int this_cpu = sample->cpu;

	if (this_cpu > sched->max_cpu)
		sched->max_cpu = this_cpu;

	if (evsel->handler != NULL) {
		sched_handler f = evsel->handler;

		err = f(tool, event, evsel, sample, machine);
	}

	return err;
}

static int timehist_check_attr(struct perf_sched *sched,
			       struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	struct evsel_runtime *er;

	list_for_each_entry(evsel, &evlist->entries, node) {
		er = perf_evsel__get_runtime(evsel);
		if (er == NULL) {
			pr_err("Failed to allocate memory for evsel runtime data\n");
			return -1;
		}

		if (sched->show_callchain &&
		    !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) {
			pr_info("Samples do not have callchains.\n");
			sched->show_callchain = 0;
			symbol_conf.use_callchain = 0;
		}
	}

	return 0;
}

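/*
 * Call chains can only be shown if the samples carry PERF_SAMPLE_CALLCHAIN,
 * i.e. the data was recorded with call graphs enabled (for example by
 * passing '-g' through 'perf sched record', illustrative invocation);
 * otherwise the check above prints a note and disables call-chain output.
 */
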
static int perf_sched__timehist(struct perf_sched *sched)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "sched:sched_switch",       timehist_sched_switch_event, },
		{ "sched:sched_wakeup",	      timehist_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
	};
	const struct perf_evsel_str_handler migrate_handlers[] = {
		{ "sched:sched_migrate_task", timehist_migrate_task_event, },
	};
	struct perf_data data = {
		.file      = {
			.path = input_name,
		},
		.mode      = PERF_DATA_MODE_READ,
		.force     = sched->force,
	};

	struct perf_session *session;
	struct perf_evlist *evlist;
	int err = -1;

	/*
	 * event handlers for the timehist option
	 */
	sched->tool.sample	 = perf_timehist__process_sample;
	sched->tool.mmap	 = perf_event__process_mmap;
	sched->tool.comm	 = perf_event__process_comm;
	sched->tool.exit	 = perf_event__process_exit;
	sched->tool.fork	 = perf_event__process_fork;
	sched->tool.lost	 = process_lost;
	sched->tool.attr	 = perf_event__process_attr;
	sched->tool.tracing_data = perf_event__process_tracing_data;
	sched->tool.build_id	 = perf_event__process_build_id;

	sched->tool.ordered_events = true;
	sched->tool.ordering_requires_timestamps = true;

	symbol_conf.use_callchain = sched->show_callchain;

	session = perf_session__new(&data, false, &sched->tool);
	if (session == NULL)
		return -ENOMEM;

	evlist = session->evlist;

	symbol__init(&session->header.env);

	if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
		pr_err("Invalid time string\n");
		return -EINVAL;
	}

	if (timehist_check_attr(sched, evlist) != 0)
		goto out;

	/* setup per-evsel handlers */
	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out;

	/* a sched_switch event at a minimum needs to exist */
	if (!perf_evlist__find_tracepoint_by_name(session->evlist,
						  "sched:sched_switch")) {
		pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
		goto out;
	}

	if (sched->show_migrations &&
	    perf_session__set_tracepoints_handlers(session, migrate_handlers))
		goto out;

	/* pre-allocate structs for per-CPU idle stats */
	sched->max_cpu = session->header.env.nr_cpus_online;
	if (sched->max_cpu == 0)
		sched->max_cpu = 4;
	if (init_idle_threads(sched->max_cpu))
		goto out;

	/* summary_only implies the summary option, but don't overwrite summary if set */
	if (sched->summary_only)
		sched->summary = sched->summary_only;

	if (!sched->summary_only)
		timehist_header(sched);

	err = perf_session__process_events(session);
	if (err) {
		pr_err("Failed to process events, error %d", err);
		goto out;
	}

	sched->nr_events      = evlist->stats.nr_events[0];
	sched->nr_lost_events = evlist->stats.total_lost;
	sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];

	if (sched->summary)
		timehist_print_summary(sched, session);

out:
	free_idle_threads();
	perf_session__delete(session);

	return err;
}

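/*
 * Typical usage (illustrative):
 *
 *   perf sched record -- sleep 1     # capture the sched tracepoints
 *   perf sched timehist              # per-event wait/delay/run times
 *   perf sched timehist -s           # summary only
 *   perf sched timehist -MVw         # add migrations, CPU visual, wakeups
 */
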
static void print_bad_events(struct perf_sched *sched)
{
	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
			sched->nr_unordered_timestamps, sched->nr_timestamps);
	}
	if (sched->nr_lost_events && sched->nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
			sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
	}
	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
			sched->nr_context_switch_bugs, sched->nr_timestamps);
		if (sched->nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct work_atoms *this;
	const char *comm = thread__comm_str(data->thread), *this_comm;

	while (*new) {
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		this_comm = thread__comm_str(this->thread);
		cmp = strcmp(comm, this_comm);
		if (cmp > 0) {
			new = &((*new)->rb_left);
		} else if (cmp < 0) {
			new = &((*new)->rb_right);
		} else {
			this->num_merged++;
			this->total_runtime += data->total_runtime;
			this->nb_atoms += data->nb_atoms;
			this->total_lat += data->total_lat;
			list_splice(&data->work_list, &this->work_list);
			if (this->max_lat < data->max_lat) {
				this->max_lat = data->max_lat;
				this->max_lat_at = data->max_lat_at;
			}
			zfree(&data);
			return;
		}
	}

	data->num_merged++;
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void perf_sched__merge_lat(struct perf_sched *sched)
{
	struct work_atoms *data;
	struct rb_node *node;

	if (sched->skip_merge)
		return;

	while ((node = rb_first(&sched->atom_root))) {
		rb_erase(node, &sched->atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__merge_work_atoms(&sched->merged_atom_root, data);
	}
}

static int perf_sched__lat(struct perf_sched *sched)
{
	struct rb_node *next;

	if (perf_sched__read_events(sched))
		return -1;

	perf_sched__merge_lat(sched);
	perf_sched__sort_lat(sched);

	printf("\n -----------------------------------------------------------------------------------------------------------------\n");
	printf("  Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
	printf(" -----------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sched->sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(sched, work_list);
		next = rb_next(next);
		thread__zput(work_list->thread);
	}

	printf(" -----------------------------------------------------------------------------------------------------------------\n");
	printf("  TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
		(double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events(sched);
	printf("\n");

	return 0;
}

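/*
 * Example (illustrative): 'perf sched latency -s max' sorts the table above
 * by the "Maximum delay ms" column; the valid keys are runtime, switch, avg
 * and max, as listed in the -s/--sort option help.
 */
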
static int setup_map_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	if (sched->map.comp) {
		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
		if (!sched->map.comp_cpus)
			return -1;
	}

	if (!sched->map.cpus_str)
		return 0;

	map = cpu_map__new(sched->map.cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
		return -1;
	}

	sched->map.cpus = map;
	return 0;
}

static int setup_color_pids(struct perf_sched *sched)
{
	struct thread_map *map;

	if (!sched->map.color_pids_str)
		return 0;

	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
		return -1;
	}

	sched->map.color_pids = map;
	return 0;
}

static int setup_color_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	if (!sched->map.color_cpus_str)
		return 0;

	map = cpu_map__new(sched->map.color_cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
		return -1;
	}

	sched->map.color_cpus = map;
	return 0;
}

static int perf_sched__map(struct perf_sched *sched)
{
	if (setup_map_cpus(sched))
		return -1;

	if (setup_color_pids(sched))
		return -1;

	if (setup_color_cpus(sched))
		return -1;

	if (perf_sched__read_events(sched))
		return -1;
	print_bad_events(sched);
	return 0;
}

*sched
)
3201 calibrate_run_measurement_overhead(sched
);
3202 calibrate_sleep_measurement_overhead(sched
);
3204 test_calibrations(sched
);
3206 if (perf_sched__read_events(sched
))
3209 printf("nr_run_events: %ld\n", sched
->nr_run_events
);
3210 printf("nr_sleep_events: %ld\n", sched
->nr_sleep_events
);
3211 printf("nr_wakeup_events: %ld\n", sched
->nr_wakeup_events
);
3213 if (sched
->targetless_wakeups
)
3214 printf("target-less wakeups: %ld\n", sched
->targetless_wakeups
);
3215 if (sched
->multitarget_wakeups
)
3216 printf("multi-target wakeups: %ld\n", sched
->multitarget_wakeups
);
3217 if (sched
->nr_run_events_optimized
)
3218 printf("run atoms optimized: %ld\n",
3219 sched
->nr_run_events_optimized
);
3221 print_task_traces(sched
);
3222 add_cross_task_wakeups(sched
);
3224 create_tasks(sched
);
3225 printf("------------------------------------------------------------\n");
3226 for (i
= 0; i
< sched
->replay_repeat
; i
++)
3227 run_one_test(sched
);
static void setup_sorting(struct perf_sched *sched, const struct option *options,
			  const char * const usage_msg[])
{
	char *tmp, *tok, *str = strdup(sched->sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sched->sort_list) < 0) {
			usage_with_options_msg(usage_msg, options,
					"Unknown --sort key: `%s'", tok);
		}
	}

	free(str);

	sort_dimension__add("pid", &sched->cmp_pid);
}

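/*
 * sort_order is a comma (or space) separated list such as "avg, max, switch,
 * runtime" (the built-in default); unknown keys abort with the usage text,
 * and "pid" is always added to the separate cmp_pid list used when looking
 * up per-task atoms.
 */
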
static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-a",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "sched:sched_switch",
		"-e", "sched:sched_stat_wait",
		"-e", "sched:sched_stat_sleep",
		"-e", "sched:sched_stat_iowait",
		"-e", "sched:sched_stat_runtime",
		"-e", "sched:sched_process_fork",
		"-e", "sched:sched_wakeup",
		"-e", "sched:sched_wakeup_new",
		"-e", "sched:sched_migrate_task",
	};

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv);
}

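/*
 * 'perf sched record' is a thin wrapper around 'perf record' with the sched
 * tracepoints above pre-selected; any extra arguments (for example a workload
 * such as '-- sleep 1', illustrative) are appended to that command line.
 */
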
int cmd_sched(int argc, const char **argv)
{
	const char default_sort_order[] = "avg, max, switch, runtime";
	struct perf_sched sched = {
		.tool = {
			.sample		 = perf_sched__process_tracepoint_sample,
			.comm		 = perf_event__process_comm,
			.namespaces	 = perf_event__process_namespaces,
			.lost		 = perf_event__process_lost,
			.fork		 = perf_sched__process_fork_event,
			.ordered_events	 = true,
		},
		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
		.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
		.sort_order	      = default_sort_order,
		.replay_repeat	      = 10,
		.profile_cpu	      = -1,
		.next_shortname1      = 'A',
		.next_shortname2      = '0',
		.skip_merge	      = 0,
		.show_callchain	      = 1,
		.max_stack	      = 5,
	};
	const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
	OPT_END()
	};
	const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INTEGER('C', "CPU", &sched.profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('p', "pids", &sched.skip_merge,
		    "latency stats per pid instead of per comm"),
	OPT_PARENT(sched_options)
	};
	const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_PARENT(sched_options)
	};
	const struct option map_options[] = {
	OPT_BOOLEAN(0, "compact", &sched.map.comp,
		    "map output in compact mode"),
	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
		   "highlight given pids in map"),
	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
		   "highlight given CPUs in map"),
	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
		   "display given CPUs in map"),
	OPT_PARENT(sched_options)
	};
	const struct option timehist_options[] = {
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
		    "Display call chains if present (default on)"),
	OPT_UINTEGER(0, "max-stack", &sched.max_stack,
		     "Maximum number of functions to display in a backtrace"),
	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
		   "Look for files with symbols relative to this directory"),
	OPT_BOOLEAN('s', "summary", &sched.summary_only,
		    "Show only a summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &sched.summary,
		    "Show all scheduling events and a summary with statistics"),
	OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
	OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"),
	OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
	OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
	OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
	OPT_STRING(0, "time", &sched.time_str, "str",
		   "Time span for analysis (start,stop)"),
	OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
	OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
		   "analyze events only for given process id(s)"),
	OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
		   "analyze events only for given thread id(s)"),
	OPT_PARENT(sched_options)
	};

	const char * const latency_usage[] = {
		"perf sched latency [<options>]",
		NULL
	};
	const char * const replay_usage[] = {
		"perf sched replay [<options>]",
		NULL
	};
	const char * const map_usage[] = {
		"perf sched map [<options>]",
		NULL
	};
	const char * const timehist_usage[] = {
		"perf sched timehist [<options>]",
		NULL
	};
	const char *const sched_subcommands[] = { "record", "latency", "map",
						  "replay", "script",
						  "timehist" };
	const char *sched_usage[] = {
		NULL,
		NULL
	};
	struct trace_sched_handler lat_ops  = {
		.wakeup_event	    = latency_wakeup_event,
		.switch_event	    = latency_switch_event,
		.runtime_event	    = latency_runtime_event,
		.migrate_task_event = latency_migrate_task_event,
	};
	struct trace_sched_handler map_ops  = {
		.switch_event	    = map_switch_event,
	};
	struct trace_sched_handler replay_ops  = {
		.wakeup_event	    = replay_wakeup_event,
		.switch_event	    = replay_switch_event,
		.fork_event	    = replay_fork_event,
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
		sched.curr_pid[i] = -1;

	argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
					sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv);

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		sched.tp_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__lat(&sched);
	} else if (!strcmp(argv[0], "map")) {
		if (argc) {
			argc = parse_options(argc, argv, map_options, map_usage, 0);
			if (argc)
				usage_with_options(map_usage, map_options);
		}
		sched.tp_handler = &map_ops;
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__map(&sched);
	} else if (!strncmp(argv[0], "rep", 3)) {
		sched.tp_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		return perf_sched__replay(&sched);
	} else if (!strcmp(argv[0], "timehist")) {
		if (argc) {
			argc = parse_options(argc, argv, timehist_options,
					     timehist_usage, 0);
			if (argc)
				usage_with_options(timehist_usage, timehist_options);
		}

		if ((sched.show_wakeups || sched.show_next) &&
		    sched.summary_only) {
			pr_err(" Error: -s and -[n|w] are mutually exclusive.\n");
			parse_options_usage(timehist_usage, timehist_options, "s", true);
			if (sched.show_wakeups)
				parse_options_usage(NULL, timehist_options, "w", true);
			if (sched.show_next)
				parse_options_usage(NULL, timehist_options, "n", true);
			return -EINVAL;
		}

		return perf_sched__timehist(&sched);
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}