#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"

#include "util/parse-options.h"

#include "util/debug.h"

#include "util/trace-event.h"
#include <sys/types.h>
static char const		*input_name = "perf.data";

static unsigned long		page_size;
static unsigned long		mmap_window = 32;

static unsigned long		total_comm = 0;

static struct rb_root		threads;
static struct thread		*last_match;

static struct perf_header	*header;
static u64			sample_type;

static int			replay_mode;
static int			lat_mode;	/* set by -l/--latency in the options table below */
/*
 * Scheduler benchmarks
 */
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/prctl.h>

#include <linux/unistd.h>

#include <semaphore.h>
/* needed by the pthread mutexes, BUG_ON() and sqrt() used below: */
#include <pthread.h>
#include <assert.h>
#include <math.h>

#define PR_SET_NAME		15		/* Set process name */

#define BUG_ON(x)		assert(!(x))
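/*
 * What follows is the scheduler replay benchmark folded into perf sched:
 * the recorded sched trace is turned into per-task event lists which are
 * then re-executed by real threads. PR_SET_NAME and BUG_ON() are defined
 * locally so this builds without pulling in kernel headers.
 */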
typedef unsigned long long nsec_t;

static nsec_t			run_measurement_overhead;
static nsec_t			sleep_measurement_overhead;
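/*
 * All bookkeeping below is done in nanoseconds: get_nsecs() reads
 * CLOCK_MONOTONIC, and the two *_measurement_overhead values (filled in
 * by the calibrate_*() functions at startup) compensate for the cost of
 * the measurement itself when burning or sleeping for a given duration.
 */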
static nsec_t get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
static void burn_nsecs(nsec_t nsecs)
{
	nsec_t T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}
static void sleep_nsecs(nsec_t nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}
static void calibrate_run_measurement_overhead(void)
{
	nsec_t T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %Ld nsecs\n", min_delta);
}
static void calibrate_sleep_measurement_overhead(void)
{
	nsec_t T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
}
#define MAX_PID			65536

static unsigned long		nr_tasks;

struct sched_event;

/* one per task that shows up in the trace: */
struct task_desc {
	unsigned long		nr;
	unsigned long		pid;
	char			comm[16];

	unsigned long		nr_events;
	unsigned long		curr_event;
	struct sched_event	**events;

	pthread_t		thread;
	sem_t			sleep_sem;
	sem_t			ready_for_work;
	sem_t			work_done_sem;

	u64			cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
};

/* one per scheduling atom parsed out of the trace: */
struct sched_event {
	enum sched_event_type	type;
	nsec_t			timestamp;
	u64			duration;
	int			specific_wait;
	sem_t			*wait_sem;
	struct task_desc	*wakee;
};

static struct task_desc		*pid_to_task[MAX_PID];

static struct task_desc		**tasks;
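/*
 * Data model of the replay machinery: register_pid() hands out one
 * task_desc per PID seen in the trace, reachable both via pid_to_task[pid]
 * and, in registration order, via tasks[]. Each task_desc carries an
 * ordered array of sched_events (RUN/SLEEP/WAKEUP) that a dedicated
 * pthread later replays against the live scheduler.
 */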
static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static nsec_t start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;
static struct sched_event *
get_new_event(struct task_desc *task, nsec_t timestamp)
{
	struct sched_event *event = calloc(1, sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;

	/* grow the per-task event array by one pointer: */
	task->nr_events++;
	size = sizeof(struct sched_event *) * task->nr_events;
	task->events = realloc(task->events, size);
	BUG_ON(!task->events);

	task->events[idx] = event;

	return event;
}
static struct sched_event *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->events[task->nr_events - 1];
}
static void
add_sched_event_run(struct task_desc *task, nsec_t timestamp, u64 duration)
{
	struct sched_event *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * into it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}
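/*
 * Two consecutive RUN events for the same task are collapsed into one, so
 * the replay thread issues a single longer burn_nsecs() instead of several
 * short ones; nr_run_events_optimized counts how often that happened.
 */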
static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;
static void
add_sched_event_wakeup(struct task_desc *task, nsec_t timestamp,
		       struct task_desc *wakee)
{
	struct sched_event *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}
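/*
 * This wakeup/sleep pairing is what makes the replay faithful: if the
 * wakee's last recorded event is a SLEEP, both sides share one dedicated
 * semaphore - the sleeping replay thread will sem_wait() on it and the
 * waking thread will sem_post() it. Wakeups with no matching sleep are
 * only counted (targetless_wakeups), as are second wakeups aimed at an
 * already-armed sleep (multitarget_wakeups).
 */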
static void
add_sched_event_sleep(struct task_desc *task, nsec_t timestamp,
		      u64 task_state __used)
{
	struct sched_event *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}
static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = calloc(1, sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks*sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}
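/*
 * pid_to_task[] is a plain direct-indexed lookup table bounded by MAX_PID,
 * so registration and lookup are O(1); tasks[] additionally keeps the
 * registration order, which is what the task-iterating loops below walk.
 */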
static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}
static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	/* each task wakes up the next one, wrapping around at the end: */
	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}
static void
process_sched_event(struct task_desc *this_task __used, struct sched_event *event)
{
	int ret = 0;
	nsec_t now;
	long long delta;

	now = get_nsecs();
	/* how far ahead of (or behind) schedule this event is: */
	delta = start_time + event->timestamp - now;

	switch (event->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(event->duration);
		break;
	case SCHED_EVENT_SLEEP:
		if (event->wait_sem)
			ret = sem_wait(event->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_WAKEUP:
		if (event->wait_sem)
			ret = sem_post(event->wait_sem);
		BUG_ON(ret);
		break;
	default:
		BUG_ON(1);
	}
}
static nsec_t get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	nsec_t sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum  = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;

	return sum;
}
static nsec_t get_cpu_usage_nsec_self(void)
{
	char filename[] = "/proc/1234567890/sched";
	unsigned long msecs, nsecs;
	char *line = NULL;
	nsec_t total = 0;
	size_t len = 0;
	ssize_t chars;
	FILE *file;
	int ret;

	sprintf(filename, "/proc/%d/sched", getpid());
	file = fopen(filename, "r");
	BUG_ON(!file);

	while ((chars = getline(&line, &len, file)) != -1) {
		ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
			     &msecs, &nsecs);
		if (ret == 2) {
			total = msecs*1e6 + nsecs;
			break;
		}
	}
	if (line)
		free(line);
	fclose(file);

	return total;
}
static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	nsec_t cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self();

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->events[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self();
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;

	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}
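/*
 * Hand-shake between the parent and the replay threads, as used above and
 * in wait_for_tasks(): the parent holds start_work_mutex and
 * work_done_wait_mutex; each worker posts ready_for_work and then blocks
 * on start_work_mutex. Releasing that mutex starts an iteration, posting
 * work_done_sem ends it, and blocking on work_done_wait_mutex parks the
 * workers until the next round.
 */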
static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}
static nsec_t cpu_usage;
static nsec_t runavg_cpu_usage;
static nsec_t parent_cpu_usage;
static nsec_t runavg_parent_cpu_usage;
static void wait_for_tasks(void)
{
	nsec_t cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
				   parent_cpu_usage)/10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}
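/*
 * The runavg_* values above are simple decaying averages: each iteration
 * contributes one tenth of its measurement, avg = (avg*9 + sample)/10, so
 * the reported "ravg"/"cpu" numbers settle after a handful of runs instead
 * of jumping around with every iteration.
 */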
static int read_events(void);

static unsigned long nr_runs;
static nsec_t sum_runtime;
static nsec_t sum_fluct;
static nsec_t run_avg;
static void run_one_test(void)
{
	nsec_t T0, T1, delta, avg_delta, fluct, std_dev;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg*9 + delta)/10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta/1000000.0);

	printf("%0.2f +- %0.2f, ",
		(double)avg_delta/1e6, (double)std_dev/1e6);

	printf("ravg: %0.2f, ",
		(double)run_avg/1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);

	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage/1e6,
		(double)runavg_parent_cpu_usage/1e6);

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}
static void test_calibrations(void)
{
	nsec_t T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %Ld nsecs\n", T1-T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %Ld nsecs\n", T1-T0);
}
static void __cmd_replay(void)
{
	long nr_iterations = 10, i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events();

	printf("nr_run_events:        %ld\n", nr_run_events);
	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups:  %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run events optimized: %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < nr_iterations; i++)
		run_one_test();
}
static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
	struct thread *thread;

	thread = threads__findnew(event->comm.pid, &threads, &last_match);

	dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
		(void *)(offset + head),
		(void *)(long)(event->header.size),
		event->comm.comm, event->comm.pid);

	if (thread == NULL ||
	    thread__set_comm(thread, event->comm.comm)) {
		dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
		return -1;
	}
	total_comm++;

	return 0;
}
struct raw_event_sample {
	u32 size;
	char data[0];
};

#define FILL_FIELD(ptr, field, event, data)	\
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data)			\
do {								\
	void *__array = raw_field_ptr(event, #array, data);	\
	memcpy(ptr.array, __array, sizeof(ptr.array));		\
} while(0)

#define FILL_COMMON_FIELDS(ptr, event, data)			\
	FILL_FIELD(ptr, common_type, event, data);		\
	FILL_FIELD(ptr, common_flags, event, data);		\
	FILL_FIELD(ptr, common_preempt_count, event, data);	\
	FILL_FIELD(ptr, common_pid, event, data);		\
	FILL_FIELD(ptr, common_tgid, event, data);
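/*
 * The FILL_* helpers look tracepoint fields up by name in the raw sample
 * payload (raw_field_value()/raw_field_ptr() come from util/trace-event.h),
 * so the trace_*_event structures below only need to declare the fields
 * the handlers actually consume - they do not have to mirror the binary
 * record layout exactly.
 */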
struct trace_switch_event {
	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_wakeup_event {
	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};
struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct event *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);
};
static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct event *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}
static unsigned long cpu_last_switched[MAX_CPUS];
static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct event *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %Ld < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}
static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}
static struct trace_sched_handler replay_ops = {
	.wakeup_event		= replay_wakeup_event,
	.switch_event		= replay_switch_event,
	.fork_event		= replay_fork_event,
};

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
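/*
 * TASK_STATE_TO_CHAR_STR maps the numeric prev_state of a sched_switch
 * record to the usual one-letter codes ('R' running, 'S' sleeping,
 * 'D' uninterruptible, ...); sched_out_state() below indexes straight
 * into it. Everything from here on implements the -l/--latency mode.
 */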
enum thread_state {
	THREAD_SLEEPING,
	THREAD_WAKED_UP,
	THREAD_SCHED_IN,
	THREAD_IGNORE,
};

struct lat_snapshot {
	struct list_head	list;
	enum thread_state	state;
	u64			wake_up_time;
	u64			sched_in_time;
};

struct thread_latency {
	struct list_head	snapshot_list;
	struct thread		*thread;
	struct rb_node		node;
};

static struct rb_root lat_snapshot_root;
static struct thread_latency *
thread_latency_search(struct rb_root *root, struct thread *thread)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct thread_latency *lat;

		lat = container_of(node, struct thread_latency, node);
		if (thread->pid < lat->thread->pid)
			node = node->rb_left;
		else if (thread->pid > lat->thread->pid)
			node = node->rb_right;
		else
			return lat;
	}
	return NULL;
}
static void
__thread_latency_insert(struct rb_root *root, struct thread_latency *data)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct thread_latency *this;

		this = container_of(*new, struct thread_latency, node);
		parent = *new;
		if (data->thread->pid < this->thread->pid)
			new = &((*new)->rb_left);
		else if (data->thread->pid > this->thread->pid)
			new = &((*new)->rb_right);
		else
			die("Double thread insertion\n");
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}
static void thread_latency_insert(struct thread *thread)
{
	struct thread_latency *lat;

	lat = calloc(sizeof(*lat), 1);
	if (!lat)
		die("No memory");

	lat->thread = thread;
	INIT_LIST_HEAD(&lat->snapshot_list);
	__thread_latency_insert(&lat_snapshot_root, lat);
}
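/*
 * lat_snapshot_root is an rbtree of thread_latency nodes keyed by the
 * thread's pid: thread_latency_search() walks it, __thread_latency_insert()
 * links a new node in the usual "find the slot, then rb_link_node() and
 * rb_insert_color()" pattern, and the handlers below use lookup-or-insert
 * to reach the per-thread snapshot list.
 */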
static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}
static void
lat_sched_out(struct thread_latency *lat,
	      struct trace_switch_event *switch_event)
{
	struct lat_snapshot *snapshot;

	if (sched_out_state(switch_event) == 'R')
		return;

	snapshot = calloc(sizeof(*snapshot), 1);
	if (!snapshot)
		die("No memory");

	list_add_tail(&snapshot->list, &lat->snapshot_list);
}
static void
lat_sched_in(struct thread_latency *lat, u64 timestamp)
{
	struct lat_snapshot *snapshot;

	if (list_empty(&lat->snapshot_list))
		return;

	snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
			      list);

	if (snapshot->state != THREAD_WAKED_UP)
		return;

	if (timestamp < snapshot->wake_up_time) {
		snapshot->state = THREAD_IGNORE;
		return;
	}

	snapshot->state = THREAD_SCHED_IN;
	snapshot->sched_in_time = timestamp;
}
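/*
 * Per-wakeup state machine: a snapshot starts out THREAD_SLEEPING (the
 * calloc()ed zero state), latency_wakeup_event() moves it to
 * THREAD_WAKED_UP and records wake_up_time, and lat_sched_in() moves it to
 * THREAD_SCHED_IN and records sched_in_time. The difference between those
 * two timestamps is the wakeup latency that output_lat_thread() sums up;
 * out-of-order timestamps are marked THREAD_IGNORE instead.
 */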
static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct event *event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct thread_latency *out_lat, *in_lat;
	struct thread *sched_out, *sched_in;

	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);

	in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
	if (!in_lat) {
		thread_latency_insert(sched_in);
		in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
		if (!in_lat)
			die("Internal latency tree error");
	}

	out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
	if (!out_lat) {
		thread_latency_insert(sched_out);
		out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
		if (!out_lat)
			die("Internal latency tree error");
	}

	lat_sched_in(in_lat, timestamp);
	lat_sched_out(out_lat, switch_event);
}
static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct event *event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct thread_latency *lat;
	struct lat_snapshot *snapshot;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
	lat = thread_latency_search(&lat_snapshot_root, wakee);
	if (!lat) {
		thread_latency_insert(wakee);
		return;
	}

	if (list_empty(&lat->snapshot_list))
		return;

	snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
			      list);

	if (snapshot->state != THREAD_SLEEPING)
		return;

	snapshot->state = THREAD_WAKED_UP;
	snapshot->wake_up_time = timestamp;
}
static struct trace_sched_handler lat_ops = {
	.wakeup_event		= latency_wakeup_event,
	.switch_event		= latency_switch_event,
	.fork_event		= latency_fork_event,
};
static void output_lat_thread(struct thread_latency *lat)
{
	struct lat_snapshot *shot;
	int count = 0;
	int i;
	int ret;
	u64 max = 0, avg;
	u64 total = 0, delta;

	list_for_each_entry(shot, &lat->snapshot_list, list) {
		if (shot->state != THREAD_SCHED_IN)
			continue;

		count++;

		delta = shot->sched_in_time - shot->wake_up_time;
		if (delta > max)
			max = delta;
		total += delta;
	}

	if (!count)
		return;

	ret = printf(" %s ", lat->thread->comm);

	for (i = 0; i < 19 - ret; i++)
		printf(" ");

	avg = total / count;

	printf("|%9.3f ms |%9d | avg:%9.3f ms | max:%9.3f ms |\n",
		0.0, count, (double)avg/1e9, (double)max/1e9);
}
static void __cmd_lat(void)
{
	struct rb_node *next;

	read_events();

	printf("-----------------------------------------------------------------------------------\n");
	printf(" Task              | runtime ms | switches | average delay ms | maximum delay ms |\n");
	printf("-----------------------------------------------------------------------------------\n");

	next = rb_first(&lat_snapshot_root);

	while (next) {
		struct thread_latency *lat;

		lat = rb_entry(next, struct thread_latency, node);
		output_lat_thread(lat);
		next = rb_next(next);
	}

	printf("-----------------------------------------------------------------------------------\n");
}
static struct trace_sched_handler *trace_handler;
static void
process_sched_wakeup_event(struct raw_event_sample *raw,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, raw->data);

	FILL_ARRAY(wakeup_event, comm, event, raw->data);
	FILL_FIELD(wakeup_event, pid, event, raw->data);
	FILL_FIELD(wakeup_event, prio, event, raw->data);
	FILL_FIELD(wakeup_event, success, event, raw->data);
	FILL_FIELD(wakeup_event, cpu, event, raw->data);

	trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
}
static void
process_sched_switch_event(struct raw_event_sample *raw,
			   struct event *event,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, raw->data);

	FILL_ARRAY(switch_event, prev_comm, event, raw->data);
	FILL_FIELD(switch_event, prev_pid, event, raw->data);
	FILL_FIELD(switch_event, prev_prio, event, raw->data);
	FILL_FIELD(switch_event, prev_state, event, raw->data);
	FILL_ARRAY(switch_event, next_comm, event, raw->data);
	FILL_FIELD(switch_event, next_pid, event, raw->data);
	FILL_FIELD(switch_event, next_prio, event, raw->data);

	trace_handler->switch_event(&switch_event, event, cpu, timestamp, thread);
}
static void
process_sched_fork_event(struct raw_event_sample *raw,
			 struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, raw->data);

	FILL_ARRAY(fork_event, parent_comm, event, raw->data);
	FILL_FIELD(fork_event, parent_pid, event, raw->data);
	FILL_ARRAY(fork_event, child_comm, event, raw->data);
	FILL_FIELD(fork_event, child_pid, event, raw->data);

	trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
}
static void
process_sched_exit_event(struct event *event,
			 int cpu __used,
			 u64 timestamp __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}
static void
process_raw_event(event_t *raw_event __used, void *more_data,
		  int cpu, u64 timestamp, struct thread *thread)
{
	struct raw_event_sample *raw = more_data;
	struct event *event;
	int type;

	type = trace_parse_common_type(raw->data);
	event = trace_find_event(type);

	/* demultiplex by tracepoint name to the per-mode handlers: */
	if (!strcmp(event->name, "sched_switch"))
		process_sched_switch_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup"))
		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_wakeup_new"))
		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_fork"))
		process_sched_fork_event(raw, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "sched_process_exit"))
		process_sched_exit_event(event, cpu, timestamp, thread);
}
static int
process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
	struct dso *dso = NULL;
	struct thread *thread;
	u64 ip = event->ip.ip;
	u64 timestamp = -1;
	u32 cpu = -1;
	u64 period = 1;
	void *more_data = event->ip.__more_data;
	int cpumode;

	thread = threads__findnew(event->ip.pid, &threads, &last_match);

	if (sample_type & PERF_SAMPLE_TIME) {
		timestamp = *(u64 *)more_data;
		more_data += sizeof(u64);
	}

	if (sample_type & PERF_SAMPLE_CPU) {
		cpu = *(u32 *)more_data;
		more_data += sizeof(u32);
		more_data += sizeof(u32); /* reserved */
	}

	if (sample_type & PERF_SAMPLE_PERIOD) {
		period = *(u64 *)more_data;
		more_data += sizeof(u64);
	}

	dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
		(void *)(offset + head),
		(void *)(long)(event->header.size),
		event->header.misc,
		event->ip.pid, event->ip.tid,
		(void *)(long)ip,
		(long long)period);

	if (thread == NULL) {
		eprintf("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;

	if (cpumode == PERF_EVENT_MISC_KERNEL) {
		dso = kernel_dso;

		dump_printf(" ...... dso: %s\n", dso->name);

	} else if (cpumode == PERF_EVENT_MISC_USER) {
		/* user-space sample, no dso to report here */
	} else {
		dso = hypervisor_dso;

		dump_printf(" ...... dso: [hypervisor]\n");
	}

	if (sample_type & PERF_SAMPLE_RAW)
		process_raw_event(event, more_data, cpu, timestamp, thread);

	return 0;
}
static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
	switch (event->header.type) {
	case PERF_EVENT_MMAP ... PERF_EVENT_LOST:
		return 0;

	case PERF_EVENT_COMM:
		return process_comm_event(event, offset, head);

	case PERF_EVENT_EXIT ... PERF_EVENT_READ:
		return 0;

	case PERF_EVENT_SAMPLE:
		return process_sample_event(event, offset, head);

	case PERF_EVENT_MAX:
	default:
		return -1;
	}
}
static int read_events(void)
{
	int ret, rc = EXIT_FAILURE;
	unsigned long offset = 0;
	unsigned long head = 0;
	struct stat perf_stat;
	event_t *event;
	uint32_t size;
	char *buf;
	int input;

	register_idle_thread(&threads, &last_match);

	input = open(input_name, O_RDONLY);
	if (input < 0) {
		perror("failed to open file");
		exit(-1);
	}

	ret = fstat(input, &perf_stat);
	if (ret < 0) {
		perror("failed to stat file");
		exit(-1);
	}

	if (!perf_stat.st_size) {
		fprintf(stderr, "zero-sized file, nothing to do!\n");
		exit(0);
	}
	header = perf_header__read(input);
	head = header->data_offset;
	sample_type = perf_header__sample_type(header);

	if (!(sample_type & PERF_SAMPLE_RAW))
		die("No trace sample to read. Did you call perf record "
		    "without -R?");

	if (load_kernel() < 0) {
		perror("failed to load kernel symbols");
		return EXIT_FAILURE;
	}

remap:
	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
			   MAP_SHARED, input, offset);
	if (buf == MAP_FAILED) {
		perror("failed to mmap file");
		exit(-1);
	}

more:
	event = (event_t *)(buf + head);

	size = event->header.size;
	if (!size)
		size = 8;

	/* re-mmap the window if this event would cross its end: */
	if (head + event->header.size >= page_size * mmap_window) {
		unsigned long shift = page_size * (head / page_size);
		int res;

		res = munmap(buf, page_size * mmap_window);
		assert(res == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;

	if (!size || process_event(event, offset, head) < 0) {
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head < (unsigned long)perf_stat.st_size)
		goto more;

	rc = EXIT_SUCCESS;
	close(input);

	return rc;
}
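/*
 * The perf.data file is consumed through a sliding window of
 * page_size * mmap_window bytes: "head" walks the events inside the
 * current mapping and, once the next event would cross the window's end,
 * the window is munmap()ed and re-mmap()ed page-aligned further into the
 * file ("remap" above), so arbitrarily large traces are read with a
 * bounded mapping.
 */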
static const char * const sched_usage[] = {
	"perf sched [<options>] <command>",
	NULL
};

static const struct option options[] = {
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_BOOLEAN('r', "replay", &replay_mode,
		    "replay sched behaviour from traces"),
	OPT_BOOLEAN('l', "latency", &lat_mode,
		    "measure various latencies"),
	OPT_BOOLEAN('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_END()
};
int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	page_size = getpagesize();

	argc = parse_options(argc, argv, options, sched_usage, 0);
	if (argc) {
		/*
		 * Special case: if there's an argument left then assume that
		 * it's a symbol filter:
		 */
		if (argc > 1)
			usage_with_options(sched_usage, options);
	}

	if (replay_mode)
		trace_handler = &replay_ops;
	else if (lat_mode)
		trace_handler = &lat_ops;
	else
		usage_with_options(sched_usage, options);

	if (replay_mode)
		__cmd_replay();
	else if (lat_mode)
		__cmd_lat();

	return 0;
}