8 CTX_BIT_KERNEL
= 1 << 1,
10 CTX_BIT_HOST
= 1 << 3,
11 CTX_BIT_IDLE
= 1 << 4,
15 #define NUM_CTX CTX_BIT_MAX
17 static struct stats runtime_nsecs_stats
[MAX_NR_CPUS
];
18 static struct stats runtime_cycles_stats
[NUM_CTX
][MAX_NR_CPUS
];
19 static struct stats runtime_stalled_cycles_front_stats
[NUM_CTX
][MAX_NR_CPUS
];
20 static struct stats runtime_stalled_cycles_back_stats
[NUM_CTX
][MAX_NR_CPUS
];
21 static struct stats runtime_branches_stats
[NUM_CTX
][MAX_NR_CPUS
];
22 static struct stats runtime_cacherefs_stats
[NUM_CTX
][MAX_NR_CPUS
];
23 static struct stats runtime_l1_dcache_stats
[NUM_CTX
][MAX_NR_CPUS
];
24 static struct stats runtime_l1_icache_stats
[NUM_CTX
][MAX_NR_CPUS
];
25 static struct stats runtime_ll_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
26 static struct stats runtime_itlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
27 static struct stats runtime_dtlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
28 static struct stats runtime_cycles_in_tx_stats
[NUM_CTX
][MAX_NR_CPUS
];
29 static struct stats runtime_transaction_stats
[NUM_CTX
][MAX_NR_CPUS
];
30 static struct stats runtime_elision_stats
[NUM_CTX
][MAX_NR_CPUS
];
32 struct stats walltime_nsecs_stats
;
34 static int evsel_context(struct perf_evsel
*evsel
)
38 if (evsel
->attr
.exclude_kernel
)
39 ctx
|= CTX_BIT_KERNEL
;
40 if (evsel
->attr
.exclude_user
)
42 if (evsel
->attr
.exclude_hv
)
44 if (evsel
->attr
.exclude_host
)
46 if (evsel
->attr
.exclude_idle
)
52 void perf_stat__reset_shadow_stats(void)
54 memset(runtime_nsecs_stats
, 0, sizeof(runtime_nsecs_stats
));
55 memset(runtime_cycles_stats
, 0, sizeof(runtime_cycles_stats
));
56 memset(runtime_stalled_cycles_front_stats
, 0, sizeof(runtime_stalled_cycles_front_stats
));
57 memset(runtime_stalled_cycles_back_stats
, 0, sizeof(runtime_stalled_cycles_back_stats
));
58 memset(runtime_branches_stats
, 0, sizeof(runtime_branches_stats
));
59 memset(runtime_cacherefs_stats
, 0, sizeof(runtime_cacherefs_stats
));
60 memset(runtime_l1_dcache_stats
, 0, sizeof(runtime_l1_dcache_stats
));
61 memset(runtime_l1_icache_stats
, 0, sizeof(runtime_l1_icache_stats
));
62 memset(runtime_ll_cache_stats
, 0, sizeof(runtime_ll_cache_stats
));
63 memset(runtime_itlb_cache_stats
, 0, sizeof(runtime_itlb_cache_stats
));
64 memset(runtime_dtlb_cache_stats
, 0, sizeof(runtime_dtlb_cache_stats
));
65 memset(runtime_cycles_in_tx_stats
, 0,
66 sizeof(runtime_cycles_in_tx_stats
));
67 memset(runtime_transaction_stats
, 0,
68 sizeof(runtime_transaction_stats
));
69 memset(runtime_elision_stats
, 0, sizeof(runtime_elision_stats
));
70 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
74 * Update various tracking values we maintain to print
75 * more semantic information such as miss/hit ratios,
76 * instruction rates, etc:
78 void perf_stat__update_shadow_stats(struct perf_evsel
*counter
, u64
*count
,
81 int ctx
= evsel_context(counter
);
83 if (perf_evsel__match(counter
, SOFTWARE
, SW_TASK_CLOCK
))
84 update_stats(&runtime_nsecs_stats
[cpu
], count
[0]);
85 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
86 update_stats(&runtime_cycles_stats
[ctx
][cpu
], count
[0]);
87 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
88 update_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
], count
[0]);
89 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
90 update_stats(&runtime_transaction_stats
[ctx
][cpu
], count
[0]);
91 else if (perf_stat_evsel__is(counter
, ELISION_START
))
92 update_stats(&runtime_elision_stats
[ctx
][cpu
], count
[0]);
93 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
94 update_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
], count
[0]);
95 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
96 update_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
], count
[0]);
97 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
98 update_stats(&runtime_branches_stats
[ctx
][cpu
], count
[0]);
99 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
100 update_stats(&runtime_cacherefs_stats
[ctx
][cpu
], count
[0]);
101 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
102 update_stats(&runtime_l1_dcache_stats
[ctx
][cpu
], count
[0]);
103 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
104 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
105 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
106 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
107 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
108 update_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
], count
[0]);
109 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
110 update_stats(&runtime_itlb_cache_stats
[ctx
][cpu
], count
[0]);
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	/*
	 * NOTE(review): the tail of this enum was lost in extraction;
	 * GRC_CACHE_MISSES and GRC_MAX_NR are reconstructed from their
	 * uses in get_ratio_color() and the print_* helpers below.
	 */
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};
121 static const char *get_ratio_color(enum grc_type type
, double ratio
)
123 static const double grc_table
[GRC_MAX_NR
][3] = {
124 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
125 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
126 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
128 const char *color
= PERF_COLOR_NORMAL
;
130 if (ratio
> grc_table
[type
][0])
131 color
= PERF_COLOR_RED
;
132 else if (ratio
> grc_table
[type
][1])
133 color
= PERF_COLOR_MAGENTA
;
134 else if (ratio
> grc_table
[type
][2])
135 color
= PERF_COLOR_YELLOW
;
140 static void print_stalled_cycles_frontend(FILE *out
, int cpu
,
141 struct perf_evsel
*evsel
142 __maybe_unused
, double avg
)
144 double total
, ratio
= 0.0;
146 int ctx
= evsel_context(evsel
);
148 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
151 ratio
= avg
/ total
* 100.0;
153 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
156 color_fprintf(out
, color
, "%6.2f%%", ratio
);
157 fprintf(out
, " frontend cycles idle ");
160 static void print_stalled_cycles_backend(FILE *out
, int cpu
,
161 struct perf_evsel
*evsel
162 __maybe_unused
, double avg
)
164 double total
, ratio
= 0.0;
166 int ctx
= evsel_context(evsel
);
168 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
171 ratio
= avg
/ total
* 100.0;
173 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
176 color_fprintf(out
, color
, "%6.2f%%", ratio
);
177 fprintf(out
, " backend cycles idle ");
180 static void print_branch_misses(FILE *out
, int cpu
,
181 struct perf_evsel
*evsel __maybe_unused
,
184 double total
, ratio
= 0.0;
186 int ctx
= evsel_context(evsel
);
188 total
= avg_stats(&runtime_branches_stats
[ctx
][cpu
]);
191 ratio
= avg
/ total
* 100.0;
193 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
196 color_fprintf(out
, color
, "%6.2f%%", ratio
);
197 fprintf(out
, " of all branches ");
200 static void print_l1_dcache_misses(FILE *out
, int cpu
,
201 struct perf_evsel
*evsel __maybe_unused
,
204 double total
, ratio
= 0.0;
206 int ctx
= evsel_context(evsel
);
208 total
= avg_stats(&runtime_l1_dcache_stats
[ctx
][cpu
]);
211 ratio
= avg
/ total
* 100.0;
213 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
216 color_fprintf(out
, color
, "%6.2f%%", ratio
);
217 fprintf(out
, " of all L1-dcache hits ");
220 static void print_l1_icache_misses(FILE *out
, int cpu
,
221 struct perf_evsel
*evsel __maybe_unused
,
224 double total
, ratio
= 0.0;
226 int ctx
= evsel_context(evsel
);
228 total
= avg_stats(&runtime_l1_icache_stats
[ctx
][cpu
]);
231 ratio
= avg
/ total
* 100.0;
233 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
236 color_fprintf(out
, color
, "%6.2f%%", ratio
);
237 fprintf(out
, " of all L1-icache hits ");
240 static void print_dtlb_cache_misses(FILE *out
, int cpu
,
241 struct perf_evsel
*evsel __maybe_unused
,
244 double total
, ratio
= 0.0;
246 int ctx
= evsel_context(evsel
);
248 total
= avg_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
]);
251 ratio
= avg
/ total
* 100.0;
253 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
256 color_fprintf(out
, color
, "%6.2f%%", ratio
);
257 fprintf(out
, " of all dTLB cache hits ");
260 static void print_itlb_cache_misses(FILE *out
, int cpu
,
261 struct perf_evsel
*evsel __maybe_unused
,
264 double total
, ratio
= 0.0;
266 int ctx
= evsel_context(evsel
);
268 total
= avg_stats(&runtime_itlb_cache_stats
[ctx
][cpu
]);
271 ratio
= avg
/ total
* 100.0;
273 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
276 color_fprintf(out
, color
, "%6.2f%%", ratio
);
277 fprintf(out
, " of all iTLB cache hits ");
280 static void print_ll_cache_misses(FILE *out
, int cpu
,
281 struct perf_evsel
*evsel __maybe_unused
,
284 double total
, ratio
= 0.0;
286 int ctx
= evsel_context(evsel
);
288 total
= avg_stats(&runtime_ll_cache_stats
[ctx
][cpu
]);
291 ratio
= avg
/ total
* 100.0;
293 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
296 color_fprintf(out
, color
, "%6.2f%%", ratio
);
297 fprintf(out
, " of all LL-cache hits ");
300 void perf_stat__print_shadow_stats(FILE *out
, struct perf_evsel
*evsel
,
301 double avg
, int cpu
, enum aggr_mode aggr
)
303 double total
, ratio
= 0.0, total2
;
304 int ctx
= evsel_context(evsel
);
306 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
307 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
310 fprintf(out
, " # %5.2f insns per cycle ", ratio
);
314 total
= avg_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
]);
315 total
= max(total
, avg_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
]));
320 if (aggr
== AGGR_NONE
)
322 fprintf(out
, " # %5.2f stalled cycles per insn", ratio
);
325 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
) &&
326 runtime_branches_stats
[ctx
][cpu
].n
!= 0) {
327 print_branch_misses(out
, cpu
, evsel
, avg
);
329 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
330 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
331 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
332 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16)) &&
333 runtime_l1_dcache_stats
[ctx
][cpu
].n
!= 0) {
334 print_l1_dcache_misses(out
, cpu
, evsel
, avg
);
336 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
337 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
338 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
339 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16)) &&
340 runtime_l1_icache_stats
[ctx
][cpu
].n
!= 0) {
341 print_l1_icache_misses(out
, cpu
, evsel
, avg
);
343 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
344 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
345 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
346 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16)) &&
347 runtime_dtlb_cache_stats
[ctx
][cpu
].n
!= 0) {
348 print_dtlb_cache_misses(out
, cpu
, evsel
, avg
);
350 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
351 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
352 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
353 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16)) &&
354 runtime_itlb_cache_stats
[ctx
][cpu
].n
!= 0) {
355 print_itlb_cache_misses(out
, cpu
, evsel
, avg
);
357 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
358 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
359 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
360 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16)) &&
361 runtime_ll_cache_stats
[ctx
][cpu
].n
!= 0) {
362 print_ll_cache_misses(out
, cpu
, evsel
, avg
);
363 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
) &&
364 runtime_cacherefs_stats
[ctx
][cpu
].n
!= 0) {
365 total
= avg_stats(&runtime_cacherefs_stats
[ctx
][cpu
]);
368 ratio
= avg
* 100 / total
;
370 fprintf(out
, " # %8.3f %% of all cache refs ", ratio
);
372 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
373 print_stalled_cycles_frontend(out
, cpu
, evsel
, avg
);
374 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
375 print_stalled_cycles_backend(out
, cpu
, evsel
, avg
);
376 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
377 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
381 fprintf(out
, " # %8.3f GHz ", ratio
);
385 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
386 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
389 " # %5.2f%% transactional cycles ",
390 100.0 * (avg
/ total
));
391 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
392 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
393 total2
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
398 " # %5.2f%% aborted cycles ",
399 100.0 * ((total2
-avg
) / total
));
400 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
) &&
401 runtime_cycles_in_tx_stats
[ctx
][cpu
].n
!= 0) {
402 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
407 fprintf(out
, " # %8.0f cycles / transaction ", ratio
);
408 } else if (perf_stat_evsel__is(evsel
, ELISION_START
) &&
409 runtime_cycles_in_tx_stats
[ctx
][cpu
].n
!= 0) {
410 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
415 fprintf(out
, " # %8.0f cycles / elision ", ratio
);
416 } else if (perf_evsel__match(evsel
, SOFTWARE
, SW_TASK_CLOCK
)) {
417 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
418 fprintf(out
, " # %8.3f CPUs utilized ", avg
/ ratio
);
421 } else if (runtime_nsecs_stats
[cpu
].n
!= 0) {
424 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
427 ratio
= 1000.0 * avg
/ total
;
433 fprintf(out
, " # %8.3f %c/sec ", ratio
, unit
);