enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

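/*
 * The shadow counts below are kept per (ctx, cpu): ctx is the bitwise OR of
 * the CTX_BIT_* flags for the contexts an event excludes, so events that
 * exclude different combinations of user/kernel/hv/host/idle never share a
 * slot, and NUM_CTX (== CTX_BIT_MAX) sizes that first array dimension.
 */
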
/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
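/*
 * Note: the 'cpu' index handed to the helpers below is assumed to already be
 * the representative CPU picked according to the aggregation mode above.
 */
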
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;
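/*
 * Wall-clock time is kept as a single global (not per context or CPU); the
 * task-clock branch in perf_stat__print_shadow_stats() divides by it to
 * report "CPUs utilized".
 */
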
void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
}

static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
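/*
 * Example: an event opened with attr.exclude_kernel = 1 and attr.exclude_hv = 1
 * gets ctx = CTX_BIT_KERNEL | CTX_BIT_HV, so its shadow counts never mix with
 * those of an otherwise identical event that also counts kernel mode.
 */
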
void perf_stat__reset_shadow_stats(void)
{
	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(runtime_cycles_in_tx_stats, 0, sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0, sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
				    int cpu)
{
	int ctx = evsel_context(counter);

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
}

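/*
 * Worked example: if "cycles" and "instructions" are counted together, the
 * cycles reading recorded above lands in runtime_cycles_stats[ctx][cpu], and
 * when the instructions counter is printed below, "insn per cycle" is simply
 * avg / avg_stats(&runtime_cycles_stats[ctx][cpu]).
 */
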
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};
static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE]	= { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE]	= { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0,  5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
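/*
 * For example, with GRC_CACHE_MISSES a 12% miss ratio is above 10.0 but not
 * above 20.0, so it comes back as PERF_COLOR_MAGENTA; anything at or below
 * 5.0 stays PERF_COLOR_NORMAL.
 */
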
static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", ratio);
	else
		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_branches_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	int ctx = evsel_context(evsel);

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%7.2f ", "insn per cycle", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

		if (total && avg) {
			out->new_line(ctxp);
			ratio = total / avg;
			print_metric(ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio);
		} else if (have_frontend_stalled) {
			print_metric(ctxp, NULL, NULL, "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_branches_stats[ctx][cpu].n != 0)
			print_branch_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
			print_l1_icache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
			print_ll_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg, out);
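	/*
	 * For plain cycles, the shadow task-clock count (in nanoseconds) is
	 * the denominator: cycles per nanosecond is the clock rate in GHz,
	 * e.g. 3.2e9 cycles over 1e9 ns of task time prints as "3.200 GHz".
	 */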
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles", 0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2 - avg) / total));
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction", 0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / ratio);
		else
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else {
		print_metric(ctxp, NULL, NULL, NULL, 0);
	}
}