9 CTX_BIT_KERNEL
= 1 << 1,
11 CTX_BIT_HOST
= 1 << 3,
12 CTX_BIT_IDLE
= 1 << 4,
16 #define NUM_CTX CTX_BIT_MAX
19 * AGGR_GLOBAL: Use CPU 0
20 * AGGR_SOCKET: Use first CPU of socket
21 * AGGR_CORE: Use first CPU of core
22 * AGGR_NONE: Use matching CPU
23 * AGGR_THREAD: Not supported?
25 static struct stats runtime_nsecs_stats
[MAX_NR_CPUS
];
26 static struct stats runtime_cycles_stats
[NUM_CTX
][MAX_NR_CPUS
];
27 static struct stats runtime_stalled_cycles_front_stats
[NUM_CTX
][MAX_NR_CPUS
];
28 static struct stats runtime_stalled_cycles_back_stats
[NUM_CTX
][MAX_NR_CPUS
];
29 static struct stats runtime_branches_stats
[NUM_CTX
][MAX_NR_CPUS
];
30 static struct stats runtime_cacherefs_stats
[NUM_CTX
][MAX_NR_CPUS
];
31 static struct stats runtime_l1_dcache_stats
[NUM_CTX
][MAX_NR_CPUS
];
32 static struct stats runtime_l1_icache_stats
[NUM_CTX
][MAX_NR_CPUS
];
33 static struct stats runtime_ll_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
34 static struct stats runtime_itlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
35 static struct stats runtime_dtlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
36 static struct stats runtime_cycles_in_tx_stats
[NUM_CTX
][MAX_NR_CPUS
];
37 static struct stats runtime_transaction_stats
[NUM_CTX
][MAX_NR_CPUS
];
38 static struct stats runtime_elision_stats
[NUM_CTX
][MAX_NR_CPUS
];
39 static struct stats runtime_topdown_total_slots
[NUM_CTX
][MAX_NR_CPUS
];
40 static struct stats runtime_topdown_slots_issued
[NUM_CTX
][MAX_NR_CPUS
];
41 static struct stats runtime_topdown_slots_retired
[NUM_CTX
][MAX_NR_CPUS
];
42 static struct stats runtime_topdown_fetch_bubbles
[NUM_CTX
][MAX_NR_CPUS
];
43 static struct stats runtime_topdown_recovery_bubbles
[NUM_CTX
][MAX_NR_CPUS
];
44 static bool have_frontend_stalled
;
46 struct stats walltime_nsecs_stats
;
48 void perf_stat__init_shadow_stats(void)
50 have_frontend_stalled
= pmu_have_event("cpu", "stalled-cycles-frontend");
53 static int evsel_context(struct perf_evsel
*evsel
)
57 if (evsel
->attr
.exclude_kernel
)
58 ctx
|= CTX_BIT_KERNEL
;
59 if (evsel
->attr
.exclude_user
)
61 if (evsel
->attr
.exclude_hv
)
63 if (evsel
->attr
.exclude_host
)
65 if (evsel
->attr
.exclude_idle
)
71 void perf_stat__reset_shadow_stats(void)
73 memset(runtime_nsecs_stats
, 0, sizeof(runtime_nsecs_stats
));
74 memset(runtime_cycles_stats
, 0, sizeof(runtime_cycles_stats
));
75 memset(runtime_stalled_cycles_front_stats
, 0, sizeof(runtime_stalled_cycles_front_stats
));
76 memset(runtime_stalled_cycles_back_stats
, 0, sizeof(runtime_stalled_cycles_back_stats
));
77 memset(runtime_branches_stats
, 0, sizeof(runtime_branches_stats
));
78 memset(runtime_cacherefs_stats
, 0, sizeof(runtime_cacherefs_stats
));
79 memset(runtime_l1_dcache_stats
, 0, sizeof(runtime_l1_dcache_stats
));
80 memset(runtime_l1_icache_stats
, 0, sizeof(runtime_l1_icache_stats
));
81 memset(runtime_ll_cache_stats
, 0, sizeof(runtime_ll_cache_stats
));
82 memset(runtime_itlb_cache_stats
, 0, sizeof(runtime_itlb_cache_stats
));
83 memset(runtime_dtlb_cache_stats
, 0, sizeof(runtime_dtlb_cache_stats
));
84 memset(runtime_cycles_in_tx_stats
, 0,
85 sizeof(runtime_cycles_in_tx_stats
));
86 memset(runtime_transaction_stats
, 0,
87 sizeof(runtime_transaction_stats
));
88 memset(runtime_elision_stats
, 0, sizeof(runtime_elision_stats
));
89 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
90 memset(runtime_topdown_total_slots
, 0, sizeof(runtime_topdown_total_slots
));
91 memset(runtime_topdown_slots_retired
, 0, sizeof(runtime_topdown_slots_retired
));
92 memset(runtime_topdown_slots_issued
, 0, sizeof(runtime_topdown_slots_issued
));
93 memset(runtime_topdown_fetch_bubbles
, 0, sizeof(runtime_topdown_fetch_bubbles
));
94 memset(runtime_topdown_recovery_bubbles
, 0, sizeof(runtime_topdown_recovery_bubbles
));
98 * Update various tracking values we maintain to print
99 * more semantic information such as miss/hit ratios,
100 * instruction rates, etc:
102 void perf_stat__update_shadow_stats(struct perf_evsel
*counter
, u64
*count
,
105 int ctx
= evsel_context(counter
);
107 if (perf_evsel__match(counter
, SOFTWARE
, SW_TASK_CLOCK
) ||
108 perf_evsel__match(counter
, SOFTWARE
, SW_CPU_CLOCK
))
109 update_stats(&runtime_nsecs_stats
[cpu
], count
[0]);
110 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
111 update_stats(&runtime_cycles_stats
[ctx
][cpu
], count
[0]);
112 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
113 update_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
], count
[0]);
114 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
115 update_stats(&runtime_transaction_stats
[ctx
][cpu
], count
[0]);
116 else if (perf_stat_evsel__is(counter
, ELISION_START
))
117 update_stats(&runtime_elision_stats
[ctx
][cpu
], count
[0]);
118 else if (perf_stat_evsel__is(counter
, TOPDOWN_TOTAL_SLOTS
))
119 update_stats(&runtime_topdown_total_slots
[ctx
][cpu
], count
[0]);
120 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_ISSUED
))
121 update_stats(&runtime_topdown_slots_issued
[ctx
][cpu
], count
[0]);
122 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_RETIRED
))
123 update_stats(&runtime_topdown_slots_retired
[ctx
][cpu
], count
[0]);
124 else if (perf_stat_evsel__is(counter
, TOPDOWN_FETCH_BUBBLES
))
125 update_stats(&runtime_topdown_fetch_bubbles
[ctx
][cpu
],count
[0]);
126 else if (perf_stat_evsel__is(counter
, TOPDOWN_RECOVERY_BUBBLES
))
127 update_stats(&runtime_topdown_recovery_bubbles
[ctx
][cpu
], count
[0]);
128 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
129 update_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
], count
[0]);
130 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
131 update_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
], count
[0]);
132 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
133 update_stats(&runtime_branches_stats
[ctx
][cpu
], count
[0]);
134 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
135 update_stats(&runtime_cacherefs_stats
[ctx
][cpu
], count
[0]);
136 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
137 update_stats(&runtime_l1_dcache_stats
[ctx
][cpu
], count
[0]);
138 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
139 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
140 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
141 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
142 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
143 update_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
], count
[0]);
144 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
145 update_stats(&runtime_itlb_cache_stats
[ctx
][cpu
], count
[0]);
148 /* used for get_ratio_color() */
150 GRC_STALLED_CYCLES_FE
,
151 GRC_STALLED_CYCLES_BE
,
156 static const char *get_ratio_color(enum grc_type type
, double ratio
)
158 static const double grc_table
[GRC_MAX_NR
][3] = {
159 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
160 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
161 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
163 const char *color
= PERF_COLOR_NORMAL
;
165 if (ratio
> grc_table
[type
][0])
166 color
= PERF_COLOR_RED
;
167 else if (ratio
> grc_table
[type
][1])
168 color
= PERF_COLOR_MAGENTA
;
169 else if (ratio
> grc_table
[type
][2])
170 color
= PERF_COLOR_YELLOW
;
175 static void print_stalled_cycles_frontend(int cpu
,
176 struct perf_evsel
*evsel
, double avg
,
177 struct perf_stat_output_ctx
*out
)
179 double total
, ratio
= 0.0;
181 int ctx
= evsel_context(evsel
);
183 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
186 ratio
= avg
/ total
* 100.0;
188 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
191 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "frontend cycles idle",
194 out
->print_metric(out
->ctx
, NULL
, NULL
, "frontend cycles idle", 0);
197 static void print_stalled_cycles_backend(int cpu
,
198 struct perf_evsel
*evsel
, double avg
,
199 struct perf_stat_output_ctx
*out
)
201 double total
, ratio
= 0.0;
203 int ctx
= evsel_context(evsel
);
205 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
208 ratio
= avg
/ total
* 100.0;
210 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
212 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "backend cycles idle", ratio
);
215 static void print_branch_misses(int cpu
,
216 struct perf_evsel
*evsel
,
218 struct perf_stat_output_ctx
*out
)
220 double total
, ratio
= 0.0;
222 int ctx
= evsel_context(evsel
);
224 total
= avg_stats(&runtime_branches_stats
[ctx
][cpu
]);
227 ratio
= avg
/ total
* 100.0;
229 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
231 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all branches", ratio
);
234 static void print_l1_dcache_misses(int cpu
,
235 struct perf_evsel
*evsel
,
237 struct perf_stat_output_ctx
*out
)
239 double total
, ratio
= 0.0;
241 int ctx
= evsel_context(evsel
);
243 total
= avg_stats(&runtime_l1_dcache_stats
[ctx
][cpu
]);
246 ratio
= avg
/ total
* 100.0;
248 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
250 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-dcache hits", ratio
);
253 static void print_l1_icache_misses(int cpu
,
254 struct perf_evsel
*evsel
,
256 struct perf_stat_output_ctx
*out
)
258 double total
, ratio
= 0.0;
260 int ctx
= evsel_context(evsel
);
262 total
= avg_stats(&runtime_l1_icache_stats
[ctx
][cpu
]);
265 ratio
= avg
/ total
* 100.0;
267 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
268 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-icache hits", ratio
);
271 static void print_dtlb_cache_misses(int cpu
,
272 struct perf_evsel
*evsel
,
274 struct perf_stat_output_ctx
*out
)
276 double total
, ratio
= 0.0;
278 int ctx
= evsel_context(evsel
);
280 total
= avg_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
]);
283 ratio
= avg
/ total
* 100.0;
285 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
286 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all dTLB cache hits", ratio
);
289 static void print_itlb_cache_misses(int cpu
,
290 struct perf_evsel
*evsel
,
292 struct perf_stat_output_ctx
*out
)
294 double total
, ratio
= 0.0;
296 int ctx
= evsel_context(evsel
);
298 total
= avg_stats(&runtime_itlb_cache_stats
[ctx
][cpu
]);
301 ratio
= avg
/ total
* 100.0;
303 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
304 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all iTLB cache hits", ratio
);
307 static void print_ll_cache_misses(int cpu
,
308 struct perf_evsel
*evsel
,
310 struct perf_stat_output_ctx
*out
)
312 double total
, ratio
= 0.0;
314 int ctx
= evsel_context(evsel
);
316 total
= avg_stats(&runtime_ll_cache_stats
[ctx
][cpu
]);
319 ratio
= avg
/ total
* 100.0;
321 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
322 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all LL-cache hits", ratio
);
326 * High level "TopDown" CPU core pipeline bottleneck breakdown.
328 * Basic concept following
329 * Yasin, A Top Down Method for Performance analysis and Counter architecture
332 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
334 * Frontend -> Backend -> Retiring
335 * BadSpeculation in addition means out of order execution that is thrown away
336 * (for example branch mispredictions)
337 * Frontend is instruction decoding.
338 * Backend is execution, like computation and accessing data in memory
339 * Retiring is good execution that is not directly bottlenecked
341 * The formulas are computed in slots.
342 * A slot is an entry in the pipeline, one for each unit of pipeline width
343 * (for example a 4-wide pipeline has 4 slots for each cycle)
346 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
348 * Retiring = SlotsRetired / TotalSlots
349 * FrontendBound = FetchBubbles / TotalSlots
350 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
352 * The kernel provides the mapping to the low level CPU events and any scaling
353 * needed for the CPU pipeline width, for example:
355 * TotalSlots = Cycles * 4
357 * The scaling factor is communicated in the sysfs unit.
359 * In some cases the CPU may not be able to measure all the formulas due to
360 * missing events. In this case multiple formulas are combined, as possible.
362 * Full TopDown supports more levels to sub-divide each area: for example
363 * BackendBound into computing bound and memory bound. For now we only
364 * support Level 1 TopDown.
367 static double sanitize_val(double x
)
369 if (x
< 0 && x
>= -0.02)
374 static double td_total_slots(int ctx
, int cpu
)
376 return avg_stats(&runtime_topdown_total_slots
[ctx
][cpu
]);
379 static double td_bad_spec(int ctx
, int cpu
)
385 total
= avg_stats(&runtime_topdown_slots_issued
[ctx
][cpu
]) -
386 avg_stats(&runtime_topdown_slots_retired
[ctx
][cpu
]) +
387 avg_stats(&runtime_topdown_recovery_bubbles
[ctx
][cpu
]);
388 total_slots
= td_total_slots(ctx
, cpu
);
390 bad_spec
= total
/ total_slots
;
391 return sanitize_val(bad_spec
);
394 static double td_retiring(int ctx
, int cpu
)
397 double total_slots
= td_total_slots(ctx
, cpu
);
398 double ret_slots
= avg_stats(&runtime_topdown_slots_retired
[ctx
][cpu
]);
401 retiring
= ret_slots
/ total_slots
;
405 static double td_fe_bound(int ctx
, int cpu
)
408 double total_slots
= td_total_slots(ctx
, cpu
);
409 double fetch_bub
= avg_stats(&runtime_topdown_fetch_bubbles
[ctx
][cpu
]);
412 fe_bound
= fetch_bub
/ total_slots
;
416 static double td_be_bound(int ctx
, int cpu
)
418 double sum
= (td_fe_bound(ctx
, cpu
) +
419 td_bad_spec(ctx
, cpu
) +
420 td_retiring(ctx
, cpu
));
423 return sanitize_val(1.0 - sum
);
426 void perf_stat__print_shadow_stats(struct perf_evsel
*evsel
,
428 struct perf_stat_output_ctx
*out
)
430 void *ctxp
= out
->ctx
;
431 print_metric_t print_metric
= out
->print_metric
;
432 double total
, ratio
= 0.0, total2
;
433 const char *color
= NULL
;
434 int ctx
= evsel_context(evsel
);
436 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
437 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
440 print_metric(ctxp
, NULL
, "%7.2f ",
441 "insn per cycle", ratio
);
443 print_metric(ctxp
, NULL
, NULL
, "insn per cycle", 0);
445 total
= avg_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
]);
446 total
= max(total
, avg_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
]));
451 print_metric(ctxp
, NULL
, "%7.2f ",
452 "stalled cycles per insn",
454 } else if (have_frontend_stalled
) {
455 print_metric(ctxp
, NULL
, NULL
,
456 "stalled cycles per insn", 0);
458 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
)) {
459 if (runtime_branches_stats
[ctx
][cpu
].n
!= 0)
460 print_branch_misses(cpu
, evsel
, avg
, out
);
462 print_metric(ctxp
, NULL
, NULL
, "of all branches", 0);
464 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
465 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
466 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
467 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
468 if (runtime_l1_dcache_stats
[ctx
][cpu
].n
!= 0)
469 print_l1_dcache_misses(cpu
, evsel
, avg
, out
);
471 print_metric(ctxp
, NULL
, NULL
, "of all L1-dcache hits", 0);
473 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
474 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
475 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
476 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
477 if (runtime_l1_icache_stats
[ctx
][cpu
].n
!= 0)
478 print_l1_icache_misses(cpu
, evsel
, avg
, out
);
480 print_metric(ctxp
, NULL
, NULL
, "of all L1-icache hits", 0);
482 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
483 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
484 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
485 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
486 if (runtime_dtlb_cache_stats
[ctx
][cpu
].n
!= 0)
487 print_dtlb_cache_misses(cpu
, evsel
, avg
, out
);
489 print_metric(ctxp
, NULL
, NULL
, "of all dTLB cache hits", 0);
491 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
492 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
493 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
494 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
495 if (runtime_itlb_cache_stats
[ctx
][cpu
].n
!= 0)
496 print_itlb_cache_misses(cpu
, evsel
, avg
, out
);
498 print_metric(ctxp
, NULL
, NULL
, "of all iTLB cache hits", 0);
500 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
501 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
502 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
503 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
504 if (runtime_ll_cache_stats
[ctx
][cpu
].n
!= 0)
505 print_ll_cache_misses(cpu
, evsel
, avg
, out
);
507 print_metric(ctxp
, NULL
, NULL
, "of all LL-cache hits", 0);
508 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
)) {
509 total
= avg_stats(&runtime_cacherefs_stats
[ctx
][cpu
]);
512 ratio
= avg
* 100 / total
;
514 if (runtime_cacherefs_stats
[ctx
][cpu
].n
!= 0)
515 print_metric(ctxp
, NULL
, "%8.3f %%",
516 "of all cache refs", ratio
);
518 print_metric(ctxp
, NULL
, NULL
, "of all cache refs", 0);
519 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
520 print_stalled_cycles_frontend(cpu
, evsel
, avg
, out
);
521 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
522 print_stalled_cycles_backend(cpu
, evsel
, avg
, out
);
523 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
524 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
528 print_metric(ctxp
, NULL
, "%8.3f", "GHz", ratio
);
530 print_metric(ctxp
, NULL
, NULL
, "Ghz", 0);
532 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
533 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
535 print_metric(ctxp
, NULL
,
536 "%7.2f%%", "transactional cycles",
537 100.0 * (avg
/ total
));
539 print_metric(ctxp
, NULL
, NULL
, "transactional cycles",
541 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
542 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
543 total2
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
547 print_metric(ctxp
, NULL
, "%7.2f%%", "aborted cycles",
548 100.0 * ((total2
-avg
) / total
));
550 print_metric(ctxp
, NULL
, NULL
, "aborted cycles", 0);
551 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
)) {
552 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
557 if (runtime_cycles_in_tx_stats
[ctx
][cpu
].n
!= 0)
558 print_metric(ctxp
, NULL
, "%8.0f",
559 "cycles / transaction", ratio
);
561 print_metric(ctxp
, NULL
, NULL
, "cycles / transaction",
563 } else if (perf_stat_evsel__is(evsel
, ELISION_START
)) {
564 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
569 print_metric(ctxp
, NULL
, "%8.0f", "cycles / elision", ratio
);
570 } else if (perf_evsel__match(evsel
, SOFTWARE
, SW_TASK_CLOCK
) ||
571 perf_evsel__match(evsel
, SOFTWARE
, SW_CPU_CLOCK
)) {
572 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
573 print_metric(ctxp
, NULL
, "%8.3f", "CPUs utilized",
576 print_metric(ctxp
, NULL
, NULL
, "CPUs utilized", 0);
577 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_FETCH_BUBBLES
)) {
578 double fe_bound
= td_fe_bound(ctx
, cpu
);
581 color
= PERF_COLOR_RED
;
582 print_metric(ctxp
, color
, "%8.1f%%", "frontend bound",
584 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_RETIRED
)) {
585 double retiring
= td_retiring(ctx
, cpu
);
588 color
= PERF_COLOR_GREEN
;
589 print_metric(ctxp
, color
, "%8.1f%%", "retiring",
591 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_RECOVERY_BUBBLES
)) {
592 double bad_spec
= td_bad_spec(ctx
, cpu
);
595 color
= PERF_COLOR_RED
;
596 print_metric(ctxp
, color
, "%8.1f%%", "bad speculation",
598 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_ISSUED
)) {
599 double be_bound
= td_be_bound(ctx
, cpu
);
600 const char *name
= "backend bound";
601 static int have_recovery_bubbles
= -1;
603 /* In case the CPU does not support topdown-recovery-bubbles */
604 if (have_recovery_bubbles
< 0)
605 have_recovery_bubbles
= pmu_have_event("cpu",
606 "topdown-recovery-bubbles");
607 if (!have_recovery_bubbles
)
608 name
= "backend bound/bad spec";
611 color
= PERF_COLOR_RED
;
612 if (td_total_slots(ctx
, cpu
) > 0)
613 print_metric(ctxp
, color
, "%8.1f%%", name
,
616 print_metric(ctxp
, NULL
, NULL
, name
, 0);
617 } else if (runtime_nsecs_stats
[cpu
].n
!= 0) {
621 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
624 ratio
= 1000.0 * avg
/ total
;
629 snprintf(unit_buf
, sizeof(unit_buf
), "%c/sec", unit
);
630 print_metric(ctxp
, NULL
, "%8.3f", unit_buf
, ratio
);
632 print_metric(ctxp
, NULL
, NULL
, NULL
, 0);