// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return 1;
}
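
/*
 * Note: the comparator above only has to produce a stable total order
 * for rblist, not a semantically meaningful one: cpu first, then
 * type/ctx, and finally pointer identity (->stat for shadow stats,
 * ->evsel for generic metrics).
 */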

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}
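
/*
 * Lookup convention (a summary, not new behavior): writers call
 * saved_value_lookup() with create=true so a node is allocated on first
 * use; readers such as runtime_stat_avg() and runtime_stat_n() below
 * pass create=false and treat a missing node as "no data".
 */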

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}
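
/*
 * rt_stat above is the default, globally aggregated instance; per-thread
 * shadow stats (see the rbtree comment in saved_value_cmp) use separately
 * allocated struct runtime_stat instances set up with runtime_stat__init()
 * and torn down with runtime_stat__exit().
 */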

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}

static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
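
/*
 * Example: a counter opened with exclude_kernel=1 and exclude_hv=1 gets
 * ctx = CTX_BIT_KERNEL | CTX_BIT_HV, keeping its shadow stats separate
 * from an otherwise identical counter that also counts kernel mode.
 */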

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}
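
/*
 * update_stats() (util/stat.c) keeps a running mean and variance in each
 * saved_value, so runtime_stat_avg()/runtime_stat_n() below can report
 * interval averages and sample counts without storing any history.
 */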

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;

	count *= counter->scale;

	if (perf_evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, true,
							   STAT_NONE, 0, st);
		update_stats(&v->stats, count);
	}
}
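
/*
 * The collect_stat branch additionally records the scaled count keyed by
 * (evsel, cpu), so that generic_metric() can later resolve MetricExpr
 * references by event name.
 */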

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0,  5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
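
/*
 * Example: a 35% frontend-stall ratio is above the 30.0 but below the
 * 50.0 threshold in grc_table, so it prints in magenta; above 50% it
 * would print in red.
 */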

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name))
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i])) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
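
/*
 * All the print_*_misses() helpers above share one shape: scale the miss
 * count against the matching access total from the shadow stats and
 * color the resulting ratio with the GRC_CACHE_MISSES thresholds.
 */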

/*
 * High level "TopDown" CPU core pipe line bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
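
/*
 * Worked example with hypothetical numbers for a 4-wide pipeline:
 * Cycles = 1000, so TotalSlots = 4000; SlotsIssued = 3000,
 * SlotsRetired = 2600, RecoveryBubbles = 200, FetchBubbles = 400.
 * BadSpeculation = ((3000 - 2600) + 200) / 4000 = 15%
 * Retiring       = 2600 / 4000               = 65%
 * FrontendBound  = 400 / 4000                = 10%
 * BackendBound   = 1 - 0.15 - 0.65 - 0.10    = 10%
 */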

static double sanitize_val(double x)
{
	/* Clamp small negative values (rounding noise) to 0 */
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
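
/*
 * Because td_be_bound() is derived as a remainder, measurement noise in
 * the other three fractions can push it slightly negative; sanitize_val()
 * clamps such small negative results (down to -2%) back to 0.
 */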

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
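
/*
 * Background (a summary, assuming freeze_on_smi is enabled, which
 * perf stat --smi-cost arranges): APERF keeps counting in SMM while the
 * core cycles counter freezes during SMIs, so (aperf - cycles) / aperf
 * approximates the fraction of cycles lost to SMI handlers.
 */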

static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;

	expr__ctx_init(&pctx);
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
		}
		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
	}
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(config, ctxp, NULL, "%8.1f",
				     metric_name ?
				     metric_name :
				     out->force_header ? name : "",
				     ratio);
		else
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);
}

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		} else if (have_frontend_stalled) {
			print_metric(config, ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
				     be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
			       evsel->metric_name, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
				       evsel->name, mexp->metric_name,
				       avg, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}