// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
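/*
 * Illustrative sketch (not part of the original file): how a caller could
 * map the aggregation modes above to the representative CPU whose shadow
 * values are used. The helper name and the first_cpu_of_unit parameter are
 * assumptions for illustration only; the real mapping is done by perf's
 * aggregation code.
 */
#if 0
static int shadow_stat_cpu(enum aggr_mode mode, int cpu, int first_cpu_of_unit)
{
        switch (mode) {
        case AGGR_GLOBAL:       /* use CPU 0 */
                return 0;
        case AGGR_SOCKET:       /* first CPU of socket/die/core */
        case AGGR_DIE:
        case AGGR_CORE:
                return first_cpu_of_unit;
        case AGGR_NONE:         /* use the matching CPU */
        default:
                return cpu;
        }
}
#endif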
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
struct saved_value {
        struct rb_node rb_node;
        struct perf_evsel *evsel;
        enum stat_type type;
        int ctx;
        int cpu;
        struct runtime_stat *stat;
        struct stats stats;
};
static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
        struct saved_value *a = container_of(rb_node,
                                             struct saved_value,
                                             rb_node);
        const struct saved_value *b = entry;

        if (a->cpu != b->cpu)
                return a->cpu - b->cpu;

        /*
         * Previously the rbtree was used to link generic metrics.
         * The keys were evsel/cpu. Now the rbtree is extended to support
         * per-thread shadow stats. For shadow stats case, the keys
         * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
         * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
         */
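        /*
         * Illustrative examples of the two key shapes (hypothetical values,
         * added for clarity; see saved_value_lookup() below for how the
         * keys are built):
         *
         *   generic metric key: { .evsel = counter, .cpu = 2 }
         *   shadow stat key:    { .evsel = NULL, .cpu = 2,
         *                         .type = STAT_CYCLES, .ctx = ctx, .stat = st }
         */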
        if (a->type != b->type)
                return a->type - b->type;

        if (a->ctx != b->ctx)
                return a->ctx - b->ctx;

        if (a->evsel == NULL && b->evsel == NULL) {
                if (a->stat == b->stat)
                        return 0;

                if ((char *)a->stat < (char *)b->stat)
                        return -1;

                return 1;
        }

        if (a->evsel == b->evsel)
                return 0;
        if ((char *)a->evsel < (char *)b->evsel)
                return -1;
        return +1;
}
static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
                                       const void *entry)
{
        struct saved_value *nd = malloc(sizeof(struct saved_value));

        if (!nd)
                return NULL;
        memcpy(nd, entry, sizeof(struct saved_value));
        return &nd->rb_node;
}
static void saved_value_delete(struct rblist *rblist __maybe_unused,
                               struct rb_node *rb_node)
{
        struct saved_value *v;

        v = container_of(rb_node, struct saved_value, rb_node);
        free(v);
}
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
                                              int cpu,
                                              bool create,
                                              enum stat_type type,
                                              int ctx,
                                              struct runtime_stat *st)
{
        struct rblist *rblist;
        struct rb_node *nd;
        struct saved_value dm = {
                .cpu = cpu,
                .evsel = evsel,
                .type = type,
                .ctx = ctx,
                .stat = st,
        };

        rblist = &st->value_list;

        nd = rblist__find(rblist, &dm);
        if (nd)
                return container_of(nd, struct saved_value, rb_node);
        if (create) {
                rblist__add_node(rblist, &dm);
                nd = rblist__find(rblist, &dm);
                if (nd)
                        return container_of(nd, struct saved_value, rb_node);
        }
        return NULL;
}
void runtime_stat__init(struct runtime_stat *st)
{
        struct rblist *rblist = &st->value_list;

        rblist__init(rblist);
        rblist->node_cmp = saved_value_cmp;
        rblist->node_new = saved_value_new;
        rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
        rblist__exit(&st->value_list);
}
void perf_stat__init_shadow_stats(void)
{
        have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
        runtime_stat__init(&rt_stat);
}
static int evsel_context(struct perf_evsel *evsel)
{
        int ctx = 0;

        if (evsel->attr.exclude_kernel)
                ctx |= CTX_BIT_KERNEL;
        if (evsel->attr.exclude_user)
                ctx |= CTX_BIT_USER;
        if (evsel->attr.exclude_hv)
                ctx |= CTX_BIT_HV;
        if (evsel->attr.exclude_host)
                ctx |= CTX_BIT_HOST;
        if (evsel->attr.exclude_idle)
                ctx |= CTX_BIT_IDLE;

        return ctx;
}
static void reset_stat(struct runtime_stat *st)
{
        struct rblist *rblist;
        struct rb_node *pos, *next;

        rblist = &st->value_list;
        next = rb_first_cached(&rblist->entries);
        while (next) {
                pos = next;
                next = rb_next(pos);
                memset(&container_of(pos, struct saved_value, rb_node)->stats,
                       0,
                       sizeof(struct stats));
        }
}
void perf_stat__reset_shadow_stats(void)
{
        reset_stat(&rt_stat);
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
        reset_stat(st);
}
static void update_runtime_stat(struct runtime_stat *st,
                                enum stat_type type,
                                int ctx, int cpu, u64 count)
{
        struct saved_value *v = saved_value_lookup(NULL, cpu, true,
                                                   type, ctx, st);

        if (v)
                update_stats(&v->stats, count);
}
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
                                    int cpu, struct runtime_stat *st)
{
        int ctx = evsel_context(counter);
        u64 count_ns = count;

        count *= counter->scale;

        if (perf_evsel__is_clock(counter))
                update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
                update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TRANSACTION_START))
                update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
                update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
                update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
                update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
                                    ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
                                    ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
                update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
                                    ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
                update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
                update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
                update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
                update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, SMI_NUM))
                update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, APERF))
                update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

        if (counter->collect_stat) {
                struct saved_value *v = saved_value_lookup(counter, cpu, true,
                                                           STAT_NONE, 0, st);
                update_stats(&v->stats, count);
        }
}
/* used for get_ratio_color() */
enum grc_type {
        GRC_STALLED_CYCLES_FE,
        GRC_STALLED_CYCLES_BE,
        GRC_CACHE_MISSES,
        GRC_MAX_NR
};
static const char *get_ratio_color(enum grc_type type, double ratio)
{
        static const double grc_table[GRC_MAX_NR][3] = {
                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
        };
        const char *color = PERF_COLOR_NORMAL;

        if (ratio > grc_table[type][0])
                color = PERF_COLOR_RED;
        else if (ratio > grc_table[type][1])
                color = PERF_COLOR_MAGENTA;
        else if (ratio > grc_table[type][2])
                color = PERF_COLOR_YELLOW;

        return color;
}
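/*
 * Example (added for clarity): with the GRC_CACHE_MISSES thresholds
 * { 20.0, 10.0, 5.0 } above, a 25% miss ratio is printed red, 15%
 * magenta, 7% yellow, and 3% in the normal color.
 */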
static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
                                                const char *name)
{
        struct perf_evsel *c2;

        evlist__for_each_entry (evsel_list, c2) {
                if (!strcasecmp(c2->name, name))
                        return c2;
        }
        return NULL;
}
/* Mark MetricExpr target events, and link the events that use them to those targets. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
        struct perf_evsel *counter, *leader, **metric_events, *oc;
        bool found;
        const char **metric_names;
        int i;
        int num_metric_names;

        evlist__for_each_entry(evsel_list, counter) {
                bool invalid = false;

                leader = counter->leader;
                if (!counter->metric_expr)
                        continue;
                metric_events = counter->metric_events;
                if (!metric_events) {
                        if (expr__find_other(counter->metric_expr, counter->name,
                                             &metric_names, &num_metric_names) < 0)
                                continue;

                        metric_events = calloc(sizeof(struct perf_evsel *),
                                               num_metric_names + 1);
                        if (!metric_events)
                                return;
                        counter->metric_events = metric_events;
                }

                for (i = 0; i < num_metric_names; i++) {
                        found = false;
                        if (leader) {
                                /* Search in group */
                                for_each_group_member (oc, leader) {
                                        if (!strcasecmp(oc->name, metric_names[i])) {
                                                found = true;
                                                break;
                                        }
                                }
                        }
                        if (!found) {
                                /* Search ignoring groups */
                                oc = perf_stat__find_event(evsel_list, metric_names[i]);
                        }
                        if (!oc) {
                                /* Deduping one is good enough to handle duplicated PMUs. */
                                static char *printed;

                                /*
                                 * Adding events automatically would be difficult, because
                                 * it would risk creating groups that are not schedulable.
                                 * perf stat doesn't understand all the scheduling constraints
                                 * of events. So we ask the user instead to add the missing
                                 * events.
                                 */
                                if (!printed || strcasecmp(printed, metric_names[i])) {
                                        fprintf(stderr,
                                                "Add %s event to groups to get metric expression for %s\n",
                                                metric_names[i],
                                                counter->name);
                                        printed = strdup(metric_names[i]);
                                }
                                invalid = true;
                                continue;
                        }
                        metric_events[i] = oc;
                        oc->collect_stat = true;
                }
                metric_events[i] = NULL;
                if (invalid) {
                        counter->metric_events = NULL;
                        counter->metric_expr = NULL;
                }
        }
}
static double runtime_stat_avg(struct runtime_stat *st,
                               enum stat_type type, int ctx, int cpu)
{
        struct saved_value *v;

        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
        if (!v)
                return 0.0;

        return avg_stats(&v->stats);
}
static double runtime_stat_n(struct runtime_stat *st,
                             enum stat_type type, int ctx, int cpu)
{
        struct saved_value *v;

        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
        if (!v)
                return 0.0;

        return v->stats.n;
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
                                          int cpu,
                                          struct perf_evsel *evsel, double avg,
                                          struct perf_stat_output_ctx *out,
                                          struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

        if (ratio)
                out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
                                  ratio);
        else
                out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
                                         int cpu,
                                         struct perf_evsel *evsel, double avg,
                                         struct perf_stat_output_ctx *out,
                                         struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}
static void print_branch_misses(struct perf_stat_config *config,
                                int cpu,
                                struct perf_evsel *evsel,
                                double avg,
                                struct perf_stat_output_ctx *out,
                                struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
                                   int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out,
                                   struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}
static void print_l1_icache_misses(struct perf_stat_config *config,
                                   int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out,
                                   struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
                                    int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out,
                                    struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
                                    int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out,
                                    struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}
static void print_ll_cache_misses(struct perf_stat_config *config,
                                  int cpu,
                                  struct perf_evsel *evsel,
                                  double avg,
                                  struct perf_stat_output_ctx *out,
                                  struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions).
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline for each unit of the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle).
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *                      TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
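/*
 * Worked example (hypothetical numbers, added for clarity) for a 4-wide CPU:
 *   Cycles = 1000  =>  TotalSlots = 4000
 *   SlotsIssued = 3000, SlotsRetired = 2500, RecoveryBubbles = 100,
 *   FetchBubbles = 400
 *
 *   BadSpeculation = ((3000 - 2500) + 100) / 4000 = 0.15
 *   Retiring       = 2500 / 4000                  = 0.625
 *   FrontendBound  = 400 / 4000                   = 0.10
 *   BackendBound   = 1.0 - 0.15 - 0.625 - 0.10    = 0.125
 */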
/* Clamp slightly negative values (measurement/rounding noise) to zero. */
static double sanitize_val(double x)
{
        if (x < 0 && x >= -0.02)
                return 0.0;
        return x;
}
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
        return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}
static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
        double bad_spec = 0;
        double total_slots;
        double total;

        total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
                runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
                runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

        total_slots = td_total_slots(ctx, cpu, st);
        if (total_slots)
                bad_spec = total / total_slots;
        return sanitize_val(bad_spec);
}
static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
        double retiring = 0;
        double total_slots = td_total_slots(ctx, cpu, st);
        double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
                                            ctx, cpu);

        if (total_slots)
                retiring = ret_slots / total_slots;
        return retiring;
}
static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
        double fe_bound = 0;
        double total_slots = td_total_slots(ctx, cpu, st);
        double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
                                            ctx, cpu);

        if (total_slots)
                fe_bound = fetch_bub / total_slots;
        return fe_bound;
}
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
        double sum = (td_fe_bound(ctx, cpu, st) +
                      td_bad_spec(ctx, cpu, st) +
                      td_retiring(ctx, cpu, st));
        if (sum == 0)
                return 0;
        return sanitize_val(1.0 - sum);
}
static void print_smi_cost(struct perf_stat_config *config,
                           int cpu, struct perf_evsel *evsel,
                           struct perf_stat_output_ctx *out,
                           struct runtime_stat *st)
{
        double smi_num, aperf, cycles, cost = 0.0;
        int ctx = evsel_context(evsel);
        const char *color = NULL;

        smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
        aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
        cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if ((cycles == 0) || (aperf == 0))
                return;

        if (smi_num)
                cost = (aperf - cycles) / aperf * 100.00;

        if (cost > 10)
                color = PERF_COLOR_RED;
        out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
        out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
static void generic_metric(struct perf_stat_config *config,
                           const char *metric_expr,
                           struct perf_evsel **metric_events,
                           char *name,
                           const char *metric_name,
                           double avg,
                           int cpu,
                           struct perf_stat_output_ctx *out,
                           struct runtime_stat *st)
{
        print_metric_t print_metric = out->print_metric;
        struct parse_ctx pctx;
        double ratio;
        int i;
        void *ctxp = out->ctx;

        expr__ctx_init(&pctx);
        expr__add_id(&pctx, name, avg);
        for (i = 0; metric_events[i]; i++) {
                struct saved_value *v;
                struct stats *stats;
                double scale;

                if (!strcmp(metric_events[i]->name, "duration_time")) {
                        stats = &walltime_nsecs_stats;
                        scale = 1e-9;
                } else {
                        v = saved_value_lookup(metric_events[i], cpu, false,
                                               STAT_NONE, 0, st);
                        if (!v)
                                break;
                        stats = &v->stats;
                        scale = 1.0;
                }
                expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
        }
        if (!metric_events[i]) {
                const char *p = metric_expr;

                if (expr__parse(&ratio, &pctx, &p) == 0)
                        print_metric(config, ctxp, NULL, "%8.1f",
                                metric_name ?
                                metric_name :
                                out->force_header ? name : "",
                                ratio);
                else
                        print_metric(config, ctxp, NULL, NULL,
                                     out->force_header ?
                                     (metric_name ? metric_name : name) : "", 0);
        } else
                print_metric(config, ctxp, NULL, NULL, "", 0);
}
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                                   struct perf_evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out,
                                   struct rblist *metric_events,
                                   struct runtime_stat *st)
{
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        const char *color = NULL;
        int ctx = evsel_context(evsel);
        struct metric_event *me;
        int num = 1;
        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

                if (total) {
                        ratio = avg / total;
                        print_metric(config, ctxp, NULL, "%7.2f ",
                                        "insn per cycle", ratio);
                } else {
                        print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
                }

                total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
                                         ctx, cpu);

                total = max(total, runtime_stat_avg(st,
                                                    STAT_STALLED_CYCLES_BACK,
                                                    ctx, cpu));

                if (total && avg) {
                        out->new_line(config, ctxp);
                        ratio = total / avg;
                        print_metric(config, ctxp, NULL, "%7.2f ",
                                        "stalled cycles per insn",
                                        ratio);
                } else if (have_frontend_stalled) {
                        print_metric(config, ctxp, NULL, NULL,
                                     "stalled cycles per insn", 0);
                }
        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
                if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
                        print_branch_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
                        print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
                        print_l1_icache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
                        print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
                        print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
                        print_ll_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

                if (total)
                        ratio = avg * 100 / total;

                if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
                        print_metric(config, ctxp, NULL, "%8.3f %%",
                                     "of all cache refs", ratio);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
                print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

                if (total) {
                        ratio = avg / total;
                        print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
                        print_metric(config, ctxp, NULL, NULL, "GHz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

                if (total)
                        print_metric(config, ctxp, NULL,
                                "%7.2f%%", "transactional cycles",
                                100.0 * (avg / total));
                else
                        print_metric(config, ctxp, NULL, NULL, "transactional cycles",
                                     0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
                total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

                if (total2 < avg)
                        total2 = avg;
                if (total)
                        print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2 - avg) / total));
                else
                        print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
        } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
                                         ctx, cpu);

                if (avg)
                        ratio = total / avg;

                if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
                        print_metric(config, ctxp, NULL, "%8.0f",
                                     "cycles / transaction", ratio);
                else
                        print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
                                     0);
        } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
                                         ctx, cpu);

                if (avg)
                        ratio = total / avg;

                print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (perf_evsel__is_clock(evsel)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
                        print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
                                     avg / (ratio * evsel->scale));
                else
                        print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
                double fe_bound = td_fe_bound(ctx, cpu, st);

                if (fe_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
                                fe_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
                double retiring = td_retiring(ctx, cpu, st);

                if (retiring > 0.7)
                        color = PERF_COLOR_GREEN;
                print_metric(config, ctxp, color, "%8.1f%%", "retiring",
                                retiring * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
                double bad_spec = td_bad_spec(ctx, cpu, st);

                if (bad_spec > 0.1)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
                                bad_spec * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
                double be_bound = td_be_bound(ctx, cpu, st);
                const char *name = "backend bound";
                static int have_recovery_bubbles = -1;

                /* In case the CPU does not support topdown-recovery-bubbles */
                if (have_recovery_bubbles < 0)
                        have_recovery_bubbles = pmu_have_event("cpu",
                                        "topdown-recovery-bubbles");
                if (!have_recovery_bubbles)
                        name = "backend bound/bad spec";

                if (be_bound > 0.2)
                        color = PERF_COLOR_RED;
                if (td_total_slots(ctx, cpu, st) > 0)
                        print_metric(config, ctxp, color, "%8.1f%%", name,
                                        be_bound * 100.);
                else
                        print_metric(config, ctxp, NULL, NULL, name, 0);
        } else if (evsel->metric_expr) {
                generic_metric(config, evsel->metric_expr, evsel->metric_events,
                               evsel->name, evsel->metric_name, avg, cpu, out, st);
        } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
                char unit = 'M';
                char unit_buf[10];

                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

                if (total)
                        ratio = 1000.0 * avg / total;
                if (ratio < 0.001) {
                        ratio *= 1000;
                        unit = 'K';
                }
                snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
                print_smi_cost(config, cpu, evsel, out, st);
        } else {
                num = 0;
        }

        if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
                struct metric_expr *mexp;

                list_for_each_entry (mexp, &me->head, nd) {
                        if (num++ > 0)
                                out->new_line(config, ctxp);
                        generic_metric(config, mexp->metric_expr, mexp->metric_events,
                                       evsel->name, mexp->metric_name,
                                       avg, cpu, out, st);
                }
        }
        if (num == 0)
                print_metric(config, ctxp, NULL, NULL, NULL, 0);
}