// SPDX-License-Identifier: GPL-2.0
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
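/*
 * One rbtree node per saved counter value.  The key layout is described in
 * saved_value_cmp() below: evsel/cpu for generic metrics, cpu/type/ctx/stat
 * for the per-thread shadow stats.
 */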
struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
};
static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;

	if ((char *)a->evsel < (char *)b->evsel)
		return -1;

	return 1;
}
static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}
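/*
 * Find the saved_value node for the given key, optionally inserting a new
 * node built from the lookup template when "create" is true.
 */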
static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}
void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	runtime_stat__init(&rt_stat);
}
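/*
 * Encode the event's exclude_* attribute bits into a small context id, so
 * that e.g. kernel-only and user-only counts land in separate shadow slots.
 */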
static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}
static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;
	struct saved_value *v;

	count *= counter->scale;

	if (evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu, true, STAT_NONE, 0, st);
		v->metric_total += count;
	}
}
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
					   const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}
/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	struct expr_parse_ctx ctx;
	struct hashmap_entry *cur;
	size_t bkt;
	int i;

	expr__ctx_init(&ctx);
	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;

		expr__ctx_clear(&ctx);
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr,
					     counter->name,
					     &ctx, 1) < 0)
				continue;

			metric_events = calloc(sizeof(struct evsel *),
					       hashmap__size(&ctx.ids) + 1);
			if (!metric_events) {
				expr__ctx_clear(&ctx);
				return;
			}
			counter->metric_events = metric_events;
		}

		i = 0;
		hashmap__for_each_entry((&ctx.ids), cur, bkt) {
			const char *metric_name = (const char *)cur->key;

			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name,
							metric_name) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list,
							   metric_name);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed ||
				    strcasecmp(printed, metric_name)) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_name,
						counter->name);
					printed = strdup(metric_name);
				}
				invalid = true;
				continue;
			}
			metric_events[i++] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
	expr__ctx_clear(&ctx);
}
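/*
 * Read-side helpers: runtime_stat_avg() returns the running average and
 * runtime_stat_n() the number of updates recorded for the given
 * type/ctx/cpu key, or 0.0 when nothing was recorded.
 */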
static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}
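/*
 * The print_* helpers below all follow the same pattern: compute the ratio
 * of the counter's average against the matching base value recorded above,
 * pick a color via get_ratio_color() and hand the formatted result to the
 * caller-supplied print_metric callback.
 */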
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}
static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct evsel *evsel, double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel, double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}
static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel, double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel, double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel, double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}
static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct evsel *evsel, double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
/*
 * High level "TopDown" CPU core pipe line bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
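/*
 * Worked example of the Level 1 formulas above (illustrative numbers only,
 * not from any particular CPU): a 4-wide core that ran 1000 cycles has
 * TotalSlots = 4 * 1000 = 4000.  With SlotsIssued = 3000, SlotsRetired = 2400,
 * RecoveryBubbles = 200 and FetchBubbles = 800:
 *
 *   BadSpeculation = ((3000 - 2400) + 200) / 4000 = 0.20
 *   Retiring       = 2400 / 4000                  = 0.60
 *   FrontendBound  =  800 / 4000                  = 0.20
 *   BackendBound   = 1.0 - 0.20 - 0.60 - 0.20     = 0.00
 */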
static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}
static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
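/*
 * Resolve every event (and metric reference) used by a metric expression
 * into a value in the expression parser context, so that the expression can
 * later be evaluated with expr__parse().
 */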
static int prepare_metric(struct evsel **metric_events,
			  struct metric_ref *metric_refs,
			  struct expr_parse_ctx *pctx,
			  int cpu,
			  struct runtime_stat *st)
{
	double scale;
	char *n, *pn;
	int i, j, ret;

	expr__ctx_init(pctx);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
			metric_total = v->metric_total;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return -ENOMEM;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;

		if (metric_total)
			expr__add_id_val(pctx, n, metric_total);
		else
			expr__add_id_val(pctx, n, avg_stats(stats)*scale);
	}

	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
		ret = expr__add_ref(pctx, &metric_refs[j]);
		if (ret)
			return ret;
	}

	return i;
}
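/*
 * Evaluate a MetricExpr for one counter and print the result through the
 * output callbacks, applying the metric unit/scale when one is given.
 */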
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   struct metric_ref *metric_refs,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   int runtime,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct expr_parse_ctx pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;

	i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
	if (i < 0)
		return;

	if (!metric_events[i]) {
		if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
					&unit, &scale) >= 0) {
					ratio *= scale;
				}
				if (strstr(metric_expr, "?"))
					scnprintf(metric_bf, sizeof(metric_bf),
						  "%s %s_%d", unit, metric_name, runtime);
				else
					scnprintf(metric_bf, sizeof(metric_bf),
						  "%s %s", unit, metric_name);

				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.2f",
					     metric_name ?
					     metric_name :
					     out->force_header ? name : "",
					     ratio);
			}
		} else {
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else {
		print_metric(config, ctxp, NULL, NULL,
			     out->force_header ?
			     (metric_name ? metric_name : name) : "", 0);
	}

	expr__ctx_clear(&pctx);
}
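/* Evaluate a metric expression for one cpu without printing anything. */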
double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
{
	struct expr_parse_ctx pctx;
	double ratio = 0.0;

	if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
		goto out;

	if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
		ratio = 0.0;

out:
	expr__ctx_clear(&pctx);
	return ratio;
}
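/*
 * Main entry point for the "shadow" column of perf stat output: given a
 * counter and its average value, derive a human readable rate or ratio
 * (IPC, miss rates, GHz, TopDown breakdown, ...) from the values recorded
 * by perf_stat__update_shadow_stats() and print it.
 */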
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		}
	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
				     be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
			       evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
				       mexp->metric_refs, evsel->name, mexp->metric_name,
				       mexp->metric_unit, mexp->runtime, cpu, out, st);
		}
	}

	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}