// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>
/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
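/*
 * Example (illustrative, not from the original source): with
 * AGGR_SOCKET, counts from every CPU in a socket are folded into the
 * shadow-stat slot of that socket's first CPU, so the ratios printed
 * for the socket line are all computed from that single slot.
 */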
struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
	int metric_other;
};
static int saved_value_cmp(struct rb_node *rb_node,
			   const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;

	if ((char *)a->evsel < (char *)b->evsel)
		return -1;

	return 1;
}
static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}
static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}
static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}
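/*
 * Illustrative sketch (added for clarity, not called by anything): the
 * two key shapes saved_value_cmp() distinguishes. Shadow stats pass a
 * NULL evsel and key on cpu/type/ctx/stat; generic metrics pass the
 * evsel and leave type/ctx at STAT_NONE/0.
 */
static inline struct saved_value *
saved_value_lookup_examples(struct evsel *evsel, int cpu,
			    struct runtime_stat *st)
{
	/* per-thread shadow stat key: cpu/type/ctx/stat, evsel == NULL */
	saved_value_lookup(NULL, cpu, false, STAT_CYCLES, 0, st);
	/* generic metric key: evsel/cpu only */
	return saved_value_lookup(evsel, cpu, false, STAT_NONE, 0, st);
}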
void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}
void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	runtime_stat__init(&rt_stat);
}
static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}
void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}
static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;
	struct saved_value *v;

	count *= counter->scale;

	if (evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu, true, STAT_NONE, 0, st);
		v->metric_total += count;
		v->metric_other++;
	}
}
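/*
 * Usage sketch (for illustration): the stat machinery calls the
 * function above once per counter read, e.g.:
 *
 *	perf_stat__update_shadow_stats(counter, count, cpu, &rt_stat);
 *
 * so the print path below can later derive ratios from the
 * accumulated STAT_* values.
 */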
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
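/*
 * Worked example: for GRC_STALLED_CYCLES_FE the thresholds are
 * { 50.0, 30.0, 10.0 }, so a 60% stall ratio is printed red, 40%
 * magenta, 20% yellow, and 10% or less keeps the normal color.
 */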
static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
					   const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}
/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	struct expr_parse_ctx ctx;
	struct hashmap_entry *cur;
	size_t bkt;
	int i;

	expr__ctx_init(&ctx);
	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;

		expr__ctx_clear(&ctx);
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr,
					     counter->name,
					     &ctx, 1) < 0)
				continue;

			metric_events = calloc(sizeof(struct evsel *),
					       hashmap__size(&ctx.ids) + 1);
			if (!metric_events) {
				expr__ctx_clear(&ctx);
				return;
			}
			counter->metric_events = metric_events;
		}

		i = 0;
		hashmap__for_each_entry((&ctx.ids), cur, bkt) {
			const char *metric_name = (const char *)cur->key;

			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name,
							metric_name) &&
						!oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list,
							   metric_name);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed ||
				    strcasecmp(printed, metric_name)) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_name,
						counter->name);
					printed = strdup(metric_name);
				}
				invalid = true;
				continue;
			}
			metric_events[i++] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
	expr__ctx_clear(&ctx);
}
static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}
static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}
static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}
static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}
static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}
/*
 * High level "TopDown" CPU core pipe line bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
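/*
 * Worked example (illustrative numbers, not from a real run): a 4-wide
 * pipeline that ran 1000 cycles has TotalSlots = 4000. With
 * SlotsIssued = 3000, SlotsRetired = 2400, RecoveryBubbles = 200 and
 * FetchBubbles = 800:
 *
 *	BadSpeculation = ((3000 - 2400) + 200) / 4000 = 0.20
 *	Retiring       = 2400 / 4000                  = 0.60
 *	FrontendBound  = 800 / 4000                   = 0.20
 *	BackendBound   = 1.0 - 0.20 - 0.60 - 0.20     = 0.00
 */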
static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}
static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}
static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}
static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
/*
 * Kernel reports metrics multiplied with slots. To get back
 * the ratios we need to recreate the sum.
 */

static double td_metric_ratio(int ctx, int cpu,
			      enum stat_type type,
			      struct runtime_stat *stat)
{
	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
	double d = runtime_stat_avg(stat, type, ctx, cpu);

	if (sum)
		return d / sum;
	return 0;
}
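/*
 * Worked example (illustrative): if the averaged slot-weighted values
 * are retiring = 60, fe_bound = 20, be_bound = 15 and bad_spec = 5,
 * the sum is 100 and td_metric_ratio() for STAT_TOPDOWN_RETIRING
 * returns 60 / 100 = 0.6.
 */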
/*
 * ... but only if most of the values are actually available.
 * We allow two missing.
 */

static bool full_td(int ctx, int cpu,
		    struct runtime_stat *stat)
{
	int c = 0;

	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
		c++;
	return c >= 2;
}
static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
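/*
 * Worked example: with aperf = 1000, cycles = 900 and a non-zero SMI
 * count, cost = (1000 - 900) / 1000 * 100 = 10%, which does not yet
 * exceed the 10% threshold that turns the output red.
 */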
static int prepare_metric(struct evsel **metric_events,
			  struct metric_ref *metric_refs,
			  struct expr_parse_ctx *pctx,
			  int cpu,
			  struct runtime_stat *st)
{
	double scale;
	char *n, *pn;
	int i, j, ret;

	expr__ctx_init(pctx);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;

			if (v->metric_other)
				metric_total = v->metric_total;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return -ENOMEM;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;

		if (metric_total)
			expr__add_id_val(pctx, n, metric_total);
		else
			expr__add_id_val(pctx, n, avg_stats(stats)*scale);
	}

	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
		ret = expr__add_ref(pctx, &metric_refs[j]);
		if (ret)
			return ret;
	}

	return i;
}
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   struct metric_ref *metric_refs,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   int runtime,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct expr_parse_ctx pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;

	i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
	if (i < 0)
		return;

	if (!metric_events[i]) {
		if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
					&unit, &scale) >= 0) {
					ratio *= scale;
				}
				if (strstr(metric_expr, "?"))
					scnprintf(metric_bf, sizeof(metric_bf),
					  "%s %s_%d", unit, metric_name, runtime);
				else
					scnprintf(metric_bf, sizeof(metric_bf),
					  "%s %s", unit, metric_name);

				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.2f",
					metric_name ?
					metric_name :
					out->force_header ?  name : "",
					ratio);
			}
		} else {
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else {
		print_metric(config, ctxp, NULL, NULL,
			     out->force_header ?
			     (metric_name ? metric_name : name) : "", 0);
	}

	expr__ctx_clear(&pctx);
}
double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
{
	struct expr_parse_ctx pctx;
	double ratio = 0.0;

	if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
		goto out;

	if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
		ratio = 0.0;

out:
	expr__ctx_clear(&pctx);
	return ratio;
}
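/*
 * Usage sketch (illustration only): test code can evaluate a metric
 * against rt_stat after feeding it synthetic counts, e.g.:
 *
 *	double val = test_generic_metric(mexp, 0, &rt_stat);
 */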
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		}
	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
					"%7.2f%%", "transactional cycles",
					100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				      0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
			full_td(ctx, cpu, st)) {
		double retiring = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_RETIRING, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
			full_td(ctx, cpu, st)) {
		double fe_bound = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_FE_BOUND, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
			full_td(ctx, cpu, st)) {
		double be_bound = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_BE_BOUND, st);

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
				be_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
			full_td(ctx, cpu, st)) {
		double bad_spec = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_BAD_SPEC, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
				evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
					mexp->metric_refs, evsel->name, mexp->metric_name,
					mexp->metric_unit, mexp->runtime, cpu, out, st);
		}
	}

	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}