1 // SPDX-License-Identifier: GPL-2.0
10 #include "metricgroup.h"
11 #include <linux/zalloc.h>
14 * AGGR_GLOBAL: Use CPU 0
15 * AGGR_SOCKET: Use first CPU of socket
16 * AGGR_DIE: Use first CPU of die
17 * AGGR_CORE: Use first CPU of core
18 * AGGR_NONE: Use matching CPU
19 * AGGR_THREAD: Not supported?
21 static bool have_frontend_stalled
;
23 struct runtime_stat rt_stat
;
24 struct stats walltime_nsecs_stats
;
27 struct rb_node rb_node
;
32 struct runtime_stat
*stat
;
38 static int saved_value_cmp(struct rb_node
*rb_node
, const void *entry
)
40 struct saved_value
*a
= container_of(rb_node
,
43 const struct saved_value
*b
= entry
;
46 return a
->cpu
- b
->cpu
;
49 * Previously the rbtree was used to link generic metrics.
50 * The keys were evsel/cpu. Now the rbtree is extended to support
51 * per-thread shadow stats. For shadow stats case, the keys
52 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
53 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
55 if (a
->type
!= b
->type
)
56 return a
->type
- b
->type
;
59 return a
->ctx
- b
->ctx
;
61 if (a
->evsel
== NULL
&& b
->evsel
== NULL
) {
62 if (a
->stat
== b
->stat
)
65 if ((char *)a
->stat
< (char *)b
->stat
)
71 if (a
->evsel
== b
->evsel
)
73 if ((char *)a
->evsel
< (char *)b
->evsel
)
78 static struct rb_node
*saved_value_new(struct rblist
*rblist __maybe_unused
,
81 struct saved_value
*nd
= malloc(sizeof(struct saved_value
));
85 memcpy(nd
, entry
, sizeof(struct saved_value
));
89 static void saved_value_delete(struct rblist
*rblist __maybe_unused
,
90 struct rb_node
*rb_node
)
92 struct saved_value
*v
;
95 v
= container_of(rb_node
, struct saved_value
, rb_node
);
99 static struct saved_value
*saved_value_lookup(struct evsel
*evsel
,
104 struct runtime_stat
*st
)
106 struct rblist
*rblist
;
108 struct saved_value dm
= {
116 rblist
= &st
->value_list
;
118 nd
= rblist__find(rblist
, &dm
);
120 return container_of(nd
, struct saved_value
, rb_node
);
122 rblist__add_node(rblist
, &dm
);
123 nd
= rblist__find(rblist
, &dm
);
125 return container_of(nd
, struct saved_value
, rb_node
);
130 void runtime_stat__init(struct runtime_stat
*st
)
132 struct rblist
*rblist
= &st
->value_list
;
134 rblist__init(rblist
);
135 rblist
->node_cmp
= saved_value_cmp
;
136 rblist
->node_new
= saved_value_new
;
137 rblist
->node_delete
= saved_value_delete
;
140 void runtime_stat__exit(struct runtime_stat
*st
)
142 rblist__exit(&st
->value_list
);
145 void perf_stat__init_shadow_stats(void)
147 have_frontend_stalled
= pmu_have_event("cpu", "stalled-cycles-frontend");
148 runtime_stat__init(&rt_stat
);
151 static int evsel_context(struct evsel
*evsel
)
155 if (evsel
->core
.attr
.exclude_kernel
)
156 ctx
|= CTX_BIT_KERNEL
;
157 if (evsel
->core
.attr
.exclude_user
)
159 if (evsel
->core
.attr
.exclude_hv
)
161 if (evsel
->core
.attr
.exclude_host
)
163 if (evsel
->core
.attr
.exclude_idle
)
169 static void reset_stat(struct runtime_stat
*st
)
171 struct rblist
*rblist
;
172 struct rb_node
*pos
, *next
;
174 rblist
= &st
->value_list
;
175 next
= rb_first_cached(&rblist
->entries
);
179 memset(&container_of(pos
, struct saved_value
, rb_node
)->stats
,
181 sizeof(struct stats
));
185 void perf_stat__reset_shadow_stats(void)
187 reset_stat(&rt_stat
);
188 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
191 void perf_stat__reset_shadow_per_stat(struct runtime_stat
*st
)
196 static void update_runtime_stat(struct runtime_stat
*st
,
198 int ctx
, int cpu
, u64 count
)
200 struct saved_value
*v
= saved_value_lookup(NULL
, cpu
, true,
204 update_stats(&v
->stats
, count
);
208 * Update various tracking values we maintain to print
209 * more semantic information such as miss/hit ratios,
210 * instruction rates, etc:
212 void perf_stat__update_shadow_stats(struct evsel
*counter
, u64 count
,
213 int cpu
, struct runtime_stat
*st
)
215 int ctx
= evsel_context(counter
);
216 u64 count_ns
= count
;
217 struct saved_value
*v
;
219 count
*= counter
->scale
;
221 if (perf_evsel__is_clock(counter
))
222 update_runtime_stat(st
, STAT_NSECS
, 0, cpu
, count_ns
);
223 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
224 update_runtime_stat(st
, STAT_CYCLES
, ctx
, cpu
, count
);
225 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
226 update_runtime_stat(st
, STAT_CYCLES_IN_TX
, ctx
, cpu
, count
);
227 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
228 update_runtime_stat(st
, STAT_TRANSACTION
, ctx
, cpu
, count
);
229 else if (perf_stat_evsel__is(counter
, ELISION_START
))
230 update_runtime_stat(st
, STAT_ELISION
, ctx
, cpu
, count
);
231 else if (perf_stat_evsel__is(counter
, TOPDOWN_TOTAL_SLOTS
))
232 update_runtime_stat(st
, STAT_TOPDOWN_TOTAL_SLOTS
,
234 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_ISSUED
))
235 update_runtime_stat(st
, STAT_TOPDOWN_SLOTS_ISSUED
,
237 else if (perf_stat_evsel__is(counter
, TOPDOWN_SLOTS_RETIRED
))
238 update_runtime_stat(st
, STAT_TOPDOWN_SLOTS_RETIRED
,
240 else if (perf_stat_evsel__is(counter
, TOPDOWN_FETCH_BUBBLES
))
241 update_runtime_stat(st
, STAT_TOPDOWN_FETCH_BUBBLES
,
243 else if (perf_stat_evsel__is(counter
, TOPDOWN_RECOVERY_BUBBLES
))
244 update_runtime_stat(st
, STAT_TOPDOWN_RECOVERY_BUBBLES
,
246 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
247 update_runtime_stat(st
, STAT_STALLED_CYCLES_FRONT
,
249 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
250 update_runtime_stat(st
, STAT_STALLED_CYCLES_BACK
,
252 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
253 update_runtime_stat(st
, STAT_BRANCHES
, ctx
, cpu
, count
);
254 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
255 update_runtime_stat(st
, STAT_CACHEREFS
, ctx
, cpu
, count
);
256 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
257 update_runtime_stat(st
, STAT_L1_DCACHE
, ctx
, cpu
, count
);
258 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
259 update_runtime_stat(st
, STAT_L1_ICACHE
, ctx
, cpu
, count
);
260 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
261 update_runtime_stat(st
, STAT_LL_CACHE
, ctx
, cpu
, count
);
262 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
263 update_runtime_stat(st
, STAT_DTLB_CACHE
, ctx
, cpu
, count
);
264 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
265 update_runtime_stat(st
, STAT_ITLB_CACHE
, ctx
, cpu
, count
);
266 else if (perf_stat_evsel__is(counter
, SMI_NUM
))
267 update_runtime_stat(st
, STAT_SMI_NUM
, ctx
, cpu
, count
);
268 else if (perf_stat_evsel__is(counter
, APERF
))
269 update_runtime_stat(st
, STAT_APERF
, ctx
, cpu
, count
);
271 if (counter
->collect_stat
) {
272 v
= saved_value_lookup(counter
, cpu
, true, STAT_NONE
, 0, st
);
273 update_stats(&v
->stats
, count
);
274 if (counter
->metric_leader
)
275 v
->metric_total
+= count
;
276 } else if (counter
->metric_leader
) {
277 v
= saved_value_lookup(counter
->metric_leader
,
278 cpu
, true, STAT_NONE
, 0, st
);
279 v
->metric_total
+= count
;
284 /* used for get_ratio_color() */
286 GRC_STALLED_CYCLES_FE
,
287 GRC_STALLED_CYCLES_BE
,
292 static const char *get_ratio_color(enum grc_type type
, double ratio
)
294 static const double grc_table
[GRC_MAX_NR
][3] = {
295 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
296 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
297 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
299 const char *color
= PERF_COLOR_NORMAL
;
301 if (ratio
> grc_table
[type
][0])
302 color
= PERF_COLOR_RED
;
303 else if (ratio
> grc_table
[type
][1])
304 color
= PERF_COLOR_MAGENTA
;
305 else if (ratio
> grc_table
[type
][2])
306 color
= PERF_COLOR_YELLOW
;
311 static struct evsel
*perf_stat__find_event(struct evlist
*evsel_list
,
316 evlist__for_each_entry (evsel_list
, c2
) {
317 if (!strcasecmp(c2
->name
, name
) && !c2
->collect_stat
)
323 /* Mark MetricExpr target events and link events using them to them. */
324 void perf_stat__collect_metric_expr(struct evlist
*evsel_list
)
326 struct evsel
*counter
, *leader
, **metric_events
, *oc
;
328 const char **metric_names
;
330 int num_metric_names
;
332 evlist__for_each_entry(evsel_list
, counter
) {
333 bool invalid
= false;
335 leader
= counter
->leader
;
336 if (!counter
->metric_expr
)
338 metric_events
= counter
->metric_events
;
339 if (!metric_events
) {
340 if (expr__find_other(counter
->metric_expr
, counter
->name
,
341 &metric_names
, &num_metric_names
) < 0)
344 metric_events
= calloc(sizeof(struct evsel
*),
345 num_metric_names
+ 1);
348 counter
->metric_events
= metric_events
;
351 for (i
= 0; i
< num_metric_names
; i
++) {
354 /* Search in group */
355 for_each_group_member (oc
, leader
) {
356 if (!strcasecmp(oc
->name
, metric_names
[i
]) &&
364 /* Search ignoring groups */
365 oc
= perf_stat__find_event(evsel_list
, metric_names
[i
]);
368 /* Deduping one is good enough to handle duplicated PMUs. */
369 static char *printed
;
372 * Adding events automatically would be difficult, because
373 * it would risk creating groups that are not schedulable.
374 * perf stat doesn't understand all the scheduling constraints
375 * of events. So we ask the user instead to add the missing
378 if (!printed
|| strcasecmp(printed
, metric_names
[i
])) {
380 "Add %s event to groups to get metric expression for %s\n",
383 printed
= strdup(metric_names
[i
]);
388 metric_events
[i
] = oc
;
389 oc
->collect_stat
= true;
391 metric_events
[i
] = NULL
;
395 counter
->metric_events
= NULL
;
396 counter
->metric_expr
= NULL
;
401 static double runtime_stat_avg(struct runtime_stat
*st
,
402 enum stat_type type
, int ctx
, int cpu
)
404 struct saved_value
*v
;
406 v
= saved_value_lookup(NULL
, cpu
, false, type
, ctx
, st
);
410 return avg_stats(&v
->stats
);
413 static double runtime_stat_n(struct runtime_stat
*st
,
414 enum stat_type type
, int ctx
, int cpu
)
416 struct saved_value
*v
;
418 v
= saved_value_lookup(NULL
, cpu
, false, type
, ctx
, st
);
425 static void print_stalled_cycles_frontend(struct perf_stat_config
*config
,
427 struct evsel
*evsel
, double avg
,
428 struct perf_stat_output_ctx
*out
,
429 struct runtime_stat
*st
)
431 double total
, ratio
= 0.0;
433 int ctx
= evsel_context(evsel
);
435 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
438 ratio
= avg
/ total
* 100.0;
440 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
443 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "frontend cycles idle",
446 out
->print_metric(config
, out
->ctx
, NULL
, NULL
, "frontend cycles idle", 0);
449 static void print_stalled_cycles_backend(struct perf_stat_config
*config
,
451 struct evsel
*evsel
, double avg
,
452 struct perf_stat_output_ctx
*out
,
453 struct runtime_stat
*st
)
455 double total
, ratio
= 0.0;
457 int ctx
= evsel_context(evsel
);
459 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
462 ratio
= avg
/ total
* 100.0;
464 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
466 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "backend cycles idle", ratio
);
469 static void print_branch_misses(struct perf_stat_config
*config
,
473 struct perf_stat_output_ctx
*out
,
474 struct runtime_stat
*st
)
476 double total
, ratio
= 0.0;
478 int ctx
= evsel_context(evsel
);
480 total
= runtime_stat_avg(st
, STAT_BRANCHES
, ctx
, cpu
);
483 ratio
= avg
/ total
* 100.0;
485 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
487 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all branches", ratio
);
490 static void print_l1_dcache_misses(struct perf_stat_config
*config
,
494 struct perf_stat_output_ctx
*out
,
495 struct runtime_stat
*st
)
498 double total
, ratio
= 0.0;
500 int ctx
= evsel_context(evsel
);
502 total
= runtime_stat_avg(st
, STAT_L1_DCACHE
, ctx
, cpu
);
505 ratio
= avg
/ total
* 100.0;
507 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
509 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all L1-dcache hits", ratio
);
512 static void print_l1_icache_misses(struct perf_stat_config
*config
,
516 struct perf_stat_output_ctx
*out
,
517 struct runtime_stat
*st
)
520 double total
, ratio
= 0.0;
522 int ctx
= evsel_context(evsel
);
524 total
= runtime_stat_avg(st
, STAT_L1_ICACHE
, ctx
, cpu
);
527 ratio
= avg
/ total
* 100.0;
529 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
530 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all L1-icache hits", ratio
);
533 static void print_dtlb_cache_misses(struct perf_stat_config
*config
,
537 struct perf_stat_output_ctx
*out
,
538 struct runtime_stat
*st
)
540 double total
, ratio
= 0.0;
542 int ctx
= evsel_context(evsel
);
544 total
= runtime_stat_avg(st
, STAT_DTLB_CACHE
, ctx
, cpu
);
547 ratio
= avg
/ total
* 100.0;
549 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
550 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all dTLB cache hits", ratio
);
553 static void print_itlb_cache_misses(struct perf_stat_config
*config
,
557 struct perf_stat_output_ctx
*out
,
558 struct runtime_stat
*st
)
560 double total
, ratio
= 0.0;
562 int ctx
= evsel_context(evsel
);
564 total
= runtime_stat_avg(st
, STAT_ITLB_CACHE
, ctx
, cpu
);
567 ratio
= avg
/ total
* 100.0;
569 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
570 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all iTLB cache hits", ratio
);
573 static void print_ll_cache_misses(struct perf_stat_config
*config
,
577 struct perf_stat_output_ctx
*out
,
578 struct runtime_stat
*st
)
580 double total
, ratio
= 0.0;
582 int ctx
= evsel_context(evsel
);
584 total
= runtime_stat_avg(st
, STAT_LL_CACHE
, ctx
, cpu
);
587 ratio
= avg
/ total
* 100.0;
589 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
590 out
->print_metric(config
, out
->ctx
, color
, "%7.2f%%", "of all LL-cache hits", ratio
);
594 * High level "TopDown" CPU core pipe line bottleneck break down.
596 * Basic concept following
597 * Yasin, A Top Down Method for Performance analysis and Counter architecture
600 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
602 * Frontend -> Backend -> Retiring
603 * BadSpeculation in addition means out of order execution that is thrown away
604 * (for example branch mispredictions)
605 * Frontend is instruction decoding.
606 * Backend is execution, like computation and accessing data in memory
607 * Retiring is good execution that is not directly bottlenecked
609 * The formulas are computed in slots.
610 * A slot is an entry in the pipeline each for the pipeline width
611 * (for example a 4-wide pipeline has 4 slots for each cycle)
614 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
616 * Retiring = SlotsRetired / TotalSlots
617 * FrontendBound = FetchBubbles / TotalSlots
618 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
620 * The kernel provides the mapping to the low level CPU events and any scaling
621 * needed for the CPU pipeline width, for example:
623 * TotalSlots = Cycles * 4
625 * The scaling factor is communicated in the sysfs unit.
627 * In some cases the CPU may not be able to measure all the formulas due to
628 * missing events. In this case multiple formulas are combined, as possible.
630 * Full TopDown supports more levels to sub-divide each area: for example
631 * BackendBound into computing bound and memory bound. For now we only
632 * support Level 1 TopDown.
635 static double sanitize_val(double x
)
637 if (x
< 0 && x
>= -0.02)
642 static double td_total_slots(int ctx
, int cpu
, struct runtime_stat
*st
)
644 return runtime_stat_avg(st
, STAT_TOPDOWN_TOTAL_SLOTS
, ctx
, cpu
);
647 static double td_bad_spec(int ctx
, int cpu
, struct runtime_stat
*st
)
653 total
= runtime_stat_avg(st
, STAT_TOPDOWN_SLOTS_ISSUED
, ctx
, cpu
) -
654 runtime_stat_avg(st
, STAT_TOPDOWN_SLOTS_RETIRED
, ctx
, cpu
) +
655 runtime_stat_avg(st
, STAT_TOPDOWN_RECOVERY_BUBBLES
, ctx
, cpu
);
657 total_slots
= td_total_slots(ctx
, cpu
, st
);
659 bad_spec
= total
/ total_slots
;
660 return sanitize_val(bad_spec
);
663 static double td_retiring(int ctx
, int cpu
, struct runtime_stat
*st
)
666 double total_slots
= td_total_slots(ctx
, cpu
, st
);
667 double ret_slots
= runtime_stat_avg(st
, STAT_TOPDOWN_SLOTS_RETIRED
,
671 retiring
= ret_slots
/ total_slots
;
675 static double td_fe_bound(int ctx
, int cpu
, struct runtime_stat
*st
)
678 double total_slots
= td_total_slots(ctx
, cpu
, st
);
679 double fetch_bub
= runtime_stat_avg(st
, STAT_TOPDOWN_FETCH_BUBBLES
,
683 fe_bound
= fetch_bub
/ total_slots
;
687 static double td_be_bound(int ctx
, int cpu
, struct runtime_stat
*st
)
689 double sum
= (td_fe_bound(ctx
, cpu
, st
) +
690 td_bad_spec(ctx
, cpu
, st
) +
691 td_retiring(ctx
, cpu
, st
));
694 return sanitize_val(1.0 - sum
);
697 static void print_smi_cost(struct perf_stat_config
*config
,
698 int cpu
, struct evsel
*evsel
,
699 struct perf_stat_output_ctx
*out
,
700 struct runtime_stat
*st
)
702 double smi_num
, aperf
, cycles
, cost
= 0.0;
703 int ctx
= evsel_context(evsel
);
704 const char *color
= NULL
;
706 smi_num
= runtime_stat_avg(st
, STAT_SMI_NUM
, ctx
, cpu
);
707 aperf
= runtime_stat_avg(st
, STAT_APERF
, ctx
, cpu
);
708 cycles
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
710 if ((cycles
== 0) || (aperf
== 0))
714 cost
= (aperf
- cycles
) / aperf
* 100.00;
717 color
= PERF_COLOR_RED
;
718 out
->print_metric(config
, out
->ctx
, color
, "%8.1f%%", "SMI cycles%", cost
);
719 out
->print_metric(config
, out
->ctx
, NULL
, "%4.0f", "SMI#", smi_num
);
722 static void generic_metric(struct perf_stat_config
*config
,
723 const char *metric_expr
,
724 struct evsel
**metric_events
,
726 const char *metric_name
,
727 const char *metric_unit
,
730 struct perf_stat_output_ctx
*out
,
731 struct runtime_stat
*st
)
733 print_metric_t print_metric
= out
->print_metric
;
734 struct parse_ctx pctx
;
737 void *ctxp
= out
->ctx
;
740 expr__ctx_init(&pctx
);
741 /* Must be first id entry */
742 expr__add_id(&pctx
, name
, avg
);
743 for (i
= 0; metric_events
[i
]; i
++) {
744 struct saved_value
*v
;
746 u64 metric_total
= 0;
748 if (!strcmp(metric_events
[i
]->name
, "duration_time")) {
749 stats
= &walltime_nsecs_stats
;
752 v
= saved_value_lookup(metric_events
[i
], cpu
, false,
760 metric_total
= v
->metric_total
;
763 n
= strdup(metric_events
[i
]->name
);
767 * This display code with --no-merge adds [cpu] postfixes.
768 * These are not supported by the parser. Remove everything
776 expr__add_id(&pctx
, n
, metric_total
);
778 expr__add_id(&pctx
, n
, avg_stats(stats
)*scale
);
781 if (!metric_events
[i
]) {
782 const char *p
= metric_expr
;
784 if (expr__parse(&ratio
, &pctx
, &p
) == 0) {
788 if (metric_unit
&& metric_name
) {
789 if (perf_pmu__convert_scale(metric_unit
,
790 &unit
, &scale
) >= 0) {
794 scnprintf(metric_bf
, sizeof(metric_bf
),
795 "%s %s", unit
, metric_name
);
796 print_metric(config
, ctxp
, NULL
, "%8.1f",
799 print_metric(config
, ctxp
, NULL
, "%8.1f",
802 out
->force_header
? name
: "",
806 print_metric(config
, ctxp
, NULL
, NULL
,
808 (metric_name
? metric_name
: name
) : "", 0);
811 print_metric(config
, ctxp
, NULL
, NULL
, "", 0);
813 for (i
= 1; i
< pctx
.num_ids
; i
++)
814 zfree(&pctx
.ids
[i
].name
);
817 void perf_stat__print_shadow_stats(struct perf_stat_config
*config
,
820 struct perf_stat_output_ctx
*out
,
821 struct rblist
*metric_events
,
822 struct runtime_stat
*st
)
824 void *ctxp
= out
->ctx
;
825 print_metric_t print_metric
= out
->print_metric
;
826 double total
, ratio
= 0.0, total2
;
827 const char *color
= NULL
;
828 int ctx
= evsel_context(evsel
);
829 struct metric_event
*me
;
832 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
833 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
837 print_metric(config
, ctxp
, NULL
, "%7.2f ",
838 "insn per cycle", ratio
);
840 print_metric(config
, ctxp
, NULL
, NULL
, "insn per cycle", 0);
843 total
= runtime_stat_avg(st
, STAT_STALLED_CYCLES_FRONT
,
846 total
= max(total
, runtime_stat_avg(st
,
847 STAT_STALLED_CYCLES_BACK
,
851 out
->new_line(config
, ctxp
);
853 print_metric(config
, ctxp
, NULL
, "%7.2f ",
854 "stalled cycles per insn",
856 } else if (have_frontend_stalled
) {
857 out
->new_line(config
, ctxp
);
858 print_metric(config
, ctxp
, NULL
, "%7.2f ",
859 "stalled cycles per insn", 0);
861 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
)) {
862 if (runtime_stat_n(st
, STAT_BRANCHES
, ctx
, cpu
) != 0)
863 print_branch_misses(config
, cpu
, evsel
, avg
, out
, st
);
865 print_metric(config
, ctxp
, NULL
, NULL
, "of all branches", 0);
867 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
868 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
869 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
870 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
872 if (runtime_stat_n(st
, STAT_L1_DCACHE
, ctx
, cpu
) != 0)
873 print_l1_dcache_misses(config
, cpu
, evsel
, avg
, out
, st
);
875 print_metric(config
, ctxp
, NULL
, NULL
, "of all L1-dcache hits", 0);
877 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
878 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
879 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
880 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
882 if (runtime_stat_n(st
, STAT_L1_ICACHE
, ctx
, cpu
) != 0)
883 print_l1_icache_misses(config
, cpu
, evsel
, avg
, out
, st
);
885 print_metric(config
, ctxp
, NULL
, NULL
, "of all L1-icache hits", 0);
887 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
888 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
889 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
890 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
892 if (runtime_stat_n(st
, STAT_DTLB_CACHE
, ctx
, cpu
) != 0)
893 print_dtlb_cache_misses(config
, cpu
, evsel
, avg
, out
, st
);
895 print_metric(config
, ctxp
, NULL
, NULL
, "of all dTLB cache hits", 0);
897 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
898 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
899 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
900 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
902 if (runtime_stat_n(st
, STAT_ITLB_CACHE
, ctx
, cpu
) != 0)
903 print_itlb_cache_misses(config
, cpu
, evsel
, avg
, out
, st
);
905 print_metric(config
, ctxp
, NULL
, NULL
, "of all iTLB cache hits", 0);
907 evsel
->core
.attr
.type
== PERF_TYPE_HW_CACHE
&&
908 evsel
->core
.attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
909 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
910 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
912 if (runtime_stat_n(st
, STAT_LL_CACHE
, ctx
, cpu
) != 0)
913 print_ll_cache_misses(config
, cpu
, evsel
, avg
, out
, st
);
915 print_metric(config
, ctxp
, NULL
, NULL
, "of all LL-cache hits", 0);
916 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
)) {
917 total
= runtime_stat_avg(st
, STAT_CACHEREFS
, ctx
, cpu
);
920 ratio
= avg
* 100 / total
;
922 if (runtime_stat_n(st
, STAT_CACHEREFS
, ctx
, cpu
) != 0)
923 print_metric(config
, ctxp
, NULL
, "%8.3f %%",
924 "of all cache refs", ratio
);
926 print_metric(config
, ctxp
, NULL
, NULL
, "of all cache refs", 0);
927 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
928 print_stalled_cycles_frontend(config
, cpu
, evsel
, avg
, out
, st
);
929 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
930 print_stalled_cycles_backend(config
, cpu
, evsel
, avg
, out
, st
);
931 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
932 total
= runtime_stat_avg(st
, STAT_NSECS
, 0, cpu
);
936 print_metric(config
, ctxp
, NULL
, "%8.3f", "GHz", ratio
);
938 print_metric(config
, ctxp
, NULL
, NULL
, "Ghz", 0);
940 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
941 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
944 print_metric(config
, ctxp
, NULL
,
945 "%7.2f%%", "transactional cycles",
946 100.0 * (avg
/ total
));
948 print_metric(config
, ctxp
, NULL
, NULL
, "transactional cycles",
950 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
951 total
= runtime_stat_avg(st
, STAT_CYCLES
, ctx
, cpu
);
952 total2
= runtime_stat_avg(st
, STAT_CYCLES_IN_TX
, ctx
, cpu
);
957 print_metric(config
, ctxp
, NULL
, "%7.2f%%", "aborted cycles",
958 100.0 * ((total2
-avg
) / total
));
960 print_metric(config
, ctxp
, NULL
, NULL
, "aborted cycles", 0);
961 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
)) {
962 total
= runtime_stat_avg(st
, STAT_CYCLES_IN_TX
,
968 if (runtime_stat_n(st
, STAT_CYCLES_IN_TX
, ctx
, cpu
) != 0)
969 print_metric(config
, ctxp
, NULL
, "%8.0f",
970 "cycles / transaction", ratio
);
972 print_metric(config
, ctxp
, NULL
, NULL
, "cycles / transaction",
974 } else if (perf_stat_evsel__is(evsel
, ELISION_START
)) {
975 total
= runtime_stat_avg(st
, STAT_CYCLES_IN_TX
,
981 print_metric(config
, ctxp
, NULL
, "%8.0f", "cycles / elision", ratio
);
982 } else if (perf_evsel__is_clock(evsel
)) {
983 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
984 print_metric(config
, ctxp
, NULL
, "%8.3f", "CPUs utilized",
985 avg
/ (ratio
* evsel
->scale
));
987 print_metric(config
, ctxp
, NULL
, NULL
, "CPUs utilized", 0);
988 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_FETCH_BUBBLES
)) {
989 double fe_bound
= td_fe_bound(ctx
, cpu
, st
);
992 color
= PERF_COLOR_RED
;
993 print_metric(config
, ctxp
, color
, "%8.1f%%", "frontend bound",
995 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_RETIRED
)) {
996 double retiring
= td_retiring(ctx
, cpu
, st
);
999 color
= PERF_COLOR_GREEN
;
1000 print_metric(config
, ctxp
, color
, "%8.1f%%", "retiring",
1002 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_RECOVERY_BUBBLES
)) {
1003 double bad_spec
= td_bad_spec(ctx
, cpu
, st
);
1006 color
= PERF_COLOR_RED
;
1007 print_metric(config
, ctxp
, color
, "%8.1f%%", "bad speculation",
1009 } else if (perf_stat_evsel__is(evsel
, TOPDOWN_SLOTS_ISSUED
)) {
1010 double be_bound
= td_be_bound(ctx
, cpu
, st
);
1011 const char *name
= "backend bound";
1012 static int have_recovery_bubbles
= -1;
1014 /* In case the CPU does not support topdown-recovery-bubbles */
1015 if (have_recovery_bubbles
< 0)
1016 have_recovery_bubbles
= pmu_have_event("cpu",
1017 "topdown-recovery-bubbles");
1018 if (!have_recovery_bubbles
)
1019 name
= "backend bound/bad spec";
1022 color
= PERF_COLOR_RED
;
1023 if (td_total_slots(ctx
, cpu
, st
) > 0)
1024 print_metric(config
, ctxp
, color
, "%8.1f%%", name
,
1027 print_metric(config
, ctxp
, NULL
, NULL
, name
, 0);
1028 } else if (evsel
->metric_expr
) {
1029 generic_metric(config
, evsel
->metric_expr
, evsel
->metric_events
, evsel
->name
,
1030 evsel
->metric_name
, NULL
, avg
, cpu
, out
, st
);
1031 } else if (runtime_stat_n(st
, STAT_NSECS
, 0, cpu
) != 0) {
1035 total
= runtime_stat_avg(st
, STAT_NSECS
, 0, cpu
);
1038 ratio
= 1000.0 * avg
/ total
;
1039 if (ratio
< 0.001) {
1043 snprintf(unit_buf
, sizeof(unit_buf
), "%c/sec", unit
);
1044 print_metric(config
, ctxp
, NULL
, "%8.3f", unit_buf
, ratio
);
1045 } else if (perf_stat_evsel__is(evsel
, SMI_NUM
)) {
1046 print_smi_cost(config
, cpu
, evsel
, out
, st
);
1051 if ((me
= metricgroup__lookup(metric_events
, evsel
, false)) != NULL
) {
1052 struct metric_expr
*mexp
;
1054 list_for_each_entry (mexp
, &me
->head
, nd
) {
1056 out
->new_line(config
, ctxp
);
1057 generic_metric(config
, mexp
->metric_expr
, mexp
->metric_events
,
1058 evsel
->name
, mexp
->metric_name
,
1059 mexp
->metric_unit
, avg
, cpu
, out
, st
);
1063 print_metric(config
, ctxp
, NULL
, NULL
, NULL
, 0);