// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
};

static int saved_value_cmp(struct rb_node *rb_node,
			   const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For the shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For the generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;
		else
			return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	else
		return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

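/*
 * Look up the saved_value node keyed either by evsel/cpu (generic metrics)
 * or by cpu/type/ctx/stat with evsel == NULL (per-thread shadow stats),
 * optionally creating it when it is missing.
 */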
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}

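/*
 * Fold the event's exclude_* attributes into a CTX_BIT_* mask, so that
 * shadow counts taken in different exclusion contexts are kept apart.
 */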
static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);

	count *= counter->scale;

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, true,
							   STAT_NONE, 0, st);
		update_stats(&v->stats, count);
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name))
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i])) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

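/*
 * Read back a saved shadow value: runtime_stat_avg() returns the mean of the
 * recorded samples, runtime_stat_n() the number of samples seen (0 means the
 * event was never counted in this context).
 */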
static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out-of-order execution that is thrown away
 * (for example branch mispredictions).
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline, one per unit of pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle).
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In that case multiple formulas are combined, where possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
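/*
 * Worked example with made-up numbers, assuming a 4-wide pipeline:
 * 1000 cycles -> TotalSlots = 4000. With SlotsIssued = 2600,
 * SlotsRetired = 2000, RecoveryBubbles = 200 and FetchBubbles = 800:
 *
 * BadSpeculation = ((2600 - 2000) + 200) / 4000 = 0.20
 * Retiring       = 2000 / 4000                  = 0.50
 * FrontendBound  =  800 / 4000                  = 0.20
 * BackendBound   = 1.0 - 0.20 - 0.50 - 0.20     = 0.10
 */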
static double sanitize_val(double x)
{
	/* Round small negative values caused by measurement noise up to 0 */
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

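/*
 * Evaluate a MetricExpr: seed the expression parser with the triggering
 * event's own average, add the averages of the other referenced events, then
 * parse and print the result (or print a blank metric when a referenced
 * event was not collected).
 */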
static void generic_metric(const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;

	expr__ctx_init(&pctx);
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
		}
		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
	}
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(ctxp, NULL, "%8.1f",
				metric_name ?
				metric_name :
				out->force_header ? name : "",
				ratio);
		else
			print_metric(ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(ctxp, NULL, NULL, "", 0);
}

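/*
 * Print the "shadow" metric derived from one counter: e.g. IPC for
 * instructions, GHz for cycles, miss ratios for cache events, TopDown
 * percentages for the topdown-* events, plus any metric expression attached
 * to the event or to its metric group.
 */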
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(ctxp);
			ratio = total / avg;
			print_metric(ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		} else if (have_frontend_stalled) {
			print_metric(ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "Ghz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(ctxp, NULL,
					"%7.2f%%", "transactional cycles",
					100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
		   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
				evsel->metric_name, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(ctxp);
			generic_metric(mexp->metric_expr, mexp->metric_events,
					evsel->name, mexp->metric_name,
					avg, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(ctxp, NULL, NULL, NULL, 0);
}