// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include "counts.h"
#include "cpumap.h"
#include "debug.h"
#include "header.h"
#include "stat.h"
#include "session.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include <linux/zalloc.h>

void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        stats->M2 += delta*(val - stats->mean);

        if (val > stats->max)
                stats->max = val;

        if (val < stats->min)
                stats->min = val;
}

double avg_stats(struct stats *stats)
{
        return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
double stddev_stats(struct stats *stats)
{
        double variance, variance_mean;

        if (stats->n < 2)
                return 0.0;

        variance = stats->M2 / (stats->n - 1);
        variance_mean = variance / stats->n;

        return sqrt(variance_mean);
}

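/*
 * Worked example (illustration only, not part of the original file):
 * feeding the values 2, 4 and 6 through update_stats() gives n = 3,
 * mean = 4 and M2 = 8.  stddev_stats() then computes
 * variance = 8 / 2 = 4 and variance_mean = 4 / 3, so it returns
 * sqrt(4/3) ~= 1.15 - the standard error of the mean, not the sample
 * standard deviation (which would be 2).
 */
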
double rel_stddev_stats(double stddev, double avg)
{
        double pct = 0.0;

        if (avg)
                pct = 100.0 * stddev/avg;

        return pct;
}

bool __perf_evsel_stat__is(struct evsel *evsel,
                           enum perf_stat_evsel_id id)
{
        struct perf_stat_evsel *ps = evsel->stats;

        return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(NONE,                x),
        ID(CYCLES_IN_TX,        cpu/cycles-t/),
        ID(TRANSACTION_START,   cpu/tx-start/),
        ID(ELISION_START,       cpu/el-start/),
        ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
        ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
        ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
        ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
        ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
        ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
        ID(TOPDOWN_RETIRING, topdown-retiring),
        ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
        ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
        ID(TOPDOWN_BE_BOUND, topdown-be-bound),
        ID(SMI_NUM, msr/smi/),
        ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;
        int i;

        /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

        for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
                if (!strcmp(evsel__name(evsel), id_str[i])) {
                        ps->id = i;
                        break;
                }
        }
}

static void evsel__reset_stat_priv(struct evsel *evsel)
{
        int i;
        struct perf_stat_evsel *ps = evsel->stats;

        for (i = 0; i < 3; i++)
                init_stats(&ps->res_stats[i]);

        perf_stat_evsel_id_init(evsel);
}

static int evsel__alloc_stat_priv(struct evsel *evsel)
{
        evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->stats == NULL)
                return -ENOMEM;
        evsel__reset_stat_priv(evsel);
        return 0;
}

static void evsel__free_stat_priv(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;

        if (ps)
                zfree(&ps->group_data);
        zfree(&evsel->stats);
}

static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
{
        struct perf_counts *counts;

        counts = perf_counts__new(ncpus, nthreads);
        if (counts)
                evsel->prev_raw_counts = counts;

        return counts ? 0 : -ENOMEM;
}

static void evsel__free_prev_raw_counts(struct evsel *evsel)
{
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
}

static void evsel__reset_prev_raw_counts(struct evsel *evsel)
{
        if (evsel->prev_raw_counts)
                perf_counts__reset(evsel->prev_raw_counts);
}

static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        if (evsel__alloc_stat_priv(evsel) < 0 ||
            evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
            (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
                return -ENOMEM;

        return 0;
}

int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel__alloc_stats(evsel, alloc_raw))
                        goto out_free;
        }

        return 0;

out_free:
        evlist__free_stats(evlist);
        return -1;
}

void evlist__free_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                evsel__free_stat_priv(evsel);
                evsel__free_counts(evsel);
                evsel__free_prev_raw_counts(evsel);
        }
}

void evlist__reset_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                evsel__reset_stat_priv(evsel);
                evsel__reset_counts(evsel);
        }
}

void evlist__reset_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                evsel__reset_prev_raw_counts(evsel);
}

static void evsel__copy_prev_raw_counts(struct evsel *evsel)
{
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        for (int thread = 0; thread < nthreads; thread++) {
                for (int cpu = 0; cpu < ncpus; cpu++) {
                        *perf_counts(evsel->counts, cpu, thread) =
                                *perf_counts(evsel->prev_raw_counts, cpu,
                                             thread);
                }
        }

        evsel->counts->aggr = evsel->prev_raw_counts->aggr;
}

void evlist__copy_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                evsel__copy_prev_raw_counts(evsel);
}

void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        /*
         * To collect the overall statistics for interval mode,
         * we copy the counts from evsel->prev_raw_counts to
         * evsel->counts. The perf_stat_process_counter creates
         * aggr values from per cpu values, but the per cpu values
         * are 0 for AGGR_GLOBAL. So we use a trick that saves the
         * previous aggr value to the first member of perf_counts,
         * then aggr calculation in process_counter_values can work
         * correctly.
         */
        evlist__for_each_entry(evlist, evsel) {
                *perf_counts(evsel->prev_raw_counts, 0, 0) =
                        evsel->prev_raw_counts->aggr;
        }
}

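/*
 * Illustration (assumed caller behaviour, not taken from this file): an
 * interval-mode summary is expected to run roughly
 *
 *      evlist__save_aggr_prev_raw_counts(evlist);
 *      evlist__copy_prev_raw_counts(evlist);
 *      ...
 *      perf_stat_process_counter(config, counter);
 *
 * so the aggr value stashed into slot (0, 0) above is exactly what the
 * AGGR_GLOBAL accumulation in process_counter_values() sums back up.
 */
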
static void zero_per_pkg(struct evsel *counter)
{
        if (counter->per_pkg_mask)
                memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}

static int check_per_pkg(struct evsel *counter,
                         struct perf_counts_values *vals, int cpu, bool *skip)
{
        unsigned long *mask = counter->per_pkg_mask;
        struct perf_cpu_map *cpus = evsel__cpus(counter);
        int s;

        *skip = false;

        if (!counter->per_pkg)
                return 0;

        if (perf_cpu_map__empty(cpus))
                return 0;

        if (!mask) {
                mask = zalloc(cpu__max_cpu());
                if (!mask)
                        return -ENOMEM;

                counter->per_pkg_mask = mask;
        }

        /*
         * we do not consider an event that has not run as a good
         * instance to mark a package as used (skip=1). Otherwise
         * we may run into a situation where the first CPU in a package
         * is not running anything, yet the second is, and this function
         * would mark the package as used after the first CPU and would
         * not read the values from the second CPU.
         */
        if (!(vals->run && vals->ena))
                return 0;

        s = cpu_map__get_socket(cpus, cpu, NULL).socket;
        if (s < 0)
                return -1;

        *skip = test_and_set_bit(s, mask) == 1;
        return 0;
}

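/*
 * When *skip ends up true, process_counter_values() below substitutes a
 * zeroed perf_counts_values, so a per-package event is accumulated only
 * once per socket no matter how many CPUs of that socket report it.
 */
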
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
                       int cpu, int thread,
                       struct perf_counts_values *count)
{
        struct perf_counts_values *aggr = &evsel->counts->aggr;
        static struct perf_counts_values zero;
        bool skip = false;

        if (check_per_pkg(evsel, count, cpu, &skip)) {
                pr_err("failed to read per-pkg counter\n");
                return -1;
        }

        if (skip)
                count = &zero;

        switch (config->aggr_mode) {
        case AGGR_THREAD:
        case AGGR_CORE:
        case AGGR_DIE:
        case AGGR_SOCKET:
        case AGGR_NODE:
        case AGGR_NONE:
                if (!evsel->snapshot)
                        evsel__compute_deltas(evsel, cpu, thread, count);
                perf_counts_values__scale(count, config->scale, NULL);
                if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
                        perf_stat__update_shadow_stats(evsel, count->val,
                                                       cpu, &rt_stat);
                }

                if (config->aggr_mode == AGGR_THREAD) {
                        if (config->stats)
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &config->stats[thread]);
                        else
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &rt_stat);
                }
                break;
        case AGGR_GLOBAL:
                aggr->val += count->val;
                aggr->ena += count->ena;
                aggr->run += count->run;
        case AGGR_UNSET:
        default:
                break;
        }

        return 0;
}

static int process_counter_maps(struct perf_stat_config *config,
                                struct evsel *counter)
{
        int nthreads = perf_thread_map__nr(counter->core.threads);
        int ncpus = evsel__nr_cpus(counter);
        int cpu, thread;

        if (counter->core.system_wide)
                nthreads = 1;

        for (thread = 0; thread < nthreads; thread++) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        if (process_counter_values(config, counter, cpu, thread,
                                                   perf_counts(counter->counts, cpu, thread)))
                                return -1;
                }
        }

        return 0;
}

int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter)
{
        struct perf_counts_values *aggr = &counter->counts->aggr;
        struct perf_stat_evsel *ps = counter->stats;
        u64 *count = counter->counts->aggr.values;
        int i, ret;

        aggr->val = aggr->ena = aggr->run = 0;

        /*
         * We calculate counter's data every interval,
         * and the display code shows ps->res_stats
         * avg value. We need to zero the stats for
         * interval mode, otherwise overall avg running
         * averages will be shown for each interval.
         */
        if (config->interval || config->summary) {
                for (i = 0; i < 3; i++)
                        init_stats(&ps->res_stats[i]);
        }

        if (counter->per_pkg)
                zero_per_pkg(counter);

        ret = process_counter_maps(config, counter);
        if (ret)
                return ret;

        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;

        if (!counter->snapshot)
                evsel__compute_deltas(counter, -1, -1, aggr);
        perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);

        if (verbose > 0) {
                fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        evsel__name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full runtime - to allow normalization during printout:
         */
        perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

        return 0;
}

int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
{
        struct perf_counts_values count;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;

        count.val = st->val;
        count.ena = st->ena;
        count.run = st->run;

        counter = evlist__id2evsel(session->evlist, st->id);
        if (!counter) {
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }

        *perf_counts(counter->counts, st->cpu, st->thread) = count;
        counter->supported = true;
        return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
        struct perf_record_stat *st = (struct perf_record_stat *)event;
        size_t ret;

        ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
                       st->id, st->cpu, st->thread);
        ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
                       st->val, st->ena, st->run);

        return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
        struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
        size_t ret;

        ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
                      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

        return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
        struct perf_stat_config sc;
        size_t ret;

        perf_event__read_stat_config(&sc, &event->stat_config);

        ret  = fprintf(fp, "\n");
        ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
        ret += fprintf(fp, "... scale     %d\n", sc.scale);
        ret += fprintf(fp, "... interval  %u\n", sc.interval);

        return ret;
}

int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target,
                             int cpu)
{
        struct perf_event_attr *attr = &evsel->core.attr;
        struct evsel *leader = evsel->leader;

        attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                            PERF_FORMAT_TOTAL_TIME_RUNNING;

        /*
         * The event is part of non trivial group, let's enable
         * the group read (for leader) and ID retrieval for all
         * members.
         */
        if (leader->core.nr_members > 1)
                attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

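        /*
         * With PERF_FORMAT_GROUP | PERF_FORMAT_ID (plus the TOTAL_TIME_*
         * flags above), a single read() of the leader returns, per the
         * perf_event_open(2) ABI, roughly:
         *
         *      struct {
         *              u64 nr;
         *              u64 time_enabled;
         *              u64 time_running;
         *              struct { u64 value; u64 id; } cnt[nr];
         *      };
         *
         * i.e. every member of the group is read consistently in one go.
         */
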
        attr->inherit = !config->no_inherit;

        /*
         * Some events get initialized with sample_(period/type) set,
         * like tracepoints. Clear it up for counting.
         */
        attr->sample_period = 0;

        if (config->identifier)
                attr->sample_type = PERF_SAMPLE_IDENTIFIER;

        if (config->all_user) {
                attr->exclude_kernel = 1;
                attr->exclude_user   = 0;
        }

        if (config->all_kernel) {
                attr->exclude_kernel = 0;
                attr->exclude_user   = 1;
        }

        /*
         * Disabling all counters initially, they will be enabled
         * either manually by us or by kernel via enable_on_exec
         * later.
         */
        if (evsel__is_group_leader(evsel)) {
                attr->disabled = 1;

                /*
                 * In case of initial_delay we enable tracee
                 * events manually.
                 */
                if (target__none(target) && !config->initial_delay)
                        attr->enable_on_exec = 1;
        }

        if (target__has_cpu(target) && !target__has_per_thread(target))
                return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);

        return evsel__open_per_thread(evsel, evsel->core.threads);
}