// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include "stat.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"

/* Running mean/variance bookkeeping (Welford's online algorithm). */
void update_stats(struct stats *stats, u64 val)
{
	double delta;

	stats->n++;
	delta = val - stats->mean;
	stats->mean += delta / stats->n;
	stats->M2 += delta*(val - stats->mean);
}
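
/*
 * Worked example (illustrative only): feeding the values 100 and 110
 * through update_stats() gives
 *
 *	after 100: n = 1, mean = 100, M2 = 0
 *	after 110: n = 2, delta = 10, mean = 105, M2 = 10 * (110 - 105) = 50
 *
 * so the sample variance is M2 / (n - 1) = 50 and the stddev is ~7.07,
 * matching the two-pass formula quoted above stddev_stats() below.
 */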

double avg_stats(struct stats *stats)
{
	return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 */
double stddev_stats(struct stats *stats)
{
	double variance, variance_mean;

	if (stats->n < 2)
		return 0.0;

	variance = stats->M2 / (stats->n - 1);
	variance_mean = variance / stats->n;

	return sqrt(variance_mean);
}

/* Relative standard deviation, as a percentage of the average. */
double rel_stddev_stats(double stddev, double avg)
{
	double pct = 0.0;

	if (avg)
		pct = 100.0 * stddev/avg;

	return pct;
}
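
/*
 * Usage sketch (not called from this file; assumes only the helpers above
 * plus init_stats() from stat.h): aggregate a few samples and report
 * mean +- stddev, the way the stat display code does for repeated runs.
 *
 *	struct stats st;
 *	u64 samples[3] = { 100, 110, 120 };
 *	int i;
 *
 *	init_stats(&st);
 *	for (i = 0; i < 3; i++)
 *		update_stats(&st, samples[i]);
 *
 *	fprintf(stderr, "%.2f +- %.2f (%.2f%%)\n",
 *		avg_stats(&st), stddev_stats(&st),
 *		rel_stddev_stats(stddev_stats(&st), avg_stats(&st)));
 */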

bool __perf_evsel_stat__is(struct perf_evsel *evsel,
			   enum perf_stat_evsel_id id)
{
	struct perf_stat_evsel *ps = evsel->stats;

	return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
	ID(NONE,		x),
	ID(CYCLES_IN_TX,	cpu/cycles-t/),
	ID(TRANSACTION_START,	cpu/tx-start/),
	ID(ELISION_START,	cpu/el-start/),
	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
	ID(SMI_NUM, msr/smi/),
	ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct perf_evsel *evsel)
{
	struct perf_stat_evsel *ps = evsel->stats;
	int i;

	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
			ps->id = i;
			break;
		}
	}
}
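
/*
 * Illustrative use (a sketch, not code from this file): once ps->id is set,
 * callers can test for the special events via the perf_stat_evsel__is()
 * wrapper from stat.h, e.g.:
 *
 *	if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
 *		... treat the count as transactional cycles ...
 */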

static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
	int i;
	struct perf_stat_evsel *ps = evsel->stats;

	for (i = 0; i < 3; i++)
		init_stats(&ps->res_stats[i]);

	perf_stat_evsel_id_init(evsel);
}

static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
	if (evsel->stats == NULL)
		return -ENOMEM;
	perf_evsel__reset_stat_priv(evsel);
	return 0;
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	struct perf_stat_evsel *ps = evsel->stats;

	if (ps)
		free(ps->group_data);
	zfree(&evsel->stats);
}

static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
					     int ncpus, int nthreads)
{
	struct perf_counts *counts;

	counts = perf_counts__new(ncpus, nthreads);
	if (counts)
		evsel->prev_raw_counts = counts;

	return counts ? 0 : -ENOMEM;
}

static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
	perf_counts__delete(evsel->prev_raw_counts);
	evsel->prev_raw_counts = NULL;
}

static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
{
	int ncpus = perf_evsel__nr_cpus(evsel);
	int nthreads = thread_map__nr(evsel->threads);

	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
		return -ENOMEM;

	return 0;
}

int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (perf_evsel__alloc_stats(evsel, alloc_raw))
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}

void perf_evlist__free_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__free_stat_priv(evsel);
		perf_evsel__free_counts(evsel);
		perf_evsel__free_prev_raw_counts(evsel);
	}
}

void perf_evlist__reset_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__reset_stat_priv(evsel);
		perf_evsel__reset_counts(evsel);
	}
}

static void zero_per_pkg(struct perf_evsel *counter)
{
	if (counter->per_pkg_mask)
		memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}

static int check_per_pkg(struct perf_evsel *counter,
			 struct perf_counts_values *vals, int cpu, bool *skip)
{
	unsigned long *mask = counter->per_pkg_mask;
	struct cpu_map *cpus = perf_evsel__cpus(counter);
	int s;

	*skip = false;

	if (!counter->per_pkg)
		return 0;

	if (cpu_map__empty(cpus))
		return 0;

	if (!mask) {
		mask = zalloc(MAX_NR_CPUS);
		if (!mask)
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	/*
	 * We do not consider an event that has not run as a good
	 * instance to mark a package as used (skip=1). Otherwise
	 * we may run into a situation where the first CPU in a package
	 * is not running anything, yet the second is, and this function
	 * would mark the package as used after the first CPU and would
	 * not read the values from the second CPU.
	 */
	if (!(vals->run && vals->ena))
		return 0;

	s = cpu_map__get_socket(cpus, cpu, NULL);
	if (s < 0)
		return -1;

	*skip = test_and_set_bit(s, mask) == 1;
	return 0;
}

static int
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
		       int cpu, int thread,
		       struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, count, cpu, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	if (skip)
		count = &zero;

	switch (config->aggr_mode) {
	case AGGR_THREAD:
	case AGGR_CORE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, thread, count);
		perf_counts_values__scale(count, config->scale, NULL);
		if (config->aggr_mode == AGGR_NONE)
			perf_stat__update_shadow_stats(evsel, count->val, cpu,
						       &rt_stat);
		if (config->aggr_mode == AGGR_THREAD) {
			if (config->stats)
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &config->stats[thread]);
			else
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &rt_stat);
		}
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		aggr->ena += count->ena;
		aggr->run += count->run;
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}

static int process_counter_maps(struct perf_stat_config *config,
				struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	/* System-wide counters only have a single thread slot. */
	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			if (process_counter_values(config, counter, cpu, thread,
						   perf_counts(counter->counts, cpu, thread)))
				return -1;
		}
	}

	return 0;
}

int perf_stat_process_counter(struct perf_stat_config *config,
			      struct perf_evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat_evsel *ps = counter->stats;
	u64 *count = counter->counts->aggr.values;
	int i, ret;

	aggr->val = aggr->ena = aggr->run = 0;

	/*
	 * The counter's data is recalculated every interval and the
	 * display code shows the ps->res_stats average. Zero the stats
	 * in interval mode, otherwise a running average over all
	 * intervals would be shown for each interval.
	 */
	if (config->interval)
		init_stats(ps->res_stats);

	if (counter->per_pkg)
		zero_per_pkg(counter);

	ret = process_counter_maps(config, counter);
	if (ret)
		return ret;

	if (config->aggr_mode != AGGR_GLOBAL)
		return 0;

	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, -1, aggr);
	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose > 0) {
		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

	return 0;
}
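
/*
 * Caller-side sketch (illustrative, mirroring how builtin-stat drives this
 * file; the variable names are assumptions, not defined here): after each
 * read of the counters, every event is folded into its res_stats and the
 * shadow stats used for derived metrics:
 *
 *	struct perf_evsel *counter;
 *
 *	evlist__for_each_entry(evsel_list, counter)
 *		perf_stat_process_counter(&stat_config, counter);
 */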

int perf_event__process_stat_event(struct perf_session *session,
				   union perf_event *event)
{
	struct perf_counts_values count;
	struct stat_event *st = &event->stat;
	struct perf_evsel *counter;

	count.val = st->val;
	count.ena = st->ena;
	count.run = st->run;

	counter = perf_evlist__id2evsel(session->evlist, st->id);
	if (!counter) {
		pr_err("Failed to resolve counter for stat event.\n");
		return -EINVAL;
	}

	*perf_counts(counter->counts, st->cpu, st->thread) = count;
	counter->supported = true;
	return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
	struct stat_event *st = (struct stat_event *)event;
	size_t ret;

	ret  = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
		       st->id, st->cpu, st->thread);
	ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
		       st->val, st->ena, st->run);

	return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
	struct stat_round_event *rd = (struct stat_round_event *)event;
	size_t ret;

	ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

	return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
	struct perf_stat_config sc;
	size_t ret;

	perf_event__read_stat_config(&sc, &event->stat_config);

	ret  = fprintf(fp, "\n");
	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
	ret += fprintf(fp, "... scale     %d\n", sc.scale);
	ret += fprintf(fp, "... interval  %u\n", sc.interval);

	return ret;
}

int create_perf_stat_counter(struct perf_evsel *evsel,
			     struct perf_stat_config *config,
			     struct target *target)
{
	struct perf_event_attr *attr = &evsel->attr;
	struct perf_evsel *leader = evsel->leader;

	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			    PERF_FORMAT_TOTAL_TIME_RUNNING;

	/*
	 * The event is part of a non-trivial group, so enable
	 * the group read (for the leader) and ID retrieval for all
	 * members.
	 */
	if (leader->nr_members > 1)
		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

	attr->inherit = !config->no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear them for counting.
	 */
	attr->sample_period = 0;

	if (config->identifier)
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disable all counters initially; they will be enabled
	 * either manually by us or by the kernel via enable_on_exec
	 * later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(target) && !config->initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(target) && !target__has_per_thread(target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel->threads);
}
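
/*
 * Caller-side sketch (illustrative, with assumed variable names): each event
 * in the list is opened with the stat configuration and the requested target
 * before counting starts.
 *
 *	evlist__for_each_entry(evsel_list, counter) {
 *		if (create_perf_stat_counter(counter, &stat_config, target) < 0)
 *			return -1;
 *	}
 *
 * For a plain workload target ("perf stat ./prog") the group leader ends up
 * with disabled = 1 and enable_on_exec = 1, so counting starts on exec.
 */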

int perf_stat_synthesize_config(struct perf_stat_config *config,
				struct perf_tool *tool,
				struct perf_evlist *evlist,
				perf_event__handler_t process,
				bool attrs)
{
	int err;

	if (attrs) {
		err = perf_event__synthesize_attrs(tool, evlist, process);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	err = perf_event__synthesize_extra_attr(tool, evlist, process,
						attrs);

	err = perf_event__synthesize_thread_map2(tool, evlist->threads,
						 process, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(tool, evlist->cpus,
					     process, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}