// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <time.h>

#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/header.h"
#include "../../../util/arm-spe.h"
#include <tools/libc_compat.h> // reallocarray

#define ARM_SPE_CPU_MAGIC		0x1010101010101010ULL

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
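
/*
 * Per-session recording state. @wrapped is an array with one entry per AUX
 * mmap, used in snapshot mode to remember whether the ring buffer behind
 * that mmap has already wrapped around (see arm_spe_find_snapshot()).
 */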
struct arm_spe_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*arm_spe_pmu;
	struct evlist			*evlist;
	int				wrapped_cnt;
	bool				*wrapped;
};

/*
 * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke
 * perf_cpu_map__put() to release the map after use.
 */
static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist)
{
	struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
	struct perf_cpu_map *intersect_cpus;

	/* cpu map is not "any" CPU, we have specific CPUs to work with */
	if (!perf_cpu_map__has_any_cpu(event_cpus)) {
		intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
		perf_cpu_map__put(online_cpus);
	/* Event can be "any" CPU so count all CPUs. */
	} else {
		intersect_cpus = online_cpus;
	}

	return intersect_cpus;
}
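
/*
 * Size in bytes of the AUXTRACE_INFO private area: a fixed header plus one
 * parameter block per traced CPU (see arm_spe_save_cpu_header()).
 */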
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
		       struct evlist *evlist)
{
	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist);
	size_t size;

	if (!cpu_map)
		return 0;

	size = ARM_SPE_AUXTRACE_PRIV_MAX +
	       ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map);
	size *= sizeof(u64);

	perf_cpu_map__put(cpu_map);
	return size;
}
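
/*
 * Fill one per-CPU parameter block in the AUXTRACE_INFO private area: magic
 * number, CPU number, parameter count, MIDR, PMU type and minimum sampling
 * interval. Returns the number of u64 entries written.
 */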
static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
				   struct perf_cpu cpu, __u64 data[])
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *pmu = NULL;
	char *cpuid = NULL;
	u64 val;

	/* Read CPU MIDR */
	cpuid = get_cpuid_allow_env_override(cpu);
	if (!cpuid)
		return -ENOMEM;
	val = strtol(cpuid, NULL, 16);

	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
	data[ARM_SPE_CPU] = cpu.cpu;
	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
	data[ARM_SPE_CPU_MIDR] = val;

	/* Find the associated Arm SPE PMU for the CPU */
	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
		pmu = sper->arm_spe_pmu;

	if (!pmu) {
		/* No Arm SPE PMU is found */
		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
		data[ARM_SPE_CAP_MIN_IVAL] = 0;
	} else {
		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;

		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
			val = 0;
		data[ARM_SPE_CAP_MIN_IVAL] = val;
	}

	free(cpuid);
	return ARM_SPE_CPU_PRIV_MAX;
}
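
/*
 * Fill the PERF_RECORD_AUXTRACE_INFO event: a versioned header describing the
 * session-wide SPE PMU type, followed by one parameter block per CPU.
 */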
static int arm_spe_info_fill(struct auxtrace_record *itr,
			     struct perf_session *session,
			     struct perf_record_auxtrace_info *auxtrace_info,
			     size_t priv_size)
{
	int i, ret;
	struct perf_cpu cpu;
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
	struct perf_cpu_map *cpu_map;
	u64 offset;
	__u64 *data;

	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
		return -EINVAL;

	if (!session->evlist->core.nr_mmaps)
		return -EINVAL;

	cpu_map = arm_spe_find_cpus(session->evlist);
	if (!cpu_map)
		return -EINVAL;

	/* Fill the header */
	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
	auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type;
	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);

	/* Fill the per CPU headers */
	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
		assert(offset < priv_size);
		data = &auxtrace_info->priv[offset];
		ret = arm_spe_save_cpu_header(itr, cpu, data);
		if (ret < 0)
			goto out;
		offset += ret;
	}

	ret = 0;
out:
	perf_cpu_map__put(cpu_map);
	return ret;
}
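
/*
 * Resolve the '-S' (snapshot size) and '-m,' (AUX area pages) defaults when
 * the user gave only one of them, or neither. For example (illustrative
 * invocation, not taken from this file):
 *
 *   perf record -e arm_spe// -S -m,16 -- sleep 1
 *
 * requests snapshot mode with a 16-page AUX area; the snapshot size is then
 * resolved below to the AUX area size.
 */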
static void
arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
					   bool privileged)
{
	/*
	 * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
	 * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
	 * unprivileged users.
	 *
	 * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
	 * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
	 * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
	 * user is likely to get an error as they exceed their mlock limit.
	 */

	/*
	 * No size was given to '-S' or '-m,', so go with the defaults.
	 */
	if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
		opts->mmap_pages = KiB(256) / page_size;
	}

	/*
	 * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
	 * auxtrace mmap area.
	 */
	if (!opts->auxtrace_snapshot_size)
		opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;

	/*
	 * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
	 * enough to fit the requested snapshot size.
	 */
	if (!opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_snapshot_size;

		sz = round_up(sz, page_size) / page_size;
		opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
	}
}

static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
{
	static __u64 sample_period;

	if (sample_period)
		return sample_period;

	/*
	 * If the kernel driver doesn't advertise a minimum,
	 * use the max allowable by PMSIDR_EL1.INTERVAL.
	 */
	if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
				&sample_period) != 1) {
		pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
		sample_period = 4096;
	}
	return sample_period;
}
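
/*
 * Apply the SPE-specific event configuration: SPE samples by period (not
 * frequency), needs its own AUX area mmap, and must come first in the evlist
 * so that the AUX buffer file descriptor can be obtained.
 */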
static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
{
	u64 bit;

	evsel->core.attr.freq = 0;
	evsel->core.attr.sample_period = arm_spe_pmu__sample_period(evsel->pmu);
	evsel->needs_auxtrace_mmap = true;

	/*
	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
	 * must come first.
	 */
	evlist__to_front(evsel->evlist, evsel);

	/*
	 * In the case of per-cpu mmaps, sample CPU for AUX event;
	 * also enable the timestamp tracing for samples correlation.
	 */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(evsel, CPU);
		evsel__set_config_if_unset(evsel->pmu, evsel, "ts_enable", 1);
	}

	/*
	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
	 * on the opening of the event or the SPE data produced.
	 */
	evsel__set_sample_bit(evsel, DATA_SRC);

	/*
	 * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
	 * inform that the resulting output's SPE samples contain physical addresses
	 * where applicable.
	 */
	bit = perf_pmu__format_bits(evsel->pmu, "pa_enable");
	if (evsel->core.attr.config & bit)
		evsel__set_sample_bit(evsel, PHYS_ADDR);
}
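
/*
 * The auxtrace_record recording_options callback: validate the AUX events in
 * the evlist, resolve snapshot/mmap sizing, configure each SPE event, and add
 * a software dummy event so side-band records (mmap, comm, context switch)
 * are captured alongside the hardware trace.
 */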
static int arm_spe_recording_options(struct auxtrace_record *itr,
				     struct evlist *evlist,
				     struct record_opts *opts)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel, *tmp;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
	bool privileged = perf_event_paranoid_check(-1);
	struct evsel *tracking_evsel;
	int err;

	sper->evlist = evlist;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) {
				pr_err("Found unexpected auxtrace event: %s\n",
				       evsel->pmu->name);
				return -EINVAL;
			}
			opts->full_auxtrace = true;
		}
	}

	if (!opts->full_auxtrace)
		return 0;

	/*
	 * We are in snapshot mode.
	 */
	if (opts->auxtrace_snapshot_mode) {
		/*
		 * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
		 * default values.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
			arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);

		/*
		 * Snapshot size can't be bigger than the auxtrace area.
		 */
		if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}

		/*
		 * Something went wrong somewhere - this shouldn't happen.
		 */
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
	}

	/* We are in full trace mode but '-m,xyz' wasn't specified */
	if (!opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	if (opts->auxtrace_snapshot_mode)
		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
			  opts->auxtrace_snapshot_size);

	evlist__for_each_entry_safe(evlist, tmp, evsel) {
		if (evsel__is_aux_event(evsel))
			arm_spe_setup_evsel(evsel, cpus);
	}

	/* Add dummy event to keep tracking */
	err = parse_event(evlist, "dummy:u");
	if (err)
		return err;

	tracking_evsel = evlist__last(evlist);
	evlist__set_tracking_event(evlist, tracking_evsel);

	tracking_evsel->core.attr.freq = 0;
	tracking_evsel->core.attr.sample_period = 1;

	/* In per-cpu case, always need the time of mmap events etc */
	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
		evsel__set_sample_bit(tracking_evsel, TIME);
		evsel__set_sample_bit(tracking_evsel, CPU);

		/* also track task context switch */
		if (!record_opts__no_switch_events(opts))
			tracking_evsel->core.attr.context_switch = 1;
	}

	return 0;
}
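
/*
 * Parse the argument of 'perf record -S<size>'. An empty argument selects the
 * default snapshot size. For example (illustrative): '-S0x10000' requests a
 * 64KiB snapshot. The size is parsed with base 0, so decimal, octal and hex
 * notations are all accepted.
 */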
static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
					  struct record_opts *opts,
					  const char *str)
{
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	return 0;
}

static int arm_spe_snapshot_start(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__disable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}

static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
{
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);
	struct evsel *evsel;
	int ret = -EINVAL;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel__is_aux_event(evsel)) {
			ret = evsel__enable(evsel);
			if (ret < 0)
				return ret;
		}
	}
	return ret;
}
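
/*
 * Grow the ptr->wrapped array so that AUX mmap index @idx has a valid entry.
 * Called lazily the first time a given mmap is seen by arm_spe_find_snapshot().
 */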
static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
{
	bool *wrapped;
	int cnt = ptr->wrapped_cnt, new_cnt, i;

	/*
	 * No need to allocate, so return early.
	 */
	if (idx < cnt)
		return 0;

	/*
	 * Make ptr->wrapped as big as idx.
	 */
	new_cnt = idx + 1;

	/*
	 * Free'ed in arm_spe_recording_free().
	 */
	wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
	if (!wrapped)
		return -ENOMEM;

	/*
	 * Initialise the newly allocated entries.
	 */
	for (i = cnt; i < new_cnt; i++)
		wrapped[i] = false;

	ptr->wrapped_cnt = new_cnt;
	ptr->wrapped = wrapped;

	return 0;
}

static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
				       size_t buffer_size, u64 head)
{
	u64 i, watermark;
	u64 *buf = (u64 *)buffer;
	size_t buf_size = buffer_size;

	/*
	 * Defensively handle the case where head might be continually increasing - if its value is
	 * equal or greater than the size of the ring buffer, then we can safely determine it has
	 * wrapped around. Otherwise, continue to detect if head might have wrapped.
	 */
	if (head >= buffer_size)
		return true;

	/*
	 * We want to look at the very last 512 bytes (chosen arbitrarily) in the ring buffer.
	 */
	watermark = buf_size - 512;

	/*
	 * The value of head is somewhere within the size of the ring buffer. This can mean that
	 * there hasn't been enough data to fill the ring buffer yet or the trace time was so long
	 * that head has numerically wrapped around. To find out we need to check if we have data
	 * at the very end of the ring buffer. We can reliably do this because mmap'ed pages are
	 * zeroed out and there is a fresh mapping with every new session.
	 */

	/*
	 * head is less than 512 bytes from the end of the ring buffer.
	 */
	if (head > watermark)
		return true;

	/*
	 * Speed things up by using 64 bit transactions (see "u64 *buf" above).
	 */
	watermark /= sizeof(u64);
	buf_size /= sizeof(u64);

	/*
	 * If we find trace data at the end of the ring buffer, head has been there and has
	 * numerically wrapped around at least once.
	 */
	for (i = watermark; i < buf_size; i++)
		if (buf[i])
			return true;

	return false;
}
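
/*
 * The auxtrace_record find_snapshot callback: decide how much of the AUX ring
 * buffer to copy into the snapshot. Before the buffer first wraps, only the
 * *old..*head range holds data; after a wrap the whole buffer does, so *head
 * and *old are adjusted to cover mm->len bytes.
 */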
static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
				 struct auxtrace_mmap *mm, unsigned char *data,
				 u64 *head, u64 *old)
{
	int err;
	bool wrapped;
	struct arm_spe_recording *ptr =
			container_of(itr, struct arm_spe_recording, itr);

	/*
	 * Allocate memory to keep track of wrapping if this is the first
	 * time we deal with this *mm.
	 */
	if (idx >= ptr->wrapped_cnt) {
		err = arm_spe_alloc_wrapped_array(ptr, idx);
		if (err)
			return err;
	}

	/*
	 * Check to see if *head has wrapped around. If it hasn't, only the
	 * amount of data between *head and *old is snapshot'ed to avoid
	 * bloating the perf.data file with zeros. But as soon as *head has
	 * wrapped around, the entire size of the AUX ring buffer is taken.
	 */
	wrapped = ptr->wrapped[idx];
	if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
		wrapped = true;
		ptr->wrapped[idx] = true;
	}

	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);

	/*
	 * No wrap has occurred, we can just use *head and *old.
	 */
	if (!wrapped)
		return 0;

	/*
	 * *head has wrapped around - adjust *head and *old to pick up the
	 * entire content of the AUX buffer.
	 */
	if (*head >= mm->len) {
		*old = *head - mm->len;
	} else {
		*head += mm->len;
		*old = *head - mm->len;
	}

	return 0;
}
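
/*
 * Generate a quasi-unique reference number for the trace, derived from the
 * raw monotonic clock; it only needs to be reasonably unique per session.
 */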
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);

	return ts.tv_sec ^ ts.tv_nsec;
}

static void arm_spe_recording_free(struct auxtrace_record *itr)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);

	zfree(&sper->wrapped);
	free(sper);
}

struct auxtrace_record *arm_spe_recording_init(int *err,
					       struct perf_pmu *arm_spe_pmu)
{
	struct arm_spe_recording *sper;

	if (!arm_spe_pmu) {
		*err = -ENODEV;
		return NULL;
	}

	sper = zalloc(sizeof(struct arm_spe_recording));
	if (!sper) {
		*err = -ENOMEM;
		return NULL;
	}

	sper->arm_spe_pmu = arm_spe_pmu;
	sper->itr.snapshot_start = arm_spe_snapshot_start;
	sper->itr.snapshot_finish = arm_spe_snapshot_finish;
	sper->itr.find_snapshot = arm_spe_find_snapshot;
	sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
	sper->itr.recording_options = arm_spe_recording_options;
	sper->itr.info_priv_size = arm_spe_info_priv_size;
	sper->itr.info_fill = arm_spe_info_fill;
	sper->itr.free = arm_spe_recording_free;
	sper->itr.reference = arm_spe_reference;
	sper->itr.read_finish = auxtrace_record__read_finish;
	sper->itr.alignment = 0;

	*err = 0;
	return &sper->itr;
}
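
/*
 * Default attribute setup for events opened on the SPE PMU: use a
 * period-based sample_period seeded from the PMU's advertised minimum
 * interval (see arm_spe_pmu__sample_period() above).
 */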
void
arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
{
	attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
}