// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include "thread-stack.h"
#include "util/synthetic-events.h"

#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)
struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	u8				timeless_decoding;
	u8				data_queued;

	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;

	u64				kernel_start;

	unsigned long			num_events;
};
struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
			break;
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	arm_spe_dump(spe, buf, len);
}
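/*
 * Decoder callback used to fetch more trace data: hand the decoder the next
 * auxtrace buffer for this queue, loading the buffer contents from the
 * perf.data file on demand and dropping the previously used buffer.
 */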
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}
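/*
 * Allocate the per-queue state: a scratch event buffer for synthesized
 * samples and a new SPE decoder whose get_trace callback is wired up to
 * arm_spe_get_trace() above.
 */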
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}
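/*
 * Fill in the fields shared by all synthesized samples from the current
 * decoder record: ip/addr, cpumode, pid/tid, cpu and, when decoding is not
 * timeless, the record timestamp.
 */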
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = speq->timestamp;

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->addr = record->to_ip;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}
static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}
static int
arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
				u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
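/*
 * Synthesize one perf sample per enabled event class that matches the
 * decoded record's type flags (L1D/LLC/TLB access and miss, branch miss,
 * remote access).
 */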
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	int err;

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe_synth_spe_events_sample(
					speq, spe->l1d_miss_id);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe_synth_spe_events_sample(
					speq, spe->l1d_access_id);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe_synth_spe_events_sample(
					speq, spe->llc_miss_id);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe_synth_spe_events_sample(
					speq, spe->llc_access_id);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe_synth_spe_events_sample(
					speq, spe->tlb_miss_id);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe_synth_spe_events_sample(
					speq, spe->tlb_access_id);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe_synth_spe_events_sample(speq,
						      spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe_synth_spe_events_sample(speq,
						      spe->remote_access_id);
		if (err)
			return err;
	}

	return 0;
}
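/*
 * Run the decoder until it either runs out of data or, for timed decoding,
 * produces a record at or beyond the requested timestamp, synthesizing
 * samples for each decoded record along the way.
 */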
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * An error was detected while decoding the SPE trace data;
		 * skip this record and continue with the next trace data to
		 * find more records.
		 */
		if (ret < 0)
			continue;

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}
static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}
static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}
static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}
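/*
 * Decoding is "timeless" when none of the recorded events carry
 * PERF_SAMPLE_TIME, i.e. there are no timestamps to order the queues by.
 */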
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the list of events and check whether any of them has
	 * the time bit set; if so, timestamps are available and decoding is
	 * not timeless.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}
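/*
 * Process queues in timestamp order up to the given timestamp: repeatedly
 * take the queue with the oldest pending record off the heap, decode it up
 * to the next queue's ordinal, then push it back onto the heap.
 */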
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}
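/*
 * Per-event hook called by the session code. Queues are (re)built whenever
 * new data has arrived; decoding is then driven from PERF_RECORD_EXIT
 * events, either per thread (timeless) or up to the exiting event's
 * timestamp.
 */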
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_queues(spe, timestamp);
		}
	}

	return err;
}
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (auxtrace_buffer__get_data(buffer, fd)) {
			arm_spe_dump_event(spe, buffer->data,
					   buffer->size);
			auxtrace_buffer__put_data(buffer);
		}
	}

	return 0;
}
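/*
 * Flush callback: decode whatever is still pending once all events have been
 * delivered, using MAX_TIMESTAMP so that every queue is drained.
 */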
static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}
static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}
static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}
static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}
static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}
static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}
static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}
static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			evsel->name = strdup(name);
			break;
		}
	}
}
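/*
 * Set up the synthetic events ("l1d-miss", "l1d-access", "llc-miss",
 * "llc-access", "tlb-miss", "tlb-access", "branch-miss", "remote-access")
 * that decoded SPE records are reported against, according to the itrace
 * synthesis options in use. Typical usage is expected to be something like:
 *
 *	perf record -e arm_spe_0// -- <workload>
 *	perf report --itrace=<options>
 *
 * (the exact PMU name and --itrace options depend on the system and the
 * perf version).
 */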
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	return 0;
}
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}