1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright(C) 2015-2018 Linaro Limited.
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
23 #include "cs-etm-decoder/cs-etm-decoder.h"
32 #include "map_symbol.h"
37 #include "thread-stack.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
43 struct cs_etm_auxtrace
{
44 struct auxtrace auxtrace
;
45 struct auxtrace_queues queues
;
46 struct auxtrace_heap heap
;
47 struct itrace_synth_opts synth_opts
;
48 struct perf_session
*session
;
49 struct perf_tsc_conversion tc
;
52 * Timeless has no timestamps in the trace so overlapping mmap lookups
53 * are less accurate but produces smaller trace data. We use context IDs
54 * in the trace instead of matching timestamps with fork records so
55 * they're not really needed in the general case. Overlapping mmaps
56 * happen in cases like between a fork and an exec.
58 bool timeless_decoding
;
61 * Per-thread ignores the trace channel ID and instead assumes that
62 * everything in a buffer comes from the same process regardless of
63 * which CPU it ran on. It also implies no context IDs so the TID is
64 * taken from the auxtrace buffer.
66 bool per_thread_decoding
;
69 bool has_virtual_ts
; /* Virtual/Kernel timestamps in the trace. */
72 u64 latest_kernel_timestamp
;
74 u64 branches_sample_type
;
76 u64 instructions_sample_type
;
77 u64 instructions_sample_period
;
80 unsigned int pmu_type
;
81 enum cs_etm_pid_fmt pid_fmt
;
84 struct cs_etm_traceid_queue
{
86 u64 period_instructions
;
87 size_t last_branch_pos
;
88 union perf_event
*event_buf
;
89 struct thread
*thread
;
90 struct thread
*prev_packet_thread
;
91 ocsd_ex_level prev_packet_el
;
93 struct branch_stack
*last_branch
;
94 struct branch_stack
*last_branch_rb
;
95 struct cs_etm_packet
*prev_packet
;
96 struct cs_etm_packet
*packet
;
97 struct cs_etm_packet_queue packet_queue
;
106 struct cs_etm_queue
{
107 struct cs_etm_auxtrace
*etm
;
108 struct cs_etm_decoder
*decoder
;
109 struct auxtrace_buffer
*buffer
;
110 unsigned int queue_nr
;
111 u8 pending_timestamp_chan_id
;
112 enum cs_etm_format format
;
114 const unsigned char *buf
;
115 size_t buf_len
, buf_used
;
116 /* Conversion between traceID and index in traceid_queues array */
117 struct intlist
*traceid_queues_list
;
118 struct cs_etm_traceid_queue
**traceid_queues
;
119 /* Conversion between traceID and metadata pointers */
120 struct intlist
*traceid_list
;
122 * Same as traceid_list, but traceid_list may be a reference to another
123 * queue's which has a matching sink ID.
125 struct intlist
*own_traceid_list
;
129 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace
*etm
);
130 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace
*etm
,
132 static int cs_etm__get_data_block(struct cs_etm_queue
*etmq
);
133 static int cs_etm__decode_data_block(struct cs_etm_queue
*etmq
);
134 static int cs_etm__metadata_get_trace_id(u8
*trace_chan_id
, u64
*cpu_metadata
);
135 static u64
*get_cpu_data(struct cs_etm_auxtrace
*etm
, int cpu
);
136 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id
, u64
*cpu_metadata
);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments are fully parenthesized so expressions (e.g. "q + 1") expand
 * with the intended precedence.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
	(((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
#define SINK_UNSET ((u32) -1)
153 static u32
cs_etm__get_v7_protocol_version(u32 etmidr
)
155 etmidr
&= ETMIDR_PTM_VERSION
;
157 if (etmidr
== ETMIDR_PTM_VERSION
)
158 return CS_ETM_PROTO_PTM
;
160 return CS_ETM_PROTO_ETMV3
;
163 static int cs_etm__get_magic(struct cs_etm_queue
*etmq
, u8 trace_chan_id
, u64
*magic
)
165 struct int_node
*inode
;
168 inode
= intlist__find(etmq
->traceid_list
, trace_chan_id
);
172 metadata
= inode
->priv
;
173 *magic
= metadata
[CS_ETM_MAGIC
];
177 int cs_etm__get_cpu(struct cs_etm_queue
*etmq
, u8 trace_chan_id
, int *cpu
)
179 struct int_node
*inode
;
182 inode
= intlist__find(etmq
->traceid_list
, trace_chan_id
);
186 metadata
= inode
->priv
;
187 *cpu
= (int)metadata
[CS_ETM_CPU
];
192 * The returned PID format is presented as an enum:
194 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
195 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
196 * CS_ETM_PIDFMT_NONE: No context IDs
198 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
199 * are enabled at the same time when the session runs on an EL2 kernel.
200 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
201 * recorded in the trace data, the tool will selectively use
202 * CONTEXTIDR_EL2 as PID.
204 * The result is cached in etm->pid_fmt so this function only needs to be called
205 * when processing the aux info.
207 static enum cs_etm_pid_fmt
cs_etm__init_pid_fmt(u64
*metadata
)
211 if (metadata
[CS_ETM_MAGIC
] == __perf_cs_etmv3_magic
) {
212 val
= metadata
[CS_ETM_ETMCR
];
213 /* CONTEXTIDR is traced */
214 if (val
& BIT(ETM_OPT_CTXTID
))
215 return CS_ETM_PIDFMT_CTXTID
;
217 val
= metadata
[CS_ETMV4_TRCCONFIGR
];
218 /* CONTEXTIDR_EL2 is traced */
219 if (val
& (BIT(ETM4_CFG_BIT_VMID
) | BIT(ETM4_CFG_BIT_VMID_OPT
)))
220 return CS_ETM_PIDFMT_CTXTID2
;
221 /* CONTEXTIDR_EL1 is traced */
222 else if (val
& BIT(ETM4_CFG_BIT_CTXTID
))
223 return CS_ETM_PIDFMT_CTXTID
;
226 return CS_ETM_PIDFMT_NONE
;
229 enum cs_etm_pid_fmt
cs_etm__get_pid_fmt(struct cs_etm_queue
*etmq
)
231 return etmq
->etm
->pid_fmt
;
234 static int cs_etm__insert_trace_id_node(struct cs_etm_queue
*etmq
,
235 u8 trace_chan_id
, u64
*cpu_metadata
)
237 /* Get an RB node for this CPU */
238 struct int_node
*inode
= intlist__findnew(etmq
->traceid_list
, trace_chan_id
);
240 /* Something went wrong, no need to continue */
244 /* Disallow re-mapping a different traceID to metadata pair. */
246 u64
*curr_cpu_data
= inode
->priv
;
250 if (curr_cpu_data
[CS_ETM_CPU
] != cpu_metadata
[CS_ETM_CPU
]) {
252 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
253 * are expected (but not supported) in per-thread mode,
254 * rather than signifying an error.
256 if (etmq
->etm
->per_thread_decoding
)
257 pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
259 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
264 /* check that the mapped ID matches */
265 err
= cs_etm__metadata_get_trace_id(&curr_chan_id
, curr_cpu_data
);
269 if (curr_chan_id
!= trace_chan_id
) {
270 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
274 /* Skip re-adding the same mappings if everything matched */
278 /* Not one we've seen before, associate the traceID with the metadata pointer */
279 inode
->priv
= cpu_metadata
;
284 static struct cs_etm_queue
*cs_etm__get_queue(struct cs_etm_auxtrace
*etm
, int cpu
)
286 if (etm
->per_thread_decoding
)
287 return etm
->queues
.queue_array
[0].priv
;
289 return etm
->queues
.queue_array
[cpu
].priv
;
292 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace
*etm
, u8 trace_chan_id
,
295 struct cs_etm_queue
*etmq
;
298 * If the queue is unformatted then only save one mapping in the
299 * queue associated with that CPU so only one decoder is made.
301 etmq
= cs_etm__get_queue(etm
, cpu_metadata
[CS_ETM_CPU
]);
302 if (etmq
->format
== UNFORMATTED
)
303 return cs_etm__insert_trace_id_node(etmq
, trace_chan_id
,
307 * Otherwise, version 0 trace IDs are global so save them into every
310 for (unsigned int i
= 0; i
< etm
->queues
.nr_queues
; ++i
) {
313 etmq
= etm
->queues
.queue_array
[i
].priv
;
314 ret
= cs_etm__insert_trace_id_node(etmq
, trace_chan_id
,
323 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace
*etm
, int cpu
,
328 u8 trace_chan_id
= FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK
, hw_id
);
330 cpu_data
= get_cpu_data(etm
, cpu
);
331 if (cpu_data
== NULL
)
334 err
= cs_etm__map_trace_id_v0(etm
, trace_chan_id
, cpu_data
);
339 * if we are picking up the association from the packet, need to plug
340 * the correct trace ID into the metadata for setting up decoders later.
342 return cs_etm__metadata_set_trace_id(trace_chan_id
, cpu_data
);
345 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace
*etm
, int cpu
,
348 struct cs_etm_queue
*etmq
= cs_etm__get_queue(etm
, cpu
);
351 u32 sink_id
= FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK
, hw_id
);
352 u8 trace_id
= FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK
, hw_id
);
355 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
356 * let it pass for now until an actual overlapping trace ID is hit. In
357 * most cases IDs won't overlap even if the sink changes.
359 if (!etmq
->etm
->per_thread_decoding
&& etmq
->sink_id
!= SINK_UNSET
&&
360 etmq
->sink_id
!= sink_id
) {
361 pr_err("CS_ETM: mismatch between sink IDs\n");
365 etmq
->sink_id
= sink_id
;
367 /* Find which other queues use this sink and link their ID maps */
368 for (unsigned int i
= 0; i
< etm
->queues
.nr_queues
; ++i
) {
369 struct cs_etm_queue
*other_etmq
= etm
->queues
.queue_array
[i
].priv
;
371 /* Different sinks, skip */
372 if (other_etmq
->sink_id
!= etmq
->sink_id
)
375 /* Already linked, skip */
376 if (other_etmq
->traceid_list
== etmq
->traceid_list
)
379 /* At the point of first linking, this one should be empty */
380 if (!intlist__empty(etmq
->traceid_list
)) {
381 pr_err("CS_ETM: Can't link populated trace ID lists\n");
385 etmq
->own_traceid_list
= NULL
;
386 intlist__delete(etmq
->traceid_list
);
387 etmq
->traceid_list
= other_etmq
->traceid_list
;
391 cpu_data
= get_cpu_data(etm
, cpu
);
392 ret
= cs_etm__insert_trace_id_node(etmq
, trace_id
, cpu_data
);
396 ret
= cs_etm__metadata_set_trace_id(trace_id
, cpu_data
);
403 static int cs_etm__metadata_get_trace_id(u8
*trace_chan_id
, u64
*cpu_metadata
)
405 u64 cs_etm_magic
= cpu_metadata
[CS_ETM_MAGIC
];
407 switch (cs_etm_magic
) {
408 case __perf_cs_etmv3_magic
:
409 *trace_chan_id
= (u8
)(cpu_metadata
[CS_ETM_ETMTRACEIDR
] &
410 CORESIGHT_TRACE_ID_VAL_MASK
);
412 case __perf_cs_etmv4_magic
:
413 case __perf_cs_ete_magic
:
414 *trace_chan_id
= (u8
)(cpu_metadata
[CS_ETMV4_TRCTRACEIDR
] &
415 CORESIGHT_TRACE_ID_VAL_MASK
);
424 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
426 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id
, u64
*cpu_metadata
)
428 u64 cs_etm_magic
= cpu_metadata
[CS_ETM_MAGIC
];
430 switch (cs_etm_magic
) {
431 case __perf_cs_etmv3_magic
:
432 cpu_metadata
[CS_ETM_ETMTRACEIDR
] = trace_chan_id
;
434 case __perf_cs_etmv4_magic
:
435 case __perf_cs_ete_magic
:
436 cpu_metadata
[CS_ETMV4_TRCTRACEIDR
] = trace_chan_id
;
446 * Get a metadata index for a specific cpu from an array.
449 static int get_cpu_data_idx(struct cs_etm_auxtrace
*etm
, int cpu
)
453 for (i
= 0; i
< etm
->num_cpu
; i
++) {
454 if (etm
->metadata
[i
][CS_ETM_CPU
] == (u64
)cpu
) {
463 * Get a metadata for a specific cpu from an array.
466 static u64
*get_cpu_data(struct cs_etm_auxtrace
*etm
, int cpu
)
468 int idx
= get_cpu_data_idx(etm
, cpu
);
470 return (idx
!= -1) ? etm
->metadata
[idx
] : NULL
;
474 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
476 * The payload associates the Trace ID and the CPU.
477 * The routine is tolerant of seeing multiple packets with the same association,
478 * but a CPU / Trace ID association changing during a session is an error.
480 static int cs_etm__process_aux_output_hw_id(struct perf_session
*session
,
481 union perf_event
*event
)
483 struct cs_etm_auxtrace
*etm
;
484 struct perf_sample sample
;
487 int cpu
, version
, err
;
489 /* extract and parse the HW ID */
490 hw_id
= event
->aux_output_hw_id
.hw_id
;
491 version
= FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK
, hw_id
);
493 /* check that we can handle this version */
494 if (version
> CS_AUX_HW_ID_MAJOR_VERSION
) {
495 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
500 /* get access to the etm metadata */
501 etm
= container_of(session
->auxtrace
, struct cs_etm_auxtrace
, auxtrace
);
502 if (!etm
|| !etm
->metadata
)
505 /* parse the sample to get the CPU */
506 evsel
= evlist__event2evsel(session
->evlist
, event
);
509 err
= evsel__parse_sample(evsel
, event
, &sample
);
514 /* no CPU in the sample - possibly recorded with an old version of perf */
515 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
519 if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK
, hw_id
) == 0)
520 return cs_etm__process_trace_id_v0(etm
, cpu
, hw_id
);
522 return cs_etm__process_trace_id_v0_1(etm
, cpu
, hw_id
);
525 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue
*etmq
,
529 * When a timestamp packet is encountered the backend code
530 * is stopped so that the front end has time to process packets
531 * that were accumulated in the traceID queue. Since there can
532 * be more than one channel per cs_etm_queue, we need to specify
533 * what traceID queue needs servicing.
535 etmq
->pending_timestamp_chan_id
= trace_chan_id
;
538 static u64
cs_etm__etmq_get_timestamp(struct cs_etm_queue
*etmq
,
541 struct cs_etm_packet_queue
*packet_queue
;
543 if (!etmq
->pending_timestamp_chan_id
)
547 *trace_chan_id
= etmq
->pending_timestamp_chan_id
;
549 packet_queue
= cs_etm__etmq_get_packet_queue(etmq
,
550 etmq
->pending_timestamp_chan_id
);
554 /* Acknowledge pending status */
555 etmq
->pending_timestamp_chan_id
= 0;
557 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
558 return packet_queue
->cs_timestamp
;
561 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue
*queue
)
567 queue
->packet_count
= 0;
568 for (i
= 0; i
< CS_ETM_PACKET_MAX_BUFFER
; i
++) {
569 queue
->packet_buffer
[i
].isa
= CS_ETM_ISA_UNKNOWN
;
570 queue
->packet_buffer
[i
].start_addr
= CS_ETM_INVAL_ADDR
;
571 queue
->packet_buffer
[i
].end_addr
= CS_ETM_INVAL_ADDR
;
572 queue
->packet_buffer
[i
].instr_count
= 0;
573 queue
->packet_buffer
[i
].last_instr_taken_branch
= false;
574 queue
->packet_buffer
[i
].last_instr_size
= 0;
575 queue
->packet_buffer
[i
].last_instr_type
= 0;
576 queue
->packet_buffer
[i
].last_instr_subtype
= 0;
577 queue
->packet_buffer
[i
].last_instr_cond
= 0;
578 queue
->packet_buffer
[i
].flags
= 0;
579 queue
->packet_buffer
[i
].exception_number
= UINT32_MAX
;
580 queue
->packet_buffer
[i
].trace_chan_id
= UINT8_MAX
;
581 queue
->packet_buffer
[i
].cpu
= INT_MIN
;
585 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue
*etmq
)
588 struct int_node
*inode
;
589 struct cs_etm_traceid_queue
*tidq
;
590 struct intlist
*traceid_queues_list
= etmq
->traceid_queues_list
;
592 intlist__for_each_entry(inode
, traceid_queues_list
) {
593 idx
= (int)(intptr_t)inode
->priv
;
594 tidq
= etmq
->traceid_queues
[idx
];
595 cs_etm__clear_packet_queue(&tidq
->packet_queue
);
599 static int cs_etm__init_traceid_queue(struct cs_etm_queue
*etmq
,
600 struct cs_etm_traceid_queue
*tidq
,
604 struct auxtrace_queue
*queue
;
605 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
607 cs_etm__clear_packet_queue(&tidq
->packet_queue
);
609 queue
= &etmq
->etm
->queues
.queue_array
[etmq
->queue_nr
];
610 tidq
->trace_chan_id
= trace_chan_id
;
611 tidq
->el
= tidq
->prev_packet_el
= ocsd_EL_unknown
;
612 tidq
->thread
= machine__findnew_thread(&etm
->session
->machines
.host
, -1,
614 tidq
->prev_packet_thread
= machine__idle_thread(&etm
->session
->machines
.host
);
616 tidq
->packet
= zalloc(sizeof(struct cs_etm_packet
));
620 tidq
->prev_packet
= zalloc(sizeof(struct cs_etm_packet
));
621 if (!tidq
->prev_packet
)
624 if (etm
->synth_opts
.last_branch
) {
625 size_t sz
= sizeof(struct branch_stack
);
627 sz
+= etm
->synth_opts
.last_branch_sz
*
628 sizeof(struct branch_entry
);
629 tidq
->last_branch
= zalloc(sz
);
630 if (!tidq
->last_branch
)
632 tidq
->last_branch_rb
= zalloc(sz
);
633 if (!tidq
->last_branch_rb
)
637 tidq
->event_buf
= malloc(PERF_SAMPLE_MAX_SIZE
);
638 if (!tidq
->event_buf
)
644 zfree(&tidq
->last_branch_rb
);
645 zfree(&tidq
->last_branch
);
646 zfree(&tidq
->prev_packet
);
647 zfree(&tidq
->packet
);
652 static struct cs_etm_traceid_queue
653 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue
*etmq
, u8 trace_chan_id
)
656 struct int_node
*inode
;
657 struct intlist
*traceid_queues_list
;
658 struct cs_etm_traceid_queue
*tidq
, **traceid_queues
;
659 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
661 if (etm
->per_thread_decoding
)
662 trace_chan_id
= CS_ETM_PER_THREAD_TRACEID
;
664 traceid_queues_list
= etmq
->traceid_queues_list
;
667 * Check if the traceid_queue exist for this traceID by looking
670 inode
= intlist__find(traceid_queues_list
, trace_chan_id
);
672 idx
= (int)(intptr_t)inode
->priv
;
673 return etmq
->traceid_queues
[idx
];
676 /* We couldn't find a traceid_queue for this traceID, allocate one */
677 tidq
= malloc(sizeof(*tidq
));
681 memset(tidq
, 0, sizeof(*tidq
));
683 /* Get a valid index for the new traceid_queue */
684 idx
= intlist__nr_entries(traceid_queues_list
);
685 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
686 inode
= intlist__findnew(traceid_queues_list
, trace_chan_id
);
690 /* Associate this traceID with this index */
691 inode
->priv
= (void *)(intptr_t)idx
;
693 if (cs_etm__init_traceid_queue(etmq
, tidq
, trace_chan_id
))
696 /* Grow the traceid_queues array by one unit */
697 traceid_queues
= etmq
->traceid_queues
;
698 traceid_queues
= reallocarray(traceid_queues
,
700 sizeof(*traceid_queues
));
703 * On failure reallocarray() returns NULL and the original block of
704 * memory is left untouched.
709 traceid_queues
[idx
] = tidq
;
710 etmq
->traceid_queues
= traceid_queues
;
712 return etmq
->traceid_queues
[idx
];
716 * Function intlist__remove() removes the inode from the list
717 * and delete the memory associated to it.
719 intlist__remove(traceid_queues_list
, inode
);
725 struct cs_etm_packet_queue
726 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue
*etmq
, u8 trace_chan_id
)
728 struct cs_etm_traceid_queue
*tidq
;
730 tidq
= cs_etm__etmq_get_traceid_queue(etmq
, trace_chan_id
);
732 return &tidq
->packet_queue
;
737 static void cs_etm__packet_swap(struct cs_etm_auxtrace
*etm
,
738 struct cs_etm_traceid_queue
*tidq
)
740 struct cs_etm_packet
*tmp
;
742 if (etm
->synth_opts
.branches
|| etm
->synth_opts
.last_branch
||
743 etm
->synth_opts
.instructions
) {
745 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
746 * the next incoming packet.
748 * Threads and exception levels are also tracked for both the
749 * previous and current packets. This is because the previous
750 * packet is used for the 'from' IP for branch samples, so the
751 * thread at that time must also be assigned to that sample.
752 * Across discontinuity packets the thread can change, so by
753 * tracking the thread for the previous packet the branch sample
754 * will have the correct info.
757 tidq
->packet
= tidq
->prev_packet
;
758 tidq
->prev_packet
= tmp
;
759 tidq
->prev_packet_el
= tidq
->el
;
760 thread__put(tidq
->prev_packet_thread
);
761 tidq
->prev_packet_thread
= thread__get(tidq
->thread
);
765 static void cs_etm__packet_dump(const char *pkt_string
, void *data
)
767 const char *color
= PERF_COLOR_BLUE
;
768 int len
= strlen(pkt_string
);
769 struct cs_etm_queue
*etmq
= data
;
773 snprintf(queue_nr
, sizeof(queue_nr
), "Qnr:%d; ", etmq
->queue_nr
);
777 if (len
&& (pkt_string
[len
-1] == '\n'))
778 color_fprintf(stdout
, color
, " %s%s", queue_nr
, pkt_string
);
780 color_fprintf(stdout
, color
, " %s%s\n", queue_nr
, pkt_string
);
785 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params
*t_params
,
786 u64
*metadata
, u32 etmidr
)
788 t_params
->protocol
= cs_etm__get_v7_protocol_version(etmidr
);
789 t_params
->etmv3
.reg_ctrl
= metadata
[CS_ETM_ETMCR
];
790 t_params
->etmv3
.reg_trc_id
= metadata
[CS_ETM_ETMTRACEIDR
];
793 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params
*t_params
,
796 t_params
->protocol
= CS_ETM_PROTO_ETMV4i
;
797 t_params
->etmv4
.reg_idr0
= metadata
[CS_ETMV4_TRCIDR0
];
798 t_params
->etmv4
.reg_idr1
= metadata
[CS_ETMV4_TRCIDR1
];
799 t_params
->etmv4
.reg_idr2
= metadata
[CS_ETMV4_TRCIDR2
];
800 t_params
->etmv4
.reg_idr8
= metadata
[CS_ETMV4_TRCIDR8
];
801 t_params
->etmv4
.reg_configr
= metadata
[CS_ETMV4_TRCCONFIGR
];
802 t_params
->etmv4
.reg_traceidr
= metadata
[CS_ETMV4_TRCTRACEIDR
];
805 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params
*t_params
,
808 t_params
->protocol
= CS_ETM_PROTO_ETE
;
809 t_params
->ete
.reg_idr0
= metadata
[CS_ETE_TRCIDR0
];
810 t_params
->ete
.reg_idr1
= metadata
[CS_ETE_TRCIDR1
];
811 t_params
->ete
.reg_idr2
= metadata
[CS_ETE_TRCIDR2
];
812 t_params
->ete
.reg_idr8
= metadata
[CS_ETE_TRCIDR8
];
813 t_params
->ete
.reg_configr
= metadata
[CS_ETE_TRCCONFIGR
];
814 t_params
->ete
.reg_traceidr
= metadata
[CS_ETE_TRCTRACEIDR
];
815 t_params
->ete
.reg_devarch
= metadata
[CS_ETE_TRCDEVARCH
];
818 static int cs_etm__init_trace_params(struct cs_etm_trace_params
*t_params
,
819 struct cs_etm_queue
*etmq
)
821 struct int_node
*inode
;
823 intlist__for_each_entry(inode
, etmq
->traceid_list
) {
824 u64
*metadata
= inode
->priv
;
825 u64 architecture
= metadata
[CS_ETM_MAGIC
];
828 switch (architecture
) {
829 case __perf_cs_etmv3_magic
:
830 etmidr
= metadata
[CS_ETM_ETMIDR
];
831 cs_etm__set_trace_param_etmv3(t_params
++, metadata
, etmidr
);
833 case __perf_cs_etmv4_magic
:
834 cs_etm__set_trace_param_etmv4(t_params
++, metadata
);
836 case __perf_cs_ete_magic
:
837 cs_etm__set_trace_param_ete(t_params
++, metadata
);
847 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params
*d_params
,
848 struct cs_etm_queue
*etmq
,
849 enum cs_etm_decoder_operation mode
)
853 if (!(mode
< CS_ETM_OPERATION_MAX
))
856 d_params
->packet_printer
= cs_etm__packet_dump
;
857 d_params
->operation
= mode
;
858 d_params
->data
= etmq
;
859 d_params
->formatted
= etmq
->format
== FORMATTED
;
860 d_params
->fsyncs
= false;
861 d_params
->hsyncs
= false;
862 d_params
->frame_aligned
= true;
869 static void cs_etm__dump_event(struct cs_etm_queue
*etmq
,
870 struct auxtrace_buffer
*buffer
)
873 const char *color
= PERF_COLOR_BLUE
;
874 size_t buffer_used
= 0;
876 fprintf(stdout
, "\n");
877 color_fprintf(stdout
, color
,
878 ". ... CoreSight %s Trace data: size %#zx bytes\n",
879 cs_etm_decoder__get_name(etmq
->decoder
), buffer
->size
);
884 ret
= cs_etm_decoder__process_data_block(
885 etmq
->decoder
, buffer
->offset
,
886 &((u8
*)buffer
->data
)[buffer_used
],
887 buffer
->size
- buffer_used
, &consumed
);
891 buffer_used
+= consumed
;
892 } while (buffer_used
< buffer
->size
);
894 cs_etm_decoder__reset(etmq
->decoder
);
897 static int cs_etm__flush_events(struct perf_session
*session
,
898 const struct perf_tool
*tool
)
900 struct cs_etm_auxtrace
*etm
= container_of(session
->auxtrace
,
901 struct cs_etm_auxtrace
,
906 if (!tool
->ordered_events
)
909 if (etm
->timeless_decoding
) {
911 * Pass tid = -1 to process all queues. But likely they will have
912 * already been processed on PERF_RECORD_EXIT anyway.
914 return cs_etm__process_timeless_queues(etm
, -1);
917 return cs_etm__process_timestamped_queues(etm
);
920 static void cs_etm__free_traceid_queues(struct cs_etm_queue
*etmq
)
924 struct int_node
*inode
, *tmp
;
925 struct cs_etm_traceid_queue
*tidq
;
926 struct intlist
*traceid_queues_list
= etmq
->traceid_queues_list
;
928 intlist__for_each_entry_safe(inode
, tmp
, traceid_queues_list
) {
929 priv
= (uintptr_t)inode
->priv
;
932 /* Free this traceid_queue from the array */
933 tidq
= etmq
->traceid_queues
[idx
];
934 thread__zput(tidq
->thread
);
935 thread__zput(tidq
->prev_packet_thread
);
936 zfree(&tidq
->event_buf
);
937 zfree(&tidq
->last_branch
);
938 zfree(&tidq
->last_branch_rb
);
939 zfree(&tidq
->prev_packet
);
940 zfree(&tidq
->packet
);
944 * Function intlist__remove() removes the inode from the list
945 * and delete the memory associated to it.
947 intlist__remove(traceid_queues_list
, inode
);
950 /* Then the RB tree itself */
951 intlist__delete(traceid_queues_list
);
952 etmq
->traceid_queues_list
= NULL
;
954 /* finally free the traceid_queues array */
955 zfree(&etmq
->traceid_queues
);
958 static void cs_etm__free_queue(void *priv
)
960 struct int_node
*inode
, *tmp
;
961 struct cs_etm_queue
*etmq
= priv
;
966 cs_etm_decoder__free(etmq
->decoder
);
967 cs_etm__free_traceid_queues(etmq
);
969 if (etmq
->own_traceid_list
) {
970 /* First remove all traceID/metadata nodes for the RB tree */
971 intlist__for_each_entry_safe(inode
, tmp
, etmq
->own_traceid_list
)
972 intlist__remove(etmq
->own_traceid_list
, inode
);
974 /* Then the RB tree itself */
975 intlist__delete(etmq
->own_traceid_list
);
981 static void cs_etm__free_events(struct perf_session
*session
)
984 struct cs_etm_auxtrace
*aux
= container_of(session
->auxtrace
,
985 struct cs_etm_auxtrace
,
987 struct auxtrace_queues
*queues
= &aux
->queues
;
989 for (i
= 0; i
< queues
->nr_queues
; i
++) {
990 cs_etm__free_queue(queues
->queue_array
[i
].priv
);
991 queues
->queue_array
[i
].priv
= NULL
;
994 auxtrace_queues__free(queues
);
997 static void cs_etm__free(struct perf_session
*session
)
1000 struct cs_etm_auxtrace
*aux
= container_of(session
->auxtrace
,
1001 struct cs_etm_auxtrace
,
1003 cs_etm__free_events(session
);
1004 session
->auxtrace
= NULL
;
1006 for (i
= 0; i
< aux
->num_cpu
; i
++)
1007 zfree(&aux
->metadata
[i
]);
1009 zfree(&aux
->metadata
);
1013 static bool cs_etm__evsel_is_auxtrace(struct perf_session
*session
,
1014 struct evsel
*evsel
)
1016 struct cs_etm_auxtrace
*aux
= container_of(session
->auxtrace
,
1017 struct cs_etm_auxtrace
,
1020 return evsel
->core
.attr
.type
== aux
->pmu_type
;
1023 static struct machine
*cs_etm__get_machine(struct cs_etm_queue
*etmq
,
1026 enum cs_etm_pid_fmt pid_fmt
= cs_etm__get_pid_fmt(etmq
);
1029 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1030 * running at EL1 assume everything is the host.
1032 if (pid_fmt
== CS_ETM_PIDFMT_CTXTID
)
1033 return &etmq
->etm
->session
->machines
.host
;
1036 * Not perfect, but otherwise assume anything in EL1 is the default
1037 * guest, and everything else is the host. Distinguishing between guest
1038 * and host userspaces isn't currently supported either. Neither is
1039 * multiple guest support. All this does is reduce the likeliness of
1040 * decode errors where we look into the host kernel maps when it should
1041 * have been the guest maps.
1045 return machines__find_guest(&etmq
->etm
->session
->machines
,
1046 DEFAULT_GUEST_KERNEL_ID
);
1050 case ocsd_EL_unknown
:
1052 return &etmq
->etm
->session
->machines
.host
;
1056 static u8
cs_etm__cpu_mode(struct cs_etm_queue
*etmq
, u64 address
,
1059 struct machine
*machine
= cs_etm__get_machine(etmq
, el
);
1061 if (address
>= machine__kernel_start(machine
)) {
1062 if (machine__is_host(machine
))
1063 return PERF_RECORD_MISC_KERNEL
;
1065 return PERF_RECORD_MISC_GUEST_KERNEL
;
1067 if (machine__is_host(machine
))
1068 return PERF_RECORD_MISC_USER
;
1071 * Can't really happen at the moment because
1072 * cs_etm__get_machine() will always return
1073 * machines.host for any non EL1 trace.
1075 return PERF_RECORD_MISC_GUEST_USER
;
1080 static u32
cs_etm__mem_access(struct cs_etm_queue
*etmq
, u8 trace_chan_id
,
1081 u64 address
, size_t size
, u8
*buffer
,
1082 const ocsd_mem_space_acc_t mem_space
)
1087 struct addr_location al
;
1089 struct cs_etm_traceid_queue
*tidq
;
1095 addr_location__init(&al
);
1096 tidq
= cs_etm__etmq_get_traceid_queue(etmq
, trace_chan_id
);
1101 * We've already tracked EL along side the PID in cs_etm__set_thread()
1102 * so double check that it matches what OpenCSD thinks as well. It
1103 * doesn't distinguish between EL0 and EL1 for this mem access callback
1104 * so we had to do the extra tracking. Skip validation if it's any of
1107 if (!(mem_space
== OCSD_MEM_SPACE_ANY
||
1108 mem_space
== OCSD_MEM_SPACE_N
|| mem_space
== OCSD_MEM_SPACE_S
)) {
1109 if (mem_space
& OCSD_MEM_SPACE_EL1N
) {
1110 /* Includes both non secure EL1 and EL0 */
1111 assert(tidq
->el
== ocsd_EL1
|| tidq
->el
== ocsd_EL0
);
1112 } else if (mem_space
& OCSD_MEM_SPACE_EL2
)
1113 assert(tidq
->el
== ocsd_EL2
);
1114 else if (mem_space
& OCSD_MEM_SPACE_EL3
)
1115 assert(tidq
->el
== ocsd_EL3
);
1118 cpumode
= cs_etm__cpu_mode(etmq
, address
, tidq
->el
);
1120 if (!thread__find_map(tidq
->thread
, cpumode
, address
, &al
))
1123 dso
= map__dso(al
.map
);
1127 if (dso__data(dso
)->status
== DSO_DATA_STATUS_ERROR
&&
1128 dso__data_status_seen(dso
, DSO_DATA_STATUS_SEEN_ITRACE
))
1131 offset
= map__map_ip(al
.map
, address
);
1135 len
= dso__data_read_offset(dso
, maps__machine(thread__maps(tidq
->thread
)),
1136 offset
, buffer
, size
);
1139 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1140 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1141 if (!dso__auxtrace_warned(dso
)) {
1142 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64
" in %s\n",
1144 dso__long_name(dso
) ? dso__long_name(dso
) : "Unknown");
1145 dso__set_auxtrace_warned(dso
);
1151 addr_location__exit(&al
);
1155 static struct cs_etm_queue
*cs_etm__alloc_queue(void)
1157 struct cs_etm_queue
*etmq
= zalloc(sizeof(*etmq
));
1161 etmq
->traceid_queues_list
= intlist__new(NULL
);
1162 if (!etmq
->traceid_queues_list
)
1166 * Create an RB tree for traceID-metadata tuple. Since the conversion
1167 * has to be made for each packet that gets decoded, optimizing access
1168 * in anything other than a sequential array is worth doing.
1170 etmq
->traceid_list
= etmq
->own_traceid_list
= intlist__new(NULL
);
1171 if (!etmq
->traceid_list
)
1177 intlist__delete(etmq
->traceid_queues_list
);
1183 static int cs_etm__setup_queue(struct cs_etm_auxtrace
*etm
,
1184 struct auxtrace_queue
*queue
,
1185 unsigned int queue_nr
)
1187 struct cs_etm_queue
*etmq
= queue
->priv
;
1192 etmq
= cs_etm__alloc_queue();
1199 etmq
->queue_nr
= queue_nr
;
1200 queue
->cpu
= queue_nr
; /* Placeholder, may be reset to -1 in per-thread mode */
1202 etmq
->sink_id
= SINK_UNSET
;
1207 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace
*etm
,
1208 struct cs_etm_queue
*etmq
,
1209 unsigned int queue_nr
)
1212 unsigned int cs_queue_nr
;
1217 * We are under a CPU-wide trace scenario. As such we need to know
1218 * when the code that generated the traces started to execute so that
1219 * it can be correlated with execution on other CPUs. So we get a
1220 * handle on the beginning of traces and decode until we find a
1221 * timestamp. The timestamp is then added to the auxtrace min heap
1222 * in order to know what nibble (of all the etmqs) to decode first.
1226 * Fetch an aux_buffer from this etmq. Bail if no more
1227 * blocks or an error has been encountered.
1229 ret
= cs_etm__get_data_block(etmq
);
1234 * Run decoder on the trace block. The decoder will stop when
1235 * encountering a CS timestamp, a full packet queue or the end of
1236 * trace for that block.
1238 ret
= cs_etm__decode_data_block(etmq
);
1243 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1244 * the timestamp calculation for us.
1246 cs_timestamp
= cs_etm__etmq_get_timestamp(etmq
, &trace_chan_id
);
1248 /* We found a timestamp, no need to continue. */
1253 * We didn't find a timestamp so empty all the traceid packet
1254 * queues before looking for another timestamp packet, either
1255 * in the current data block or a new one. Packets that were
1256 * just decoded are useless since no timestamp has been
1257 * associated with them. As such simply discard them.
1259 cs_etm__clear_all_packet_queues(etmq
);
1263 * We have a timestamp. Add it to the min heap to reflect when
1264 * instructions conveyed by the range packets of this traceID queue
1265 * started to execute. Once the same has been done for all the traceID
1266 * queues of each etmq, redenring and decoding can start in
1267 * chronological order.
1269 * Note that packets decoded above are still in the traceID's packet
1270 * queue and will be processed in cs_etm__process_timestamped_queues().
1272 cs_queue_nr
= TO_CS_QUEUE_NR(queue_nr
, trace_chan_id
);
1273 ret
= auxtrace_heap__add(&etm
->heap
, cs_queue_nr
, cs_timestamp
);
1279 void cs_etm__copy_last_branch_rb(struct cs_etm_queue
*etmq
,
1280 struct cs_etm_traceid_queue
*tidq
)
1282 struct branch_stack
*bs_src
= tidq
->last_branch_rb
;
1283 struct branch_stack
*bs_dst
= tidq
->last_branch
;
1287 * Set the number of records before early exit: ->nr is used to
1288 * determine how many branches to copy from ->entries.
1290 bs_dst
->nr
= bs_src
->nr
;
1293 * Early exit when there is nothing to copy.
1299 * As bs_src->entries is a circular buffer, we need to copy from it in
1300 * two steps. First, copy the branches from the most recently inserted
1301 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1303 nr
= etmq
->etm
->synth_opts
.last_branch_sz
- tidq
->last_branch_pos
;
1304 memcpy(&bs_dst
->entries
[0],
1305 &bs_src
->entries
[tidq
->last_branch_pos
],
1306 sizeof(struct branch_entry
) * nr
);
1309 * If we wrapped around at least once, the branches from the beginning
1310 * of the bs_src->entries buffer and until the ->last_branch_pos element
1311 * are older valid branches: copy them over. The total number of
1312 * branches copied over will be equal to the number of branches asked by
1313 * the user in last_branch_sz.
1315 if (bs_src
->nr
>= etmq
->etm
->synth_opts
.last_branch_sz
) {
1316 memcpy(&bs_dst
->entries
[nr
],
1317 &bs_src
->entries
[0],
1318 sizeof(struct branch_entry
) * tidq
->last_branch_pos
);
1323 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue
*tidq
)
1325 tidq
->last_branch_pos
= 0;
1326 tidq
->last_branch_rb
->nr
= 0;
1329 static inline int cs_etm__t32_instr_size(struct cs_etm_queue
*etmq
,
1330 u8 trace_chan_id
, u64 addr
)
1334 cs_etm__mem_access(etmq
, trace_chan_id
, addr
, ARRAY_SIZE(instrBytes
),
1337 * T32 instruction size is indicated by bits[15:11] of the first
1338 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1339 * denote a 32-bit instruction.
1341 return ((instrBytes
[1] & 0xF8) >= 0xE8) ? 4 : 2;
1344 static inline u64
cs_etm__first_executed_instr(struct cs_etm_packet
*packet
)
1347 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1348 * appear in samples.
1350 if (packet
->sample_type
== CS_ETM_DISCONTINUITY
||
1351 packet
->sample_type
== CS_ETM_EXCEPTION
)
1354 return packet
->start_addr
;
1358 u64
cs_etm__last_executed_instr(const struct cs_etm_packet
*packet
)
1360 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1361 if (packet
->sample_type
== CS_ETM_DISCONTINUITY
)
1364 return packet
->end_addr
- packet
->last_instr_size
;
1367 static inline u64
cs_etm__instr_addr(struct cs_etm_queue
*etmq
,
1369 const struct cs_etm_packet
*packet
,
1372 if (packet
->isa
== CS_ETM_ISA_T32
) {
1373 u64 addr
= packet
->start_addr
;
1376 addr
+= cs_etm__t32_instr_size(etmq
,
1377 trace_chan_id
, addr
);
1383 /* Assume a 4 byte instruction size (A32/A64) */
1384 return packet
->start_addr
+ offset
* 4;
1387 static void cs_etm__update_last_branch_rb(struct cs_etm_queue
*etmq
,
1388 struct cs_etm_traceid_queue
*tidq
)
1390 struct branch_stack
*bs
= tidq
->last_branch_rb
;
1391 struct branch_entry
*be
;
1394 * The branches are recorded in a circular buffer in reverse
1395 * chronological order: we start recording from the last element of the
1396 * buffer down. After writing the first element of the stack, move the
1397 * insert position back to the end of the buffer.
1399 if (!tidq
->last_branch_pos
)
1400 tidq
->last_branch_pos
= etmq
->etm
->synth_opts
.last_branch_sz
;
1402 tidq
->last_branch_pos
-= 1;
1404 be
= &bs
->entries
[tidq
->last_branch_pos
];
1405 be
->from
= cs_etm__last_executed_instr(tidq
->prev_packet
);
1406 be
->to
= cs_etm__first_executed_instr(tidq
->packet
);
1407 /* No support for mispredict */
1408 be
->flags
.mispred
= 0;
1409 be
->flags
.predicted
= 1;
1412 * Increment bs->nr until reaching the number of last branches asked by
1413 * the user on the command line.
1415 if (bs
->nr
< etmq
->etm
->synth_opts
.last_branch_sz
)
1419 static int cs_etm__inject_event(union perf_event
*event
,
1420 struct perf_sample
*sample
, u64 type
)
1422 event
->header
.size
= perf_event__sample_event_size(sample
, type
, 0);
1423 return perf_event__synthesize_sample(event
, type
, 0, sample
);
1428 cs_etm__get_trace(struct cs_etm_queue
*etmq
)
1430 struct auxtrace_buffer
*aux_buffer
= etmq
->buffer
;
1431 struct auxtrace_buffer
*old_buffer
= aux_buffer
;
1432 struct auxtrace_queue
*queue
;
1434 queue
= &etmq
->etm
->queues
.queue_array
[etmq
->queue_nr
];
1436 aux_buffer
= auxtrace_buffer__next(queue
, aux_buffer
);
1438 /* If no more data, drop the previous auxtrace_buffer and return */
1441 auxtrace_buffer__drop_data(old_buffer
);
1446 etmq
->buffer
= aux_buffer
;
1448 /* If the aux_buffer doesn't have data associated, try to load it */
1449 if (!aux_buffer
->data
) {
1450 /* get the file desc associated with the perf data file */
1451 int fd
= perf_data__fd(etmq
->etm
->session
->data
);
1453 aux_buffer
->data
= auxtrace_buffer__get_data(aux_buffer
, fd
);
1454 if (!aux_buffer
->data
)
1458 /* If valid, drop the previous buffer */
1460 auxtrace_buffer__drop_data(old_buffer
);
1463 etmq
->buf_len
= aux_buffer
->size
;
1464 etmq
->buf
= aux_buffer
->data
;
1466 return etmq
->buf_len
;
1469 static void cs_etm__set_thread(struct cs_etm_queue
*etmq
,
1470 struct cs_etm_traceid_queue
*tidq
, pid_t tid
,
1473 struct machine
*machine
= cs_etm__get_machine(etmq
, el
);
1476 thread__zput(tidq
->thread
);
1477 tidq
->thread
= machine__find_thread(machine
, -1, tid
);
1480 /* Couldn't find a known thread */
1482 tidq
->thread
= machine__idle_thread(machine
);
1487 int cs_etm__etmq_set_tid_el(struct cs_etm_queue
*etmq
, pid_t tid
,
1488 u8 trace_chan_id
, ocsd_ex_level el
)
1490 struct cs_etm_traceid_queue
*tidq
;
1492 tidq
= cs_etm__etmq_get_traceid_queue(etmq
, trace_chan_id
);
1496 cs_etm__set_thread(etmq
, tidq
, tid
, el
);
1500 bool cs_etm__etmq_is_timeless(struct cs_etm_queue
*etmq
)
1502 return !!etmq
->etm
->timeless_decoding
;
1505 static void cs_etm__copy_insn(struct cs_etm_queue
*etmq
,
1507 const struct cs_etm_packet
*packet
,
1508 struct perf_sample
*sample
)
1511 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1512 * packet, so directly bail out with 'insn_len' = 0.
1514 if (packet
->sample_type
== CS_ETM_DISCONTINUITY
) {
1515 sample
->insn_len
= 0;
1520 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1521 * cs_etm__t32_instr_size().
1523 if (packet
->isa
== CS_ETM_ISA_T32
)
1524 sample
->insn_len
= cs_etm__t32_instr_size(etmq
, trace_chan_id
,
1526 /* Otherwise, A64 and A32 instruction size are always 32-bit. */
1528 sample
->insn_len
= 4;
1530 cs_etm__mem_access(etmq
, trace_chan_id
, sample
->ip
, sample
->insn_len
,
1531 (void *)sample
->insn
, 0);
1534 u64
cs_etm__convert_sample_time(struct cs_etm_queue
*etmq
, u64 cs_timestamp
)
1536 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
1538 if (etm
->has_virtual_ts
)
1539 return tsc_to_perf_time(cs_timestamp
, &etm
->tc
);
1541 return cs_timestamp
;
1544 static inline u64
cs_etm__resolve_sample_time(struct cs_etm_queue
*etmq
,
1545 struct cs_etm_traceid_queue
*tidq
)
1547 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
1548 struct cs_etm_packet_queue
*packet_queue
= &tidq
->packet_queue
;
1550 if (!etm
->timeless_decoding
&& etm
->has_virtual_ts
)
1551 return packet_queue
->cs_timestamp
;
1553 return etm
->latest_kernel_timestamp
;
1556 static int cs_etm__synth_instruction_sample(struct cs_etm_queue
*etmq
,
1557 struct cs_etm_traceid_queue
*tidq
,
1558 u64 addr
, u64 period
)
1561 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
1562 union perf_event
*event
= tidq
->event_buf
;
1563 struct perf_sample sample
= {.ip
= 0,};
1565 event
->sample
.header
.type
= PERF_RECORD_SAMPLE
;
1566 event
->sample
.header
.misc
= cs_etm__cpu_mode(etmq
, addr
, tidq
->el
);
1567 event
->sample
.header
.size
= sizeof(struct perf_event_header
);
1569 /* Set time field based on etm auxtrace config. */
1570 sample
.time
= cs_etm__resolve_sample_time(etmq
, tidq
);
1573 sample
.pid
= thread__pid(tidq
->thread
);
1574 sample
.tid
= thread__tid(tidq
->thread
);
1575 sample
.id
= etmq
->etm
->instructions_id
;
1576 sample
.stream_id
= etmq
->etm
->instructions_id
;
1577 sample
.period
= period
;
1578 sample
.cpu
= tidq
->packet
->cpu
;
1579 sample
.flags
= tidq
->prev_packet
->flags
;
1580 sample
.cpumode
= event
->sample
.header
.misc
;
1582 cs_etm__copy_insn(etmq
, tidq
->trace_chan_id
, tidq
->packet
, &sample
);
1584 if (etm
->synth_opts
.last_branch
)
1585 sample
.branch_stack
= tidq
->last_branch
;
1587 if (etm
->synth_opts
.inject
) {
1588 ret
= cs_etm__inject_event(event
, &sample
,
1589 etm
->instructions_sample_type
);
1594 ret
= perf_session__deliver_synth_event(etm
->session
, event
, &sample
);
1598 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1605 * The cs etm packet encodes an instruction range between a branch target
1606 * and the next taken branch. Generate sample accordingly.
1608 static int cs_etm__synth_branch_sample(struct cs_etm_queue
*etmq
,
1609 struct cs_etm_traceid_queue
*tidq
)
1612 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
1613 struct perf_sample sample
= {.ip
= 0,};
1614 union perf_event
*event
= tidq
->event_buf
;
1615 struct dummy_branch_stack
{
1618 struct branch_entry entries
;
1622 ip
= cs_etm__last_executed_instr(tidq
->prev_packet
);
1624 event
->sample
.header
.type
= PERF_RECORD_SAMPLE
;
1625 event
->sample
.header
.misc
= cs_etm__cpu_mode(etmq
, ip
,
1626 tidq
->prev_packet_el
);
1627 event
->sample
.header
.size
= sizeof(struct perf_event_header
);
1629 /* Set time field based on etm auxtrace config. */
1630 sample
.time
= cs_etm__resolve_sample_time(etmq
, tidq
);
1633 sample
.pid
= thread__pid(tidq
->prev_packet_thread
);
1634 sample
.tid
= thread__tid(tidq
->prev_packet_thread
);
1635 sample
.addr
= cs_etm__first_executed_instr(tidq
->packet
);
1636 sample
.id
= etmq
->etm
->branches_id
;
1637 sample
.stream_id
= etmq
->etm
->branches_id
;
1639 sample
.cpu
= tidq
->packet
->cpu
;
1640 sample
.flags
= tidq
->prev_packet
->flags
;
1641 sample
.cpumode
= event
->sample
.header
.misc
;
1643 cs_etm__copy_insn(etmq
, tidq
->trace_chan_id
, tidq
->prev_packet
,
1647 * perf report cannot handle events without a branch stack
1649 if (etm
->synth_opts
.last_branch
) {
1650 dummy_bs
= (struct dummy_branch_stack
){
1658 sample
.branch_stack
= (struct branch_stack
*)&dummy_bs
;
1661 if (etm
->synth_opts
.inject
) {
1662 ret
= cs_etm__inject_event(event
, &sample
,
1663 etm
->branches_sample_type
);
1668 ret
= perf_session__deliver_synth_event(etm
->session
, event
, &sample
);
1672 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1678 static int cs_etm__synth_events(struct cs_etm_auxtrace
*etm
,
1679 struct perf_session
*session
)
1681 struct evlist
*evlist
= session
->evlist
;
1682 struct evsel
*evsel
;
1683 struct perf_event_attr attr
;
1688 evlist__for_each_entry(evlist
, evsel
) {
1689 if (evsel
->core
.attr
.type
== etm
->pmu_type
) {
1696 pr_debug("No selected events with CoreSight Trace data\n");
1700 memset(&attr
, 0, sizeof(struct perf_event_attr
));
1701 attr
.size
= sizeof(struct perf_event_attr
);
1702 attr
.type
= PERF_TYPE_HARDWARE
;
1703 attr
.sample_type
= evsel
->core
.attr
.sample_type
& PERF_SAMPLE_MASK
;
1704 attr
.sample_type
|= PERF_SAMPLE_IP
| PERF_SAMPLE_TID
|
1706 if (etm
->timeless_decoding
)
1707 attr
.sample_type
&= ~(u64
)PERF_SAMPLE_TIME
;
1709 attr
.sample_type
|= PERF_SAMPLE_TIME
;
1711 attr
.exclude_user
= evsel
->core
.attr
.exclude_user
;
1712 attr
.exclude_kernel
= evsel
->core
.attr
.exclude_kernel
;
1713 attr
.exclude_hv
= evsel
->core
.attr
.exclude_hv
;
1714 attr
.exclude_host
= evsel
->core
.attr
.exclude_host
;
1715 attr
.exclude_guest
= evsel
->core
.attr
.exclude_guest
;
1716 attr
.sample_id_all
= evsel
->core
.attr
.sample_id_all
;
1717 attr
.read_format
= evsel
->core
.attr
.read_format
;
1719 /* create new id val to be a fixed offset from evsel id */
1720 id
= evsel
->core
.id
[0] + 1000000000;
1725 if (etm
->synth_opts
.branches
) {
1726 attr
.config
= PERF_COUNT_HW_BRANCH_INSTRUCTIONS
;
1727 attr
.sample_period
= 1;
1728 attr
.sample_type
|= PERF_SAMPLE_ADDR
;
1729 err
= perf_session__deliver_synth_attr_event(session
, &attr
, id
);
1732 etm
->branches_sample_type
= attr
.sample_type
;
1733 etm
->branches_id
= id
;
1735 attr
.sample_type
&= ~(u64
)PERF_SAMPLE_ADDR
;
1738 if (etm
->synth_opts
.last_branch
) {
1739 attr
.sample_type
|= PERF_SAMPLE_BRANCH_STACK
;
1741 * We don't use the hardware index, but the sample generation
1742 * code uses the new format branch_stack with this field,
1743 * so the event attributes must indicate that it's present.
1745 attr
.branch_sample_type
|= PERF_SAMPLE_BRANCH_HW_INDEX
;
1748 if (etm
->synth_opts
.instructions
) {
1749 attr
.config
= PERF_COUNT_HW_INSTRUCTIONS
;
1750 attr
.sample_period
= etm
->synth_opts
.period
;
1751 etm
->instructions_sample_period
= attr
.sample_period
;
1752 err
= perf_session__deliver_synth_attr_event(session
, &attr
, id
);
1755 etm
->instructions_sample_type
= attr
.sample_type
;
1756 etm
->instructions_id
= id
;
1763 static int cs_etm__sample(struct cs_etm_queue
*etmq
,
1764 struct cs_etm_traceid_queue
*tidq
)
1766 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
1768 u8 trace_chan_id
= tidq
->trace_chan_id
;
1771 /* Get instructions remainder from previous packet */
1772 instrs_prev
= tidq
->period_instructions
;
1774 tidq
->period_instructions
+= tidq
->packet
->instr_count
;
1777 * Record a branch when the last instruction in
1778 * PREV_PACKET is a branch.
1780 if (etm
->synth_opts
.last_branch
&&
1781 tidq
->prev_packet
->sample_type
== CS_ETM_RANGE
&&
1782 tidq
->prev_packet
->last_instr_taken_branch
)
1783 cs_etm__update_last_branch_rb(etmq
, tidq
);
1785 if (etm
->synth_opts
.instructions
&&
1786 tidq
->period_instructions
>= etm
->instructions_sample_period
) {
1788 * Emit instruction sample periodically
1789 * TODO: allow period to be defined in cycles and clock time
1793 * Below diagram demonstrates the instruction samples
1796 * Instrs Instrs Instrs Instrs
1797 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
1800 * --------------------------------------------------
1804 * instructions(Pi) instructions(Pi')
1807 * \---------------- -----------------/
1809 * tidq->packet->instr_count
1811 * Instrs Sample(n...) are the synthesised samples occurring
1812 * every etm->instructions_sample_period instructions - as
1813 * defined on the perf command line. Sample(n) is being the
1814 * last sample before the current etm packet, n+1 to n+3
1815 * samples are generated from the current etm packet.
1817 * tidq->packet->instr_count represents the number of
1818 * instructions in the current etm packet.
1820 * Period instructions (Pi) contains the number of
1821 * instructions executed after the sample point(n) from the
1822 * previous etm packet. This will always be less than
1823 * etm->instructions_sample_period.
1825 * When generate new samples, it combines with two parts
1826 * instructions, one is the tail of the old packet and another
1827 * is the head of the new coming packet, to generate
1828 * sample(n+1); sample(n+2) and sample(n+3) consume the
1829 * instructions with sample period. After sample(n+3), the rest
1830 * instructions will be used by later packet and it is assigned
1831 * to tidq->period_instructions for next round calculation.
1835 * Get the initial offset into the current packet instructions;
1836 * entry conditions ensure that instrs_prev is less than
1837 * etm->instructions_sample_period.
1839 u64 offset
= etm
->instructions_sample_period
- instrs_prev
;
1842 /* Prepare last branches for instruction sample */
1843 if (etm
->synth_opts
.last_branch
)
1844 cs_etm__copy_last_branch_rb(etmq
, tidq
);
1846 while (tidq
->period_instructions
>=
1847 etm
->instructions_sample_period
) {
1849 * Calculate the address of the sampled instruction (-1
1850 * as sample is reported as though instruction has just
1851 * been executed, but PC has not advanced to next
1854 addr
= cs_etm__instr_addr(etmq
, trace_chan_id
,
1855 tidq
->packet
, offset
- 1);
1856 ret
= cs_etm__synth_instruction_sample(
1858 etm
->instructions_sample_period
);
1862 offset
+= etm
->instructions_sample_period
;
1863 tidq
->period_instructions
-=
1864 etm
->instructions_sample_period
;
1868 if (etm
->synth_opts
.branches
) {
1869 bool generate_sample
= false;
1871 /* Generate sample for tracing on packet */
1872 if (tidq
->prev_packet
->sample_type
== CS_ETM_DISCONTINUITY
)
1873 generate_sample
= true;
1875 /* Generate sample for branch taken packet */
1876 if (tidq
->prev_packet
->sample_type
== CS_ETM_RANGE
&&
1877 tidq
->prev_packet
->last_instr_taken_branch
)
1878 generate_sample
= true;
1880 if (generate_sample
) {
1881 ret
= cs_etm__synth_branch_sample(etmq
, tidq
);
1887 cs_etm__packet_swap(etm
, tidq
);
1892 static int cs_etm__exception(struct cs_etm_traceid_queue
*tidq
)
1895 * When the exception packet is inserted, whether the last instruction
1896 * in previous range packet is taken branch or not, we need to force
1897 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures
1898 * to generate branch sample for the instruction range before the
1899 * exception is trapped to kernel or before the exception returning.
1901 * The exception packet includes the dummy address values, so don't
1902 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1903 * for generating instruction and branch samples.
1905 if (tidq
->prev_packet
->sample_type
== CS_ETM_RANGE
)
1906 tidq
->prev_packet
->last_instr_taken_branch
= true;
1911 static int cs_etm__flush(struct cs_etm_queue
*etmq
,
1912 struct cs_etm_traceid_queue
*tidq
)
1915 struct cs_etm_auxtrace
*etm
= etmq
->etm
;
1917 /* Handle start tracing packet */
1918 if (tidq
->prev_packet
->sample_type
== CS_ETM_EMPTY
)
1921 if (etmq
->etm
->synth_opts
.last_branch
&&
1922 etmq
->etm
->synth_opts
.instructions
&&
1923 tidq
->prev_packet
->sample_type
== CS_ETM_RANGE
) {
1926 /* Prepare last branches for instruction sample */
1927 cs_etm__copy_last_branch_rb(etmq
, tidq
);
1930 * Generate a last branch event for the branches left in the
1931 * circular buffer at the end of the trace.
1933 * Use the address of the end of the last reported execution
1936 addr
= cs_etm__last_executed_instr(tidq
->prev_packet
);
1938 err
= cs_etm__synth_instruction_sample(
1940 tidq
->period_instructions
);
1944 tidq
->period_instructions
= 0;
1948 if (etm
->synth_opts
.branches
&&
1949 tidq
->prev_packet
->sample_type
== CS_ETM_RANGE
) {
1950 err
= cs_etm__synth_branch_sample(etmq
, tidq
);
1956 cs_etm__packet_swap(etm
, tidq
);
1958 /* Reset last branches after flush the trace */
1959 if (etm
->synth_opts
.last_branch
)
1960 cs_etm__reset_last_branch_rb(tidq
);
1965 static int cs_etm__end_block(struct cs_etm_queue
*etmq
,
1966 struct cs_etm_traceid_queue
*tidq
)
1971 * It has no new packet coming and 'etmq->packet' contains the stale
1972 * packet which was set at the previous time with packets swapping;
1973 * so skip to generate branch sample to avoid stale packet.
1975 * For this case only flush branch stack and generate a last branch
1976 * event for the branches left in the circular buffer at the end of
1979 if (etmq
->etm
->synth_opts
.last_branch
&&
1980 etmq
->etm
->synth_opts
.instructions
&&
1981 tidq
->prev_packet
->sample_type
== CS_ETM_RANGE
) {
1984 /* Prepare last branches for instruction sample */
1985 cs_etm__copy_last_branch_rb(etmq
, tidq
);
1988 * Use the address of the end of the last reported execution
1991 addr
= cs_etm__last_executed_instr(tidq
->prev_packet
);
1993 err
= cs_etm__synth_instruction_sample(
1995 tidq
->period_instructions
);
1999 tidq
->period_instructions
= 0;
2005 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2007 * Returns: < 0 if error
2008 * = 0 if no more auxtrace_buffer to read
2009 * > 0 if the current buffer isn't empty yet
2011 static int cs_etm__get_data_block(struct cs_etm_queue
*etmq
)
2015 if (!etmq
->buf_len
) {
2016 ret
= cs_etm__get_trace(etmq
);
2020 * We cannot assume consecutive blocks in the data file
2021 * are contiguous, reset the decoder to force re-sync.
2023 ret
= cs_etm_decoder__reset(etmq
->decoder
);
2028 return etmq
->buf_len
;
2031 static bool cs_etm__is_svc_instr(struct cs_etm_queue
*etmq
, u8 trace_chan_id
,
2032 struct cs_etm_packet
*packet
,
2035 /* Initialise to keep compiler happy */
2040 switch (packet
->isa
) {
2041 case CS_ETM_ISA_T32
:
2043 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2046 * +-----------------+--------+
2047 * | 1 1 0 1 1 1 1 1 | imm8 |
2048 * +-----------------+--------+
2050 * According to the specification, it only defines SVC for T32
2051 * with 16 bits instruction and has no definition for 32bits;
2052 * so below only read 2 bytes as instruction size for T32.
2054 addr
= end_addr
- 2;
2055 cs_etm__mem_access(etmq
, trace_chan_id
, addr
, sizeof(instr16
),
2057 if ((instr16
& 0xFF00) == 0xDF00)
2061 case CS_ETM_ISA_A32
:
2063 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2065 * b'31 b'28 b'27 b'24
2066 * +---------+---------+-------------------------+
2067 * | !1111 | 1 1 1 1 | imm24 |
2068 * +---------+---------+-------------------------+
2070 addr
= end_addr
- 4;
2071 cs_etm__mem_access(etmq
, trace_chan_id
, addr
, sizeof(instr32
),
2073 if ((instr32
& 0x0F000000) == 0x0F000000 &&
2074 (instr32
& 0xF0000000) != 0xF0000000)
2078 case CS_ETM_ISA_A64
:
2080 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2083 * +-----------------------+---------+-----------+
2084 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
2085 * +-----------------------+---------+-----------+
2087 addr
= end_addr
- 4;
2088 cs_etm__mem_access(etmq
, trace_chan_id
, addr
, sizeof(instr32
),
2090 if ((instr32
& 0xFFE0001F) == 0xd4000001)
2094 case CS_ETM_ISA_UNKNOWN
:
2102 static bool cs_etm__is_syscall(struct cs_etm_queue
*etmq
,
2103 struct cs_etm_traceid_queue
*tidq
, u64 magic
)
2105 u8 trace_chan_id
= tidq
->trace_chan_id
;
2106 struct cs_etm_packet
*packet
= tidq
->packet
;
2107 struct cs_etm_packet
*prev_packet
= tidq
->prev_packet
;
2109 if (magic
== __perf_cs_etmv3_magic
)
2110 if (packet
->exception_number
== CS_ETMV3_EXC_SVC
)
2114 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2115 * HVC cases; need to check if it's SVC instruction based on
2118 if (magic
== __perf_cs_etmv4_magic
) {
2119 if (packet
->exception_number
== CS_ETMV4_EXC_CALL
&&
2120 cs_etm__is_svc_instr(etmq
, trace_chan_id
, prev_packet
,
2121 prev_packet
->end_addr
))
2128 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue
*tidq
,
2131 struct cs_etm_packet
*packet
= tidq
->packet
;
2133 if (magic
== __perf_cs_etmv3_magic
)
2134 if (packet
->exception_number
== CS_ETMV3_EXC_DEBUG_HALT
||
2135 packet
->exception_number
== CS_ETMV3_EXC_ASYNC_DATA_ABORT
||
2136 packet
->exception_number
== CS_ETMV3_EXC_PE_RESET
||
2137 packet
->exception_number
== CS_ETMV3_EXC_IRQ
||
2138 packet
->exception_number
== CS_ETMV3_EXC_FIQ
)
2141 if (magic
== __perf_cs_etmv4_magic
)
2142 if (packet
->exception_number
== CS_ETMV4_EXC_RESET
||
2143 packet
->exception_number
== CS_ETMV4_EXC_DEBUG_HALT
||
2144 packet
->exception_number
== CS_ETMV4_EXC_SYSTEM_ERROR
||
2145 packet
->exception_number
== CS_ETMV4_EXC_INST_DEBUG
||
2146 packet
->exception_number
== CS_ETMV4_EXC_DATA_DEBUG
||
2147 packet
->exception_number
== CS_ETMV4_EXC_IRQ
||
2148 packet
->exception_number
== CS_ETMV4_EXC_FIQ
)
2154 static bool cs_etm__is_sync_exception(struct cs_etm_queue
*etmq
,
2155 struct cs_etm_traceid_queue
*tidq
,
2158 u8 trace_chan_id
= tidq
->trace_chan_id
;
2159 struct cs_etm_packet
*packet
= tidq
->packet
;
2160 struct cs_etm_packet
*prev_packet
= tidq
->prev_packet
;
2162 if (magic
== __perf_cs_etmv3_magic
)
2163 if (packet
->exception_number
== CS_ETMV3_EXC_SMC
||
2164 packet
->exception_number
== CS_ETMV3_EXC_HYP
||
2165 packet
->exception_number
== CS_ETMV3_EXC_JAZELLE_THUMBEE
||
2166 packet
->exception_number
== CS_ETMV3_EXC_UNDEFINED_INSTR
||
2167 packet
->exception_number
== CS_ETMV3_EXC_PREFETCH_ABORT
||
2168 packet
->exception_number
== CS_ETMV3_EXC_DATA_FAULT
||
2169 packet
->exception_number
== CS_ETMV3_EXC_GENERIC
)
2172 if (magic
== __perf_cs_etmv4_magic
) {
2173 if (packet
->exception_number
== CS_ETMV4_EXC_TRAP
||
2174 packet
->exception_number
== CS_ETMV4_EXC_ALIGNMENT
||
2175 packet
->exception_number
== CS_ETMV4_EXC_INST_FAULT
||
2176 packet
->exception_number
== CS_ETMV4_EXC_DATA_FAULT
)
2180 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2181 * (SMC, HVC) are taken as sync exceptions.
2183 if (packet
->exception_number
== CS_ETMV4_EXC_CALL
&&
2184 !cs_etm__is_svc_instr(etmq
, trace_chan_id
, prev_packet
,
2185 prev_packet
->end_addr
))
2189 * ETMv4 has 5 bits for exception number; if the numbers
2190 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2191 * they are implementation defined exceptions.
2193 * For this case, simply take it as sync exception.
2195 if (packet
->exception_number
> CS_ETMV4_EXC_FIQ
&&
2196 packet
->exception_number
<= CS_ETMV4_EXC_END
)
2203 static int cs_etm__set_sample_flags(struct cs_etm_queue
*etmq
,
2204 struct cs_etm_traceid_queue
*tidq
)
2206 struct cs_etm_packet
*packet
= tidq
->packet
;
2207 struct cs_etm_packet
*prev_packet
= tidq
->prev_packet
;
2208 u8 trace_chan_id
= tidq
->trace_chan_id
;
2212 switch (packet
->sample_type
) {
2215 * Immediate branch instruction without neither link nor
2216 * return flag, it's normal branch instruction within
2219 if (packet
->last_instr_type
== OCSD_INSTR_BR
&&
2220 packet
->last_instr_subtype
== OCSD_S_INSTR_NONE
) {
2221 packet
->flags
= PERF_IP_FLAG_BRANCH
;
2223 if (packet
->last_instr_cond
)
2224 packet
->flags
|= PERF_IP_FLAG_CONDITIONAL
;
2228 * Immediate branch instruction with link (e.g. BL), this is
2229 * branch instruction for function call.
2231 if (packet
->last_instr_type
== OCSD_INSTR_BR
&&
2232 packet
->last_instr_subtype
== OCSD_S_INSTR_BR_LINK
)
2233 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2237 * Indirect branch instruction with link (e.g. BLR), this is
2238 * branch instruction for function call.
2240 if (packet
->last_instr_type
== OCSD_INSTR_BR_INDIRECT
&&
2241 packet
->last_instr_subtype
== OCSD_S_INSTR_BR_LINK
)
2242 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2246 * Indirect branch instruction with subtype of
2247 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2248 * function return for A32/T32.
2250 if (packet
->last_instr_type
== OCSD_INSTR_BR_INDIRECT
&&
2251 packet
->last_instr_subtype
== OCSD_S_INSTR_V7_IMPLIED_RET
)
2252 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2253 PERF_IP_FLAG_RETURN
;
2256 * Indirect branch instruction without link (e.g. BR), usually
2257 * this is used for function return, especially for functions
2258 * within dynamic link lib.
2260 if (packet
->last_instr_type
== OCSD_INSTR_BR_INDIRECT
&&
2261 packet
->last_instr_subtype
== OCSD_S_INSTR_NONE
)
2262 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2263 PERF_IP_FLAG_RETURN
;
2265 /* Return instruction for function return. */
2266 if (packet
->last_instr_type
== OCSD_INSTR_BR_INDIRECT
&&
2267 packet
->last_instr_subtype
== OCSD_S_INSTR_V8_RET
)
2268 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2269 PERF_IP_FLAG_RETURN
;
2272 * Decoder might insert a discontinuity in the middle of
2273 * instruction packets, fixup prev_packet with flag
2274 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2276 if (prev_packet
->sample_type
== CS_ETM_DISCONTINUITY
)
2277 prev_packet
->flags
|= PERF_IP_FLAG_BRANCH
|
2278 PERF_IP_FLAG_TRACE_BEGIN
;
2281 * If the previous packet is an exception return packet
2282 * and the return address just follows SVC instruction,
2283 * it needs to calibrate the previous packet sample flags
2284 * as PERF_IP_FLAG_SYSCALLRET.
2286 if (prev_packet
->flags
== (PERF_IP_FLAG_BRANCH
|
2287 PERF_IP_FLAG_RETURN
|
2288 PERF_IP_FLAG_INTERRUPT
) &&
2289 cs_etm__is_svc_instr(etmq
, trace_chan_id
,
2290 packet
, packet
->start_addr
))
2291 prev_packet
->flags
= PERF_IP_FLAG_BRANCH
|
2292 PERF_IP_FLAG_RETURN
|
2293 PERF_IP_FLAG_SYSCALLRET
;
2295 case CS_ETM_DISCONTINUITY
:
2297 * The trace is discontinuous, if the previous packet is
2298 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2299 * for previous packet.
2301 if (prev_packet
->sample_type
== CS_ETM_RANGE
)
2302 prev_packet
->flags
|= PERF_IP_FLAG_BRANCH
|
2303 PERF_IP_FLAG_TRACE_END
;
2305 case CS_ETM_EXCEPTION
:
2306 ret
= cs_etm__get_magic(etmq
, packet
->trace_chan_id
, &magic
);
2310 /* The exception is for system call. */
2311 if (cs_etm__is_syscall(etmq
, tidq
, magic
))
2312 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2314 PERF_IP_FLAG_SYSCALLRET
;
2316 * The exceptions are triggered by external signals from bus,
2317 * interrupt controller, debug module, PE reset or halt.
2319 else if (cs_etm__is_async_exception(tidq
, magic
))
2320 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2322 PERF_IP_FLAG_ASYNC
|
2323 PERF_IP_FLAG_INTERRUPT
;
2325 * Otherwise, exception is caused by trap, instruction &
2326 * data fault, or alignment errors.
2328 else if (cs_etm__is_sync_exception(etmq
, tidq
, magic
))
2329 packet
->flags
= PERF_IP_FLAG_BRANCH
|
2331 PERF_IP_FLAG_INTERRUPT
;
2334 * When the exception packet is inserted, since exception
2335 * packet is not used standalone for generating samples
2336 * and it's affiliation to the previous instruction range
2337 * packet; so set previous range packet flags to tell perf
2338 * it is an exception taken branch.
2340 if (prev_packet
->sample_type
== CS_ETM_RANGE
)
2341 prev_packet
->flags
= packet
->flags
;
2343 case CS_ETM_EXCEPTION_RET
:
2345 * When the exception return packet is inserted, since
2346 * exception return packet is not used standalone for
2347 * generating samples and it's affiliation to the previous
2348 * instruction range packet; so set previous range packet
2349 * flags to tell perf it is an exception return branch.
2351 * The exception return can be for either system call or
2352 * other exception types; unfortunately the packet doesn't
2353 * contain exception type related info so we cannot decide
2354 * the exception type purely based on exception return packet.
2355 * If we record the exception number from exception packet and
2356 * reuse it for exception return packet, this is not reliable
2357 * due the trace can be discontinuity or the interrupt can
2358 * be nested, thus the recorded exception number cannot be
2359 * used for exception return packet for these two cases.
2361 * For exception return packet, we only need to distinguish the
2362 * packet is for system call or for other types. Thus the
2363 * decision can be deferred when receive the next packet which
2364 * contains the return address, based on the return address we
2365 * can read out the previous instruction and check if it's a
2366 * system call instruction and then calibrate the sample flag
2369 if (prev_packet
->sample_type
== CS_ETM_RANGE
)
2370 prev_packet
->flags
= PERF_IP_FLAG_BRANCH
|
2371 PERF_IP_FLAG_RETURN
|
2372 PERF_IP_FLAG_INTERRUPT
;
2382 static int cs_etm__decode_data_block(struct cs_etm_queue
*etmq
)
2385 size_t processed
= 0;
2388 * Packets are decoded and added to the decoder's packet queue
2389 * until the decoder packet processing callback has requested that
2390 * processing stops or there is nothing left in the buffer. Normal
2391 * operations that stop processing are a timestamp packet or a full
2392 * decoder buffer queue.
2394 ret
= cs_etm_decoder__process_data_block(etmq
->decoder
,
2396 &etmq
->buf
[etmq
->buf_used
],
2402 etmq
->offset
+= processed
;
2403 etmq
->buf_used
+= processed
;
2404 etmq
->buf_len
-= processed
;
2410 static int cs_etm__process_traceid_queue(struct cs_etm_queue
*etmq
,
2411 struct cs_etm_traceid_queue
*tidq
)
2414 struct cs_etm_packet_queue
*packet_queue
;
2416 packet_queue
= &tidq
->packet_queue
;
2418 /* Process each packet in this chunk */
2420 ret
= cs_etm_decoder__get_packet(packet_queue
,
2424 * Stop processing this chunk on
2425 * end of data or error
2430 * Since packet addresses are swapped in packet
2431 * handling within below switch() statements,
2432 * thus setting sample flags must be called
2433 * prior to switch() statement to use address
2434 * information before packets swapping.
2436 ret
= cs_etm__set_sample_flags(etmq
, tidq
);
2440 switch (tidq
->packet
->sample_type
) {
2443 * If the packet contains an instruction
2444 * range, generate instruction sequence
2447 cs_etm__sample(etmq
, tidq
);
2449 case CS_ETM_EXCEPTION
:
2450 case CS_ETM_EXCEPTION_RET
:
2452 * If the exception packet is coming,
2453 * make sure the previous instruction
2454 * range packet to be handled properly.
2456 cs_etm__exception(tidq
);
2458 case CS_ETM_DISCONTINUITY
:
2460 * Discontinuity in trace, flush
2461 * previous branch stack
2463 cs_etm__flush(etmq
, tidq
);
2467 * Should not receive empty packet,
2470 pr_err("CS ETM Trace: empty packet\n");
2480 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue
*etmq
)
2483 struct int_node
*inode
;
2484 struct cs_etm_traceid_queue
*tidq
;
2485 struct intlist
*traceid_queues_list
= etmq
->traceid_queues_list
;
2487 intlist__for_each_entry(inode
, traceid_queues_list
) {
2488 idx
= (int)(intptr_t)inode
->priv
;
2489 tidq
= etmq
->traceid_queues
[idx
];
2491 /* Ignore return value */
2492 cs_etm__process_traceid_queue(etmq
, tidq
);
2496 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue
*etmq
)
2499 struct cs_etm_traceid_queue
*tidq
;
2501 tidq
= cs_etm__etmq_get_traceid_queue(etmq
, CS_ETM_PER_THREAD_TRACEID
);
2505 /* Go through each buffer in the queue and decode them one by one */
2507 err
= cs_etm__get_data_block(etmq
);
2511 /* Run trace decoder until buffer consumed or end of trace */
2513 err
= cs_etm__decode_data_block(etmq
);
2518 * Process each packet in this chunk, nothing to do if
2519 * an error occurs other than hoping the next one will
2522 err
= cs_etm__process_traceid_queue(etmq
, tidq
);
2524 } while (etmq
->buf_len
);
2527 /* Flush any remaining branch stack entries */
2528 err
= cs_etm__end_block(etmq
, tidq
);
2534 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue
*etmq
)
2537 struct cs_etm_traceid_queue
*tidq
;
2538 struct int_node
*inode
;
2540 /* Go through each buffer in the queue and decode them one by one */
2542 err
= cs_etm__get_data_block(etmq
);
2546 /* Run trace decoder until buffer consumed or end of trace */
2548 err
= cs_etm__decode_data_block(etmq
);
2553 * cs_etm__run_per_thread_timeless_decoder() runs on a
2554 * single traceID queue because each TID has a separate
2555 * buffer. But here in per-cpu mode we need to iterate
2556 * over each channel instead.
2558 intlist__for_each_entry(inode
,
2559 etmq
->traceid_queues_list
) {
2560 idx
= (int)(intptr_t)inode
->priv
;
2561 tidq
= etmq
->traceid_queues
[idx
];
2562 cs_etm__process_traceid_queue(etmq
, tidq
);
2564 } while (etmq
->buf_len
);
2566 intlist__for_each_entry(inode
, etmq
->traceid_queues_list
) {
2567 idx
= (int)(intptr_t)inode
->priv
;
2568 tidq
= etmq
->traceid_queues
[idx
];
2569 /* Flush any remaining branch stack entries */
2570 err
= cs_etm__end_block(etmq
, tidq
);
2579 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace
*etm
,
2583 struct auxtrace_queues
*queues
= &etm
->queues
;
2585 for (i
= 0; i
< queues
->nr_queues
; i
++) {
2586 struct auxtrace_queue
*queue
= &etm
->queues
.queue_array
[i
];
2587 struct cs_etm_queue
*etmq
= queue
->priv
;
2588 struct cs_etm_traceid_queue
*tidq
;
2593 if (etm
->per_thread_decoding
) {
2594 tidq
= cs_etm__etmq_get_traceid_queue(
2595 etmq
, CS_ETM_PER_THREAD_TRACEID
);
2600 if (tid
== -1 || thread__tid(tidq
->thread
) == tid
)
2601 cs_etm__run_per_thread_timeless_decoder(etmq
);
2603 cs_etm__run_per_cpu_timeless_decoder(etmq
);
2609 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace
*etm
)
2612 unsigned int cs_queue_nr
, queue_nr
, i
;
2615 struct auxtrace_queue
*queue
;
2616 struct cs_etm_queue
*etmq
;
2617 struct cs_etm_traceid_queue
*tidq
;
2620 * Pre-populate the heap with one entry from each queue so that we can
2621 * start processing in time order across all queues.
2623 for (i
= 0; i
< etm
->queues
.nr_queues
; i
++) {
2624 etmq
= etm
->queues
.queue_array
[i
].priv
;
2628 ret
= cs_etm__queue_first_cs_timestamp(etm
, etmq
, i
);
2634 if (!etm
->heap
.heap_cnt
)
2637 /* Take the entry at the top of the min heap */
2638 cs_queue_nr
= etm
->heap
.heap_array
[0].queue_nr
;
2639 queue_nr
= TO_QUEUE_NR(cs_queue_nr
);
2640 trace_chan_id
= TO_TRACE_CHAN_ID(cs_queue_nr
);
2641 queue
= &etm
->queues
.queue_array
[queue_nr
];
2645 * Remove the top entry from the heap since we are about
2648 auxtrace_heap__pop(&etm
->heap
);
2650 tidq
= cs_etm__etmq_get_traceid_queue(etmq
, trace_chan_id
);
2653 * No traceID queue has been allocated for this traceID,
2654 * which means something somewhere went very wrong. No
2655 * other choice than simply exit.
2662 * Packets associated with this timestamp are already in
2663 * the etmq's traceID queue, so process them.
2665 ret
= cs_etm__process_traceid_queue(etmq
, tidq
);
2670 * Packets for this timestamp have been processed, time to
2671 * move on to the next timestamp, fetching a new auxtrace_buffer
2675 ret
= cs_etm__get_data_block(etmq
);
2680 * No more auxtrace_buffers to process in this etmq, simply
2681 * move on to another entry in the auxtrace_heap.
2686 ret
= cs_etm__decode_data_block(etmq
);
2690 cs_timestamp
= cs_etm__etmq_get_timestamp(etmq
, &trace_chan_id
);
2692 if (!cs_timestamp
) {
2694 * Function cs_etm__decode_data_block() returns when
2695 * there is no more traces to decode in the current
2696 * auxtrace_buffer OR when a timestamp has been
2697 * encountered on any of the traceID queues. Since we
2698 * did not get a timestamp, there is no more traces to
2699 * process in this auxtrace_buffer. As such empty and
2700 * flush all traceID queues.
2702 cs_etm__clear_all_traceid_queues(etmq
);
2704 /* Fetch another auxtrace_buffer for this etmq */
2709 * Add to the min heap the timestamp for packets that have
2710 * just been decoded. They will be processed and synthesized
2711 * during the next call to cs_etm__process_traceid_queue() for
2712 * this queue/traceID.
2714 cs_queue_nr
= TO_CS_QUEUE_NR(queue_nr
, trace_chan_id
);
2715 ret
= auxtrace_heap__add(&etm
->heap
, cs_queue_nr
, cs_timestamp
);
2718 for (i
= 0; i
< etm
->queues
.nr_queues
; i
++) {
2719 struct int_node
*inode
;
2721 etmq
= etm
->queues
.queue_array
[i
].priv
;
2725 intlist__for_each_entry(inode
, etmq
->traceid_queues_list
) {
2726 int idx
= (int)(intptr_t)inode
->priv
;
2728 /* Flush any remaining branch stack entries */
2729 tidq
= etmq
->traceid_queues
[idx
];
2730 ret
= cs_etm__end_block(etmq
, tidq
);
2739 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace
*etm
,
2740 union perf_event
*event
)
2744 if (etm
->timeless_decoding
)
2748 * Add the tid/pid to the log so that we can get a match when we get a
2749 * contextID from the decoder. Only track for the host: only kernel
2750 * trace is supported for guests which wouldn't need pids so this should
2753 th
= machine__findnew_thread(&etm
->session
->machines
.host
,
2754 event
->itrace_start
.pid
,
2755 event
->itrace_start
.tid
);
2764 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace
*etm
,
2765 union perf_event
*event
)
2768 bool out
= event
->header
.misc
& PERF_RECORD_MISC_SWITCH_OUT
;
2771 * Context switch in per-thread mode are irrelevant since perf
2772 * will start/stop tracing as the process is scheduled.
2774 if (etm
->timeless_decoding
)
2778 * SWITCH_IN events carry the next process to be switched out while
2779 * SWITCH_OUT events carry the process to be switched in. As such
2780 * we don't care about IN events.
2786 * Add the tid/pid to the log so that we can get a match when we get a
2787 * contextID from the decoder. Only track for the host: only kernel
2788 * trace is supported for guests which wouldn't need pids so this should
2791 th
= machine__findnew_thread(&etm
->session
->machines
.host
,
2792 event
->context_switch
.next_prev_pid
,
2793 event
->context_switch
.next_prev_tid
);
2802 static int cs_etm__process_event(struct perf_session
*session
,
2803 union perf_event
*event
,
2804 struct perf_sample
*sample
,
2805 const struct perf_tool
*tool
)
2807 struct cs_etm_auxtrace
*etm
= container_of(session
->auxtrace
,
2808 struct cs_etm_auxtrace
,
2814 if (!tool
->ordered_events
) {
2815 pr_err("CoreSight ETM Trace requires ordered events\n");
2819 switch (event
->header
.type
) {
2820 case PERF_RECORD_EXIT
:
2822 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2823 * start the decode because we know there will be no more trace from
2824 * this thread. All this does is emit samples earlier than waiting for
2825 * the flush in other modes, but with timestamps it makes sense to wait
2826 * for flush so that events from different threads are interleaved
2829 if (etm
->per_thread_decoding
&& etm
->timeless_decoding
)
2830 return cs_etm__process_timeless_queues(etm
,
2834 case PERF_RECORD_ITRACE_START
:
2835 return cs_etm__process_itrace_start(etm
, event
);
2837 case PERF_RECORD_SWITCH_CPU_WIDE
:
2838 return cs_etm__process_switch_cpu_wide(etm
, event
);
2840 case PERF_RECORD_AUX
:
2842 * Record the latest kernel timestamp available in the header
2843 * for samples so that synthesised samples occur from this point
2846 if (sample
->time
&& (sample
->time
!= (u64
)-1))
2847 etm
->latest_kernel_timestamp
= sample
->time
;
2857 static void dump_queued_data(struct cs_etm_auxtrace
*etm
,
2858 struct perf_record_auxtrace
*event
)
2860 struct auxtrace_buffer
*buf
;
2863 * Find all buffers with same reference in the queues and dump them.
2864 * This is because the queues can contain multiple entries of the same
2865 * buffer that were split on aux records.
2867 for (i
= 0; i
< etm
->queues
.nr_queues
; ++i
)
2868 list_for_each_entry(buf
, &etm
->queues
.queue_array
[i
].head
, list
)
2869 if (buf
->reference
== event
->reference
)
2870 cs_etm__dump_event(etm
->queues
.queue_array
[i
].priv
, buf
);
2873 static int cs_etm__process_auxtrace_event(struct perf_session
*session
,
2874 union perf_event
*event
,
2875 const struct perf_tool
*tool __maybe_unused
)
2877 struct cs_etm_auxtrace
*etm
= container_of(session
->auxtrace
,
2878 struct cs_etm_auxtrace
,
2880 if (!etm
->data_queued
) {
2881 struct auxtrace_buffer
*buffer
;
2883 int fd
= perf_data__fd(session
->data
);
2884 bool is_pipe
= perf_data__is_pipe(session
->data
);
2886 int idx
= event
->auxtrace
.idx
;
2891 data_offset
= lseek(fd
, 0, SEEK_CUR
);
2892 if (data_offset
== -1)
2896 err
= auxtrace_queues__add_event(&etm
->queues
, session
,
2897 event
, data_offset
, &buffer
);
2902 if (auxtrace_buffer__get_data(buffer
, fd
)) {
2903 cs_etm__dump_event(etm
->queues
.queue_array
[idx
].priv
, buffer
);
2904 auxtrace_buffer__put_data(buffer
);
2906 } else if (dump_trace
)
2907 dump_queued_data(etm
, &event
->auxtrace
);
2912 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace
*etm
)
2914 struct evsel
*evsel
;
2915 struct evlist
*evlist
= etm
->session
->evlist
;
2917 /* Override timeless mode with user input from --itrace=Z */
2918 if (etm
->synth_opts
.timeless_decoding
) {
2919 etm
->timeless_decoding
= true;
2924 * Find the cs_etm evsel and look at what its timestamp setting was
2926 evlist__for_each_entry(evlist
, evsel
)
2927 if (cs_etm__evsel_is_auxtrace(etm
->session
, evsel
)) {
2928 etm
->timeless_decoding
=
2929 !(evsel
->core
.attr
.config
& BIT(ETM_OPT_TS
));
2933 pr_err("CS ETM: Couldn't find ETM evsel\n");
2938 * Read a single cpu parameter block from the auxtrace_info priv block.
2940 * For version 1 there is a per cpu nr_params entry. If we are handling
2941 * version 1 file, then there may be less, the same, or more params
2942 * indicated by this value than the compile time number we understand.
2944 * For a version 0 info block, there are a fixed number, and we need to
2945 * fill out the nr_param value in the metadata we create.
2947 static u64
*cs_etm__create_meta_blk(u64
*buff_in
, int *buff_in_offset
,
2948 int out_blk_size
, int nr_params_v0
)
2950 u64
*metadata
= NULL
;
2952 int nr_in_params
, nr_out_params
, nr_cmn_params
;
2955 metadata
= zalloc(sizeof(*metadata
) * out_blk_size
);
2959 /* read block current index & version */
2960 i
= *buff_in_offset
;
2961 hdr_version
= buff_in
[CS_HEADER_VERSION
];
2964 /* read version 0 info block into a version 1 metadata block */
2965 nr_in_params
= nr_params_v0
;
2966 metadata
[CS_ETM_MAGIC
] = buff_in
[i
+ CS_ETM_MAGIC
];
2967 metadata
[CS_ETM_CPU
] = buff_in
[i
+ CS_ETM_CPU
];
2968 metadata
[CS_ETM_NR_TRC_PARAMS
] = nr_in_params
;
2969 /* remaining block params at offset +1 from source */
2970 for (k
= CS_ETM_COMMON_BLK_MAX_V1
- 1; k
< nr_in_params
; k
++)
2971 metadata
[k
+ 1] = buff_in
[i
+ k
];
2972 /* version 0 has 2 common params */
2975 /* read version 1 info block - input and output nr_params may differ */
2976 /* version 1 has 3 common params */
2978 nr_in_params
= buff_in
[i
+ CS_ETM_NR_TRC_PARAMS
];
2980 /* if input has more params than output - skip excess */
2981 nr_out_params
= nr_in_params
+ nr_cmn_params
;
2982 if (nr_out_params
> out_blk_size
)
2983 nr_out_params
= out_blk_size
;
2985 for (k
= CS_ETM_MAGIC
; k
< nr_out_params
; k
++)
2986 metadata
[k
] = buff_in
[i
+ k
];
2988 /* record the actual nr params we copied */
2989 metadata
[CS_ETM_NR_TRC_PARAMS
] = nr_out_params
- nr_cmn_params
;
2992 /* adjust in offset by number of in params used */
2993 i
+= nr_in_params
+ nr_cmn_params
;
2994 *buff_in_offset
= i
;
2999 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3000 * on the bounds of aux_event, if it matches with the buffer that's at
3003 * Normally, whole auxtrace buffers would be added to the queue. But we
3004 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3005 * is reset across each buffer, so splitting the buffers up in advance has
3008 static int cs_etm__queue_aux_fragment(struct perf_session
*session
, off_t file_offset
, size_t sz
,
3009 struct perf_record_aux
*aux_event
, struct perf_sample
*sample
)
3012 char buf
[PERF_SAMPLE_MAX_SIZE
];
3013 union perf_event
*auxtrace_event_union
;
3014 struct perf_record_auxtrace
*auxtrace_event
;
3015 union perf_event auxtrace_fragment
;
3016 __u64 aux_offset
, aux_size
;
3017 enum cs_etm_format format
;
3019 struct cs_etm_auxtrace
*etm
= container_of(session
->auxtrace
,
3020 struct cs_etm_auxtrace
,
3024 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3025 * from looping through the auxtrace index.
3027 err
= perf_session__peek_event(session
, file_offset
, buf
,
3028 PERF_SAMPLE_MAX_SIZE
, &auxtrace_event_union
, NULL
);
3031 auxtrace_event
= &auxtrace_event_union
->auxtrace
;
3032 if (auxtrace_event
->header
.type
!= PERF_RECORD_AUXTRACE
)
3035 if (auxtrace_event
->header
.size
< sizeof(struct perf_record_auxtrace
) ||
3036 auxtrace_event
->header
.size
!= sz
) {
3041 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3042 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3043 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3044 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3045 * Return 'not found' if mismatch.
3047 if (auxtrace_event
->cpu
== (__u32
) -1) {
3048 etm
->per_thread_decoding
= true;
3049 if (auxtrace_event
->tid
!= sample
->tid
)
3051 } else if (auxtrace_event
->cpu
!= sample
->cpu
) {
3052 if (etm
->per_thread_decoding
) {
3054 * Found a per-cpu buffer after a per-thread one was
3057 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3063 if (aux_event
->flags
& PERF_AUX_FLAG_OVERWRITE
) {
3065 * Clamp size in snapshot mode. The buffer size is clamped in
3066 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3069 aux_size
= min(aux_event
->aux_size
, auxtrace_event
->size
);
3072 * In this mode, the head also points to the end of the buffer so aux_offset
3073 * needs to have the size subtracted so it points to the beginning as in normal mode
3075 aux_offset
= aux_event
->aux_offset
- aux_size
;
3077 aux_size
= aux_event
->aux_size
;
3078 aux_offset
= aux_event
->aux_offset
;
3081 if (aux_offset
>= auxtrace_event
->offset
&&
3082 aux_offset
+ aux_size
<= auxtrace_event
->offset
+ auxtrace_event
->size
) {
3083 struct cs_etm_queue
*etmq
= etm
->queues
.queue_array
[auxtrace_event
->idx
].priv
;
3086 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3087 * based on the sizes of the aux event, and queue that fragment.
3089 auxtrace_fragment
.auxtrace
= *auxtrace_event
;
3090 auxtrace_fragment
.auxtrace
.size
= aux_size
;
3091 auxtrace_fragment
.auxtrace
.offset
= aux_offset
;
3092 file_offset
+= aux_offset
- auxtrace_event
->offset
+ auxtrace_event
->header
.size
;
3094 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64
" offset: %#"PRI_lx64
3095 " tid: %d cpu: %d\n", aux_size
, aux_offset
, sample
->tid
, sample
->cpu
);
3096 err
= auxtrace_queues__add_event(&etm
->queues
, session
, &auxtrace_fragment
,
3101 format
= (aux_event
->flags
& PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW
) ?
3102 UNFORMATTED
: FORMATTED
;
3103 if (etmq
->format
!= UNSET
&& format
!= etmq
->format
) {
3104 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3107 etmq
->format
= format
;
3111 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3115 static int cs_etm__process_aux_hw_id_cb(struct perf_session
*session
, union perf_event
*event
,
3116 u64 offset __maybe_unused
, void *data __maybe_unused
)
3118 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3119 if (event
->header
.type
== PERF_RECORD_AUX_OUTPUT_HW_ID
) {
3120 (*(int *)data
)++; /* increment found count */
3121 return cs_etm__process_aux_output_hw_id(session
, event
);
3126 static int cs_etm__queue_aux_records_cb(struct perf_session
*session
, union perf_event
*event
,
3127 u64 offset __maybe_unused
, void *data __maybe_unused
)
3129 struct perf_sample sample
;
3131 struct auxtrace_index_entry
*ent
;
3132 struct auxtrace_index
*auxtrace_index
;
3133 struct evsel
*evsel
;
3136 /* Don't care about any other events, we're only queuing buffers for AUX events */
3137 if (event
->header
.type
!= PERF_RECORD_AUX
)
3140 if (event
->header
.size
< sizeof(struct perf_record_aux
))
3143 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3144 if (!event
->aux
.aux_size
)
3148 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3149 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3151 evsel
= evlist__event2evsel(session
->evlist
, event
);
3154 ret
= evsel__parse_sample(evsel
, event
, &sample
);
3159 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3161 list_for_each_entry(auxtrace_index
, &session
->auxtrace_index
, list
) {
3162 for (i
= 0; i
< auxtrace_index
->nr
; i
++) {
3163 ent
= &auxtrace_index
->entries
[i
];
3164 ret
= cs_etm__queue_aux_fragment(session
, ent
->file_offset
,
3165 ent
->sz
, &event
->aux
, &sample
);
3167 * Stop search on error or successful values. Continue search on
3176 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3177 * don't exit with an error because it will still be possible to decode other aux records.
3179 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3180 " tid: %d cpu: %d\n", event
->aux
.aux_offset
, sample
.tid
, sample
.cpu
);
3184 static int cs_etm__queue_aux_records(struct perf_session
*session
)
3186 struct auxtrace_index
*index
= list_first_entry_or_null(&session
->auxtrace_index
,
3187 struct auxtrace_index
, list
);
3188 if (index
&& index
->nr
> 0)
3189 return perf_session__peek_events(session
, session
->header
.data_offset
,
3190 session
->header
.data_size
,
3191 cs_etm__queue_aux_records_cb
, NULL
);
3194 * We would get here if there are no entries in the index (either no auxtrace
3195 * buffers or no index at all). Fail silently as there is the possibility of
3196 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3199 * In that scenario, buffers will not be split by AUX records.
3204 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3205 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3208 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3211 static bool cs_etm__has_virtual_ts(u64
**metadata
, int num_cpu
)
3215 for (j
= 0; j
< num_cpu
; j
++) {
3216 switch (metadata
[j
][CS_ETM_MAGIC
]) {
3217 case __perf_cs_etmv4_magic
:
3218 if (HAS_PARAM(j
, ETMV4
, TS_SOURCE
) || metadata
[j
][CS_ETMV4_TS_SOURCE
] != 1)
3221 case __perf_cs_ete_magic
:
3222 if (HAS_PARAM(j
, ETE
, TS_SOURCE
) || metadata
[j
][CS_ETE_TS_SOURCE
] != 1)
3226 /* Unknown / unsupported magic number. */
3233 /* map trace ids to correct metadata block, from information in metadata */
3234 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace
*etm
, int num_cpu
,
3241 for (i
= 0; i
< num_cpu
; i
++) {
3242 cs_etm_magic
= metadata
[i
][CS_ETM_MAGIC
];
3243 switch (cs_etm_magic
) {
3244 case __perf_cs_etmv3_magic
:
3245 metadata
[i
][CS_ETM_ETMTRACEIDR
] &= CORESIGHT_TRACE_ID_VAL_MASK
;
3246 trace_chan_id
= (u8
)(metadata
[i
][CS_ETM_ETMTRACEIDR
]);
3248 case __perf_cs_etmv4_magic
:
3249 case __perf_cs_ete_magic
:
3250 metadata
[i
][CS_ETMV4_TRCTRACEIDR
] &= CORESIGHT_TRACE_ID_VAL_MASK
;
3251 trace_chan_id
= (u8
)(metadata
[i
][CS_ETMV4_TRCTRACEIDR
]);
3254 /* unknown magic number */
3257 err
= cs_etm__map_trace_id_v0(etm
, trace_chan_id
, metadata
[i
]);
3265 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3266 * (formatted or not) packets to create the decoders.
3268 static int cs_etm__create_queue_decoders(struct cs_etm_queue
*etmq
)
3270 struct cs_etm_decoder_params d_params
;
3271 struct cs_etm_trace_params
*t_params
;
3272 int decoders
= intlist__nr_entries(etmq
->traceid_list
);
3278 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3281 if (etmq
->format
== UNFORMATTED
)
3282 assert(decoders
== 1);
3284 /* Use metadata to fill in trace parameters for trace decoder */
3285 t_params
= zalloc(sizeof(*t_params
) * decoders
);
3290 if (cs_etm__init_trace_params(t_params
, etmq
))
3293 /* Set decoder parameters to decode trace packets */
3294 if (cs_etm__init_decoder_params(&d_params
, etmq
,
3295 dump_trace
? CS_ETM_OPERATION_PRINT
:
3296 CS_ETM_OPERATION_DECODE
))
3299 etmq
->decoder
= cs_etm_decoder__new(decoders
, &d_params
,
3306 * Register a function to handle all memory accesses required by
3307 * the trace decoder library.
3309 if (cs_etm_decoder__add_mem_access_cb(etmq
->decoder
,
3311 cs_etm__mem_access
))
3312 goto out_free_decoder
;
3318 cs_etm_decoder__free(etmq
->decoder
);
3324 static int cs_etm__create_decoders(struct cs_etm_auxtrace
*etm
)
3326 struct auxtrace_queues
*queues
= &etm
->queues
;
3328 for (unsigned int i
= 0; i
< queues
->nr_queues
; i
++) {
3329 bool empty
= list_empty(&queues
->queue_array
[i
].head
);
3330 struct cs_etm_queue
*etmq
= queues
->queue_array
[i
].priv
;
3334 * Don't create decoders for empty queues, mainly because
3335 * etmq->format is unknown for empty queues.
3337 assert(empty
|| etmq
->format
!= UNSET
);
3341 ret
= cs_etm__create_queue_decoders(etmq
);
3348 int cs_etm__process_auxtrace_info_full(union perf_event
*event
,
3349 struct perf_session
*session
)
3351 struct perf_record_auxtrace_info
*auxtrace_info
= &event
->auxtrace_info
;
3352 struct cs_etm_auxtrace
*etm
= NULL
;
3353 struct perf_record_time_conv
*tc
= &session
->time_conv
;
3354 int event_header_size
= sizeof(struct perf_event_header
);
3355 int total_size
= auxtrace_info
->header
.size
;
3357 int num_cpu
, max_cpu
= 0;
3359 int aux_hw_id_found
;
3362 u64
**metadata
= NULL
;
3364 /* First the global part */
3365 ptr
= (u64
*) auxtrace_info
->priv
;
3366 num_cpu
= ptr
[CS_PMU_TYPE_CPUS
] & 0xffffffff;
3367 metadata
= zalloc(sizeof(*metadata
) * num_cpu
);
3371 /* Start parsing after the common part of the header */
3372 i
= CS_HEADER_VERSION_MAX
;
3375 * The metadata is stored in the auxtrace_info section and encodes
3376 * the configuration of the ARM embedded trace macrocell which is
3377 * required by the trace decoder to properly decode the trace due
3378 * to its highly compressed nature.
3380 for (int j
= 0; j
< num_cpu
; j
++) {
3381 if (ptr
[i
] == __perf_cs_etmv3_magic
) {
3383 cs_etm__create_meta_blk(ptr
, &i
,
3385 CS_ETM_NR_TRC_PARAMS_V0
);
3386 } else if (ptr
[i
] == __perf_cs_etmv4_magic
) {
3388 cs_etm__create_meta_blk(ptr
, &i
,
3390 CS_ETMV4_NR_TRC_PARAMS_V0
);
3391 } else if (ptr
[i
] == __perf_cs_ete_magic
) {
3392 metadata
[j
] = cs_etm__create_meta_blk(ptr
, &i
, CS_ETE_PRIV_MAX
, -1);
3394 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64
". File could be from a newer version of perf.\n",
3397 goto err_free_metadata
;
3402 goto err_free_metadata
;
3405 if ((int) metadata
[j
][CS_ETM_CPU
] > max_cpu
)
3406 max_cpu
= metadata
[j
][CS_ETM_CPU
];
3410 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3411 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3412 * global metadata, and each cpu's metadata respectively.
3413 * The following tests if the correct number of double words was
3414 * present in the auxtrace info section.
3416 priv_size
= total_size
- event_header_size
- INFO_HEADER_SIZE
;
3417 if (i
* 8 != priv_size
) {
3419 goto err_free_metadata
;
3422 etm
= zalloc(sizeof(*etm
));
3426 goto err_free_metadata
;
3430 * As all the ETMs run at the same exception level, the system should
3431 * have the same PID format crossing CPUs. So cache the PID format
3432 * and reuse it for sequential decoding.
3434 etm
->pid_fmt
= cs_etm__init_pid_fmt(metadata
[0]);
3436 err
= auxtrace_queues__init_nr(&etm
->queues
, max_cpu
+ 1);
3440 for (unsigned int j
= 0; j
< etm
->queues
.nr_queues
; ++j
) {
3441 err
= cs_etm__setup_queue(etm
, &etm
->queues
.queue_array
[j
], j
);
3443 goto err_free_queues
;
3446 if (session
->itrace_synth_opts
->set
) {
3447 etm
->synth_opts
= *session
->itrace_synth_opts
;
3449 itrace_synth_opts__set_default(&etm
->synth_opts
,
3450 session
->itrace_synth_opts
->default_no_sample
);
3451 etm
->synth_opts
.callchain
= false;
3454 etm
->session
= session
;
3456 etm
->num_cpu
= num_cpu
;
3457 etm
->pmu_type
= (unsigned int) ((ptr
[CS_PMU_TYPE_CPUS
] >> 32) & 0xffffffff);
3458 etm
->snapshot_mode
= (ptr
[CS_ETM_SNAPSHOT
] != 0);
3459 etm
->metadata
= metadata
;
3460 etm
->auxtrace_type
= auxtrace_info
->type
;
3462 if (etm
->synth_opts
.use_timestamp
)
3464 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3465 * therefore the decoder cannot know if the timestamp trace is
3466 * same with the kernel time.
3468 * If a user has knowledge for the working platform and can
3469 * specify itrace option 'T' to tell decoder to forcely use the
3470 * traced timestamp as the kernel time.
3472 etm
->has_virtual_ts
= true;
3474 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3475 etm
->has_virtual_ts
= cs_etm__has_virtual_ts(metadata
, num_cpu
);
3477 if (!etm
->has_virtual_ts
)
3478 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3479 "The time field of the samples will not be set accurately.\n"
3480 "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3481 "you can specify the itrace option 'T' for timestamp decoding\n"
3482 "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3484 etm
->auxtrace
.process_event
= cs_etm__process_event
;
3485 etm
->auxtrace
.process_auxtrace_event
= cs_etm__process_auxtrace_event
;
3486 etm
->auxtrace
.flush_events
= cs_etm__flush_events
;
3487 etm
->auxtrace
.free_events
= cs_etm__free_events
;
3488 etm
->auxtrace
.free
= cs_etm__free
;
3489 etm
->auxtrace
.evsel_is_auxtrace
= cs_etm__evsel_is_auxtrace
;
3490 session
->auxtrace
= &etm
->auxtrace
;
3492 err
= cs_etm__setup_timeless_decoding(etm
);
3496 etm
->tc
.time_shift
= tc
->time_shift
;
3497 etm
->tc
.time_mult
= tc
->time_mult
;
3498 etm
->tc
.time_zero
= tc
->time_zero
;
3499 if (event_contains(*tc
, time_cycles
)) {
3500 etm
->tc
.time_cycles
= tc
->time_cycles
;
3501 etm
->tc
.time_mask
= tc
->time_mask
;
3502 etm
->tc
.cap_user_time_zero
= tc
->cap_user_time_zero
;
3503 etm
->tc
.cap_user_time_short
= tc
->cap_user_time_short
;
3505 err
= cs_etm__synth_events(etm
, session
);
3507 goto err_free_queues
;
3509 err
= cs_etm__queue_aux_records(session
);
3511 goto err_free_queues
;
3514 * Map Trace ID values to CPU metadata.
3516 * Trace metadata will always contain Trace ID values from the legacy algorithm
3517 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3518 * or the kernel doesn't emit them.
3520 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3521 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3522 * in which case a different value will be used. This means an older perf may still
3523 * be able to record and read files generate on a newer system.
3525 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3526 * those packets. If they are there then the values will be mapped and plugged into
3527 * the metadata and decoders are only created for each mapping received.
3529 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3530 * then we map Trace ID values to CPU directly from the metadata and create decoders
3534 /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3535 aux_hw_id_found
= 0;
3536 err
= perf_session__peek_events(session
, session
->header
.data_offset
,
3537 session
->header
.data_size
,
3538 cs_etm__process_aux_hw_id_cb
, &aux_hw_id_found
);
3540 goto err_free_queues
;
3542 /* if no HW ID found this is a file with metadata values only, map from metadata */
3543 if (!aux_hw_id_found
) {
3544 err
= cs_etm__map_trace_ids_metadata(etm
, num_cpu
, metadata
);
3546 goto err_free_queues
;
3549 err
= cs_etm__create_decoders(etm
);
3551 goto err_free_queues
;
3553 etm
->data_queued
= etm
->queues
.populated
;
3557 auxtrace_queues__free(&etm
->queues
);
3558 session
->auxtrace
= NULL
;
3562 /* No need to check @metadata[j], free(NULL) is supported */
3563 for (int j
= 0; j
< num_cpu
; j
++)
3564 zfree(&metadata
[j
]);