// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>

#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
#include "util.h"

#define MAX_TIMESTAMP (~0ULL)

/*
 * A64 instructions are always 4 bytes
 *
 * Only A64 is supported, so can use this constant for converting between
 * addresses and instruction counts, calculating offsets etc
 */
#define A64_INSTR_SIZE 4

struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct machine *machine;
	struct thread *unknown_thread;

	u8 timeless_decoding;
	u8 snapshot_mode;
	u8 data_queued;
	u8 sample_branches;
	u8 sample_instructions;

	int num_cpu;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;
	u64 kernel_start;
	unsigned int pmu_type;
};

struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct thread *thread;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	const struct cs_etm_state *state;
	union perf_event *event_buf;
	unsigned int queue_nr;
	pid_t pid, tid;
	int cpu;
	u64 time;
	u64 timestamp;
	u64 offset;
	u64 period_instructions;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
};

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_);

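/* Print a single decoded trace packet, one per line, in the dump color */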
static void cs_etm__packet_dump(const char *pkt_string)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, "	%s", pkt_string);
	else
		color_fprintf(stdout, color, "	%s\n", pkt_string);

	fflush(stdout);
}

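/*
 * Raw dump helper: run a throwaway decoder in PRINT mode over the AUX
 * buffer so that every packet is printed via cs_etm__packet_dump().
 */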
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
			       struct auxtrace_buffer *buffer)
{
	int i, ret;
	const char *color = PERF_COLOR_BLUE;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_decoder *decoder;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		     ". ... CoreSight ETM Trace data: size %zu bytes\n",
		     buffer->size);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
	for (i = 0; i < etm->num_cpu; i++) {
		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
		t_params[i].etmv4.reg_configr =
			etm->metadata[i][CS_ETMV4_TRCCONFIGR];
		t_params[i].etmv4.reg_traceidr =
			etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
	}

	/* Set decoder parameters to simply print the trace packets */
	d_params.packet_printer = cs_etm__packet_dump;
	d_params.operation = CS_ETM_OPERATION_PRINT;
	d_params.formatted = true;
	d_params.fsyncs = false;
	d_params.hsyncs = false;
	d_params.frame_aligned = true;

	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	zfree(&t_params);

	if (!decoder)
		return;

	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__free(decoder);
}

static int cs_etm__flush_events(struct perf_session *session,
				struct perf_tool *tool)
{
	int ret;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	if (!etm->timeless_decoding)
		return -EINVAL;

	ret = cs_etm__update_queues(etm);

	if (ret < 0)
		return ret;

	return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
}

static void cs_etm__free_queue(void *priv)
{
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	thread__zput(etmq->thread);
	cs_etm_decoder__free(etmq->decoder);
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);
}

static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}

static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/CPU# nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	thread__zput(aux->unknown_thread);
	zfree(&aux->metadata);
	zfree(&aux);
}

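/*
 * Memory access callback handed to the decoder library: resolve @address
 * to a map/dso for the queue's thread and copy up to @size bytes of
 * instruction memory into @buffer.  Returns the number of bytes read,
 * 0 if the location cannot be resolved.
 */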
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
			      size_t size, u8 *buffer)
{
	u8  cpumode;
	u64 offset;
	int len;
	struct thread *thread;
	struct machine *machine;
	struct addr_location al;

	if (!etmq)
		return 0;

	machine = etmq->etm->machine;
	if (address >= etmq->etm->kernel_start)
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = etmq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return -EINVAL;
		thread = etmq->etm->unknown_thread;
	}

	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
		return 0;

	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
		return 0;

	offset = al.map->map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);

	if (len <= 0)
		return 0;

	return len;
}

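/*
 * Allocate a decode queue: packet buffers, optional last-branch state, an
 * event scratch buffer and a decoder configured from the ETMv4 metadata.
 */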
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
						unsigned int queue_nr)
{
	int i;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_queue *etmq;
	size_t szp = sizeof(struct cs_etm_packet);

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->packet = zalloc(szp);
	if (!etmq->packet)
		goto out_free;

	if (etm->synth_opts.last_branch || etm->sample_branches) {
		etmq->prev_packet = zalloc(szp);
		if (!etmq->prev_packet)
			goto out_free;
	}

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		etmq->last_branch = zalloc(sz);
		if (!etmq->last_branch)
			goto out_free;
		etmq->last_branch_rb = zalloc(sz);
		if (!etmq->last_branch_rb)
			goto out_free;
	}

	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!etmq->event_buf)
		goto out_free;

	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->pid = -1;
	etmq->tid = -1;
	etmq->cpu = -1;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		goto out_free;

	for (i = 0; i < etm->num_cpu; i++) {
		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
		t_params[i].etmv4.reg_configr =
			etm->metadata[i][CS_ETMV4_TRCCONFIGR];
		t_params[i].etmv4.reg_traceidr =
			etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
	}

	/* Set decoder parameters to decode the trace packets */
	d_params.packet_printer = cs_etm__packet_dump;
	d_params.operation = CS_ETM_OPERATION_DECODE;
	d_params.formatted = true;
	d_params.fsyncs = false;
	d_params.hsyncs = false;
	d_params.frame_aligned = true;
	d_params.data = etmq;

	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	zfree(&t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	etmq->offset = 0;
	etmq->period_instructions = 0;

	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);

	return NULL;
}

static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
		return 0;

	etmq = cs_etm__alloc_queue(etm, queue_nr);

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;

	if (queue->cpu != -1)
		etmq->cpu = queue->cpu;

	etmq->tid = queue->tid;

	return 0;
}

static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
{
	unsigned int i;
	int ret;

	for (i = 0; i < etm->queues.nr_queues; i++) {
		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
{
	if (etm->queues.new_data) {
		etm->queues.new_data = false;
		return cs_etm__setup_queues(etm);
	}

	return 0;
}

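/*
 * Copy the circular last_branch_rb buffer into the linear last_branch
 * stack that gets attached to synthesized samples.
 */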
static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs_src = etmq->last_branch_rb;
	struct branch_stack *bs_dst = etmq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[etmq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * etmq->last_branch_pos);
	}
}

static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
{
	etmq->last_branch_pos = 0;
	etmq->last_branch_rb->nr = 0;
}

static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
{
	/*
	 * The packet records the execution range with an exclusive end address
	 *
	 * A64 instructions are constant size, so the last executed
	 * instruction is A64_INSTR_SIZE before the end address
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return packet->end_addr - A64_INSTR_SIZE;
}

static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
{
	/*
	 * Only A64 instructions are currently supported, so can get
	 * instruction count by dividing.
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
}

static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
				     u64 offset)
{
	/*
	 * Only A64 instructions are currently supported, so can get
	 * instruction address by multiplying.
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return packet->start_addr + offset * A64_INSTR_SIZE;
}

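/*
 * Record the branch implied by the transition from prev_packet to packet
 * in the circular last-branch buffer.
 */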
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs = etmq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down.  After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!etmq->last_branch_pos)
		etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	etmq->last_branch_pos -= 1;

	be       = &bs->entries[etmq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(etmq->prev_packet);
	be->to	 = etmq->packet->start_addr;
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

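/*
 * Fetch the next auxtrace buffer for this queue, loading its data from the
 * perf.data file if required, and describe it in @buff.  Returns the buffer
 * length, 0 when there is no more data, or a negative error code.
 */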
static int
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		buff->len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	buff->offset = aux_buffer->offset;
	buff->len = aux_buffer->size;
	buff->buf = aux_buffer->data;

	buff->ref_timestamp = aux_buffer->reference;

	return buff->len;
}

static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
				    struct auxtrace_queue *queue)
{
	struct cs_etm_queue *etmq = queue->priv;

	/* CPU-wide tracing isn't supported yet */
	if (queue->tid == -1)
		return;

	if ((!etmq->thread) && (etmq->tid != -1))
		etmq->thread = machine__find_thread(etm->machine, -1,
						    etmq->tid);

	if (etmq->thread) {
		etmq->pid = etmq->thread->pid_;
		if (queue->cpu == -1)
			etmq->cpu = etmq->thread->cpu;
	}
}

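/*
 * Synthesize a PERF_RECORD_SAMPLE for the instructions event at @addr,
 * covering @period instructions, and deliver it to the session.
 */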
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = etmq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = addr;
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = etmq->packet->cpu;
	sample.flags = 0;
	sample.insn_len = 1;
	sample.cpumode = event->header.misc;

	if (etm->synth_opts.last_branch) {
		cs_etm__copy_last_branch_rb(etmq);
		sample.branch_stack = etmq->last_branch;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(etmq);

	return ret;
}

/*
 * The cs etm packet encodes an instruction range between a branch target
 * and the next taken branch.  Generate sample accordingly.
 */
static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct perf_sample sample = {.ip = 0,};
	union perf_event *event = etmq->event_buf;
	struct dummy_branch_stack {
		u64			nr;
		struct branch_entry	entries;
	} dummy_bs;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.addr = etmq->packet->start_addr;
	sample.id = etmq->etm->branches_id;
	sample.stream_id = etmq->etm->branches_id;
	sample.period = 1;
	sample.cpu = etmq->packet->cpu;
	sample.flags = 0;
	sample.cpumode = PERF_RECORD_MISC_USER;

	/*
	 * perf report cannot handle events without a branch stack
	 */
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->branches_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
		"CS ETM Trace: failed to deliver instruction event, error %d\n",
		ret);

	return ret;
}

struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int cs_etm__event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct cs_etm_synth *cs_etm_synth =
		      container_of(tool, struct cs_etm_synth, dummy_tool);

	return perf_session__deliver_synth_event(cs_etm_synth->session,
						 event, NULL);
}

static int cs_etm__synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct cs_etm_synth cs_etm_synth;

	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
	cs_etm_synth.session = session;

	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
					   &id, cs_etm__event_synth);
}

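/*
 * Set up the synthetic 'branches' and 'instructions' events that decoded
 * trace is converted into, based on the attributes of the event that
 * carried the CoreSight AUX data.
 */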
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_branches = true;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch)
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_instructions = true;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}

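/*
 * Called for each CS_ETM_RANGE packet: account executed instructions, emit
 * periodic instruction samples and branch samples as configured, then swap
 * packet/prev_packet for the next iteration.
 */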
static int cs_etm__sample(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;
	int ret;
	u64 instrs_executed;

	instrs_executed = cs_etm__instr_count(etmq->packet);
	etmq->period_instructions += instrs_executed;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
	    etmq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq);

	if (etm->sample_instructions &&
	    etmq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/* Get number of instructions executed after the sample point */
		u64 instrs_over = etmq->period_instructions -
			etm->instructions_sample_period;

		/*
		 * Calculate the address of the sampled instruction (-1 as
		 * sample is reported as though instruction has just been
		 * executed, but PC has not advanced to next instruction)
		 */
		u64 offset = (instrs_executed - instrs_over - 1);
		u64 addr = cs_etm__instr_addr(etmq->packet, offset);

		ret = cs_etm__synth_instruction_sample(
			etmq, addr, etm->instructions_sample_period);
		if (ret)
			return ret;

		/* Carry remaining instructions into next sample period */
		etmq->period_instructions = instrs_over;
	}

	if (etm->sample_branches &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
	    etmq->prev_packet->last_instr_taken_branch) {
		ret = cs_etm__synth_branch_sample(etmq);
		if (ret)
			return ret;
	}

	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return 0;
}

static int cs_etm__flush(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_packet *tmp;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, addr,
			etmq->period_instructions);
		etmq->period_instructions = 0;

		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return err;
}

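/*
 * Main decode loop for a queue: feed every auxtrace buffer to the decoder
 * and turn the resulting packets into samples.
 */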
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_buffer buffer;
	size_t buffer_used, processed;
	int err = 0;

	if (!etm->kernel_start)
		etm->kernel_start = machine__kernel_start(etm->machine);

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		buffer_used = 0;
		memset(&buffer, 0, sizeof(buffer));
		err = cs_etm__get_trace(&buffer, etmq);
		if (err <= 0)
			return err;
		/*
		 * We cannot assume consecutive blocks in the data file are
		 * contiguous, reset the decoder to force re-sync.
		 */
		err = cs_etm_decoder__reset(etmq->decoder);
		if (err != 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			processed = 0;
			err = cs_etm_decoder__process_data_block(
				etmq->decoder,
				etmq->offset,
				&buffer.buf[buffer_used],
				buffer.len - buffer_used,
				&processed);
			if (err)
				return err;

			etmq->offset += processed;
			buffer_used += processed;

			/* Process each packet in this chunk */
			while (1) {
				err = cs_etm_decoder__get_packet(etmq->decoder,
								 etmq->packet);
				if (err <= 0)
					/*
					 * Stop processing this chunk on
					 * end of data or error
					 */
					break;

				switch (etmq->packet->sample_type) {
				case CS_ETM_RANGE:
					/*
					 * If the packet contains an instruction
					 * range, generate instruction sequence
					 * events.
					 */
					cs_etm__sample(etmq);
					break;
				case CS_ETM_TRACE_ON:
					/*
					 * Discontinuity in trace, flush
					 * previous branch stack
					 */
					cs_etm__flush(etmq);
					break;
				default:
					break;
				}
			}
		} while (buffer.len > buffer_used);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__flush(etmq);
	}

	return err;
}

static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;

		if (etmq && ((tid == -1) || (etmq->tid == tid))) {
			etmq->time = time_;
			cs_etm__set_pid_tid_cpu(etm, queue);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}

static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (!etm->timeless_decoding)
		return -EINVAL;

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid,
						       sample->time);

	return 0;
}

static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t  data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}

static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events; if any has the time bit set,
	 * timed decoding is required.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n",
	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
};

static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
};

static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
};

static void cs_etm__print_auxtrace_info(u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure.. return */
			return;
	}
}

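/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO events carrying CoreSight
 * metadata: parse the per-CPU configuration, build the traceID/CPU map and
 * register the cs_etm auxtrace instance with the session.
 */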
int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				    0xffffffff);

	/*
	 * Create an RB tree for traceID-CPU# tuple. Since the conversion has
	 * to be made for each packet that gets decoded, optimizing access in
	 * anything other than a sequential array is worth doing.
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = PTR_ERR(inode);
			goto err_free_metadata;
		}

		/*
		 * The node for that CPU should not be taken.
		 * Back out if that's the case.
		 */
		if (inode->priv) {
			err = -EINVAL;
			goto err_free_metadata;
		}
		/* All good, associate the traceID with the CPU# */
		inode->priv = &metadata[j][CS_ETM_CPU];
	}

	/*
	 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
	 * global metadata, and each cpu's metadata respectively.
	 * The following tests if the correct number of double words was
	 * present in the auxtrace info section.
	 */
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&etm->queues);
	if (err)
		goto err_free_etm;

	etm->session = session;
	etm->machine = &session->machines.host;

	etm->num_cpu = num_cpu;
	etm->pmu_type = pmu_type;
	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;
	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	session->auxtrace = &etm->auxtrace;

	etm->unknown_thread = thread__new(999999999, 999999999);
	if (!etm->unknown_thread)
		goto err_free_queues;

	/*
	 * Initialize list node so that at thread__zput() we can avoid
	 * segmentation fault at list_del_init().
	 */
	INIT_LIST_HEAD(&etm->unknown_thread->node);

	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;

	if (thread__init_map_groups(etm->unknown_thread, etm->machine))
		goto err_delete_thread;

	if (dump_trace) {
		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
		return 0;
	}

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts);
		etm->synth_opts.callchain = false;
	}

	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&etm->queues, session);
	if (err)
		goto err_delete_thread;

	etm->data_queued = etm->queues.populated;

	return 0;

err_delete_thread:
	thread__zput(etm->unknown_thread);
err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
err_free_traceid_list:
	intlist__delete(traceid_list);
err_free_hdr:
	zfree(&hdr);

	return -EINVAL;
}