// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>

#include "cs-etm-decoder/cs-etm-decoder.h"
#include "thread_map.h"
#include "thread-stack.h"

#define MAX_TIMESTAMP (~0ULL)

/*
 * A64 instructions are always 4 bytes.
 *
 * Only A64 is supported, so this constant can be used for converting between
 * addresses and instruction counts, calculating offsets, etc.
 */
#define A64_INSTR_SIZE 4
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct machine *machine;
	struct thread *unknown_thread;

	u8 timeless_decoding;
	u8 snapshot_mode;
	u8 data_queued;
	u8 sample_branches;
	u8 sample_instructions;

	int num_cpu;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;
	u64 kernel_start;
	unsigned int pmu_type;
};
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct thread *thread;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	const struct cs_etm_state *state;
	union perf_event *event_buf;
	unsigned int queue_nr;
	pid_t pid, tid;
	int cpu;
	u64 time;
	u64 timestamp;
	u64 offset;
	u64 period_instructions;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
};
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_);
static void cs_etm__packet_dump(const char *pkt_string)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, "	%s", pkt_string);
	else
		color_fprintf(stdout, color, "	%s\n", pkt_string);

	fflush(stdout);
}
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
			       struct auxtrace_buffer *buffer)
{
	int i, ret;
	const char *color = PERF_COLOR_BLUE;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_decoder *decoder;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		     ". ... CoreSight ETM Trace data: size %zu bytes\n",
		     buffer->size);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
	for (i = 0; i < etm->num_cpu; i++) {
		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
		t_params[i].etmv4.reg_configr =
					etm->metadata[i][CS_ETMV4_TRCCONFIGR];
		t_params[i].etmv4.reg_traceidr =
					etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
	}

	/* Set decoder parameters to simply print the trace packets */
	d_params.packet_printer = cs_etm__packet_dump;
	d_params.operation = CS_ETM_OPERATION_PRINT;
	d_params.formatted = true;
	d_params.fsyncs = false;
	d_params.hsyncs = false;
	d_params.frame_aligned = true;

	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	zfree(&t_params);

	if (!decoder)
		return;
	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__free(decoder);
}
static int cs_etm__flush_events(struct perf_session *session,
				struct perf_tool *tool)
{
	int ret;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	if (!etm->timeless_decoding)
		return -EINVAL;

	ret = cs_etm__update_queues(etm);

	if (ret < 0)
		return ret;

	return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
}
static void cs_etm__free_queue(void *priv)
{
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	thread__zput(etmq->thread);
	cs_etm_decoder__free(etmq->decoder);
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);
}
static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}
static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/CPU# nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	thread__zput(aux->unknown_thread);
	zfree(&aux->metadata);
	free(aux);
}
static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
	struct machine *machine;

	machine = etmq->etm->machine;

	if (address >= etmq->etm->kernel_start) {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_KERNEL;
		else
			return PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_USER;
		else if (perf_guest)
			return PERF_RECORD_MISC_GUEST_USER;
		else
			return PERF_RECORD_MISC_HYPERVISOR;
	}
}
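/*
 * Illustration of the mode decision above (values assumed, not from a real
 * trace): on a host machine with kernel_start = 0xffff000008080000, address
 * 0x0000000000400568 resolves to PERF_RECORD_MISC_USER, while
 * 0xffff000008083200 resolves to PERF_RECORD_MISC_KERNEL.
 */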
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
			      size_t size, u8 *buffer)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct thread *thread;
	struct machine *machine;
	struct addr_location al;

	if (!etmq)
		return 0;

	machine = etmq->etm->machine;
	cpumode = cs_etm__cpu_mode(etmq, address);

	thread = etmq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return 0;
		thread = etmq->etm->unknown_thread;
	}

	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
		return 0;

	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
		return 0;

	offset = al.map->map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);

	if (len <= 0)
		return 0;

	return len;
}
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
						unsigned int queue_nr)
{
	int i;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_queue *etmq;
	size_t szp = sizeof(struct cs_etm_packet);

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->packet = zalloc(szp);
	if (!etmq->packet)
		goto out_free;

	if (etm->synth_opts.last_branch || etm->sample_branches) {
		etmq->prev_packet = zalloc(szp);
		if (!etmq->prev_packet)
			goto out_free;
	}

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		etmq->last_branch = zalloc(sz);
		if (!etmq->last_branch)
			goto out_free;
		etmq->last_branch_rb = zalloc(sz);
		if (!etmq->last_branch_rb)
			goto out_free;
	}

	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!etmq->event_buf)
		goto out_free;

	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->pid = -1;
	etmq->tid = -1;
	etmq->cpu = -1;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		goto out_free;

	for (i = 0; i < etm->num_cpu; i++) {
		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
		t_params[i].etmv4.reg_configr =
					etm->metadata[i][CS_ETMV4_TRCCONFIGR];
		t_params[i].etmv4.reg_traceidr =
					etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
	}

	/* Set decoder parameters to decode trace packets */
	d_params.packet_printer = cs_etm__packet_dump;
	d_params.operation = CS_ETM_OPERATION_DECODE;
	d_params.formatted = true;
	d_params.fsyncs = false;
	d_params.hsyncs = false;
	d_params.frame_aligned = true;
	d_params.data = etmq;

	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	zfree(&t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	etmq->offset = 0;
	etmq->period_instructions = 0;

	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);

	return NULL;
}
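/*
 * Note on cs_etm__alloc_queue(): the memory access callback is registered
 * for the entire address range (0x0 to (u64)-1), so every instruction
 * address the decoder encounters is routed through cs_etm__mem_access() to
 * fetch opcode bytes from the DSO backing that address.
 */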
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
		return 0;

	etmq = cs_etm__alloc_queue(etm, queue_nr);

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;

	if (queue->cpu != -1)
		etmq->cpu = queue->cpu;

	etmq->tid = queue->tid;

	return 0;
}
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
{
	unsigned int i;
	int ret;

	for (i = 0; i < etm->queues.nr_queues; i++) {
		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
{
	if (etm->queues.new_data) {
		etm->queues.new_data = false;
		return cs_etm__setup_queues(etm);
	}

	return 0;
}
static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs_src = etmq->last_branch_rb;
	struct branch_stack *bs_dst = etmq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[etmq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * etmq->last_branch_pos);
	}
}
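/*
 * Worked example for the two-step copy (sizes assumed): with
 * last_branch_sz = 8 and last_branch_pos = 5, step one copies source
 * entries 5..7 (nr = 8 - 5 = 3) into bs_dst->entries[0..2]; if the ring
 * has wrapped (bs_src->nr >= 8), step two copies source entries 0..4 into
 * bs_dst->entries[3..7], leaving all eight branches ordered newest first.
 */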
static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
{
	etmq->last_branch_pos = 0;
	etmq->last_branch_rb->nr = 0;
}
static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_TRACE_ON packet */
	if (packet->sample_type == CS_ETM_TRACE_ON)
		return 0;

	/*
	 * The packet records the execution range with an exclusive end address.
	 *
	 * A64 instructions are constant size, so the last executed
	 * instruction is A64_INSTR_SIZE before the end address.
	 *
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return packet->end_addr - A64_INSTR_SIZE;
}
static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_TRACE_ON packet */
	if (packet->sample_type == CS_ETM_TRACE_ON)
		return 0;

	return packet->start_addr;
}
static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
{
	/*
	 * Only A64 instructions are currently supported, so can get
	 * instruction count by dividing.
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
}
static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
				     u64 offset)
{
	/*
	 * Only A64 instructions are currently supported, so can get
	 * instruction address by multiplying.
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return packet->start_addr + offset * A64_INSTR_SIZE;
}
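/*
 * Worked example for the A64 helpers (addresses assumed): a packet with
 * start_addr = 0x400500 and exclusive end_addr = 0x400510 counts
 * (0x400510 - 0x400500) / 4 = 4 instructions; the instruction at offset 2
 * sits at 0x400500 + 2 * 4 = 0x400508, and the last executed instruction
 * is at 0x400510 - 4 = 0x40050c.
 */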
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs = etmq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down. After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!etmq->last_branch_pos)
		etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	etmq->last_branch_pos -= 1;

	be       = &bs->entries[etmq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(etmq->prev_packet);
	be->to	 = cs_etm__first_executed_instr(etmq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}
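/*
 * Illustration of the insert position (last_branch_sz = 4 assumed): writes
 * land at index 3, 2, 1, 0 and then wrap back to 3, so the entry at
 * last_branch_pos is always the most recent branch and
 * cs_etm__copy_last_branch_rb() can emit the stack newest first.
 */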
static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}
static int
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		buff->len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	buff->offset = aux_buffer->offset;
	buff->len = aux_buffer->size;
	buff->buf = aux_buffer->data;

	buff->ref_timestamp = aux_buffer->reference;

	return buff->len;
}
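/*
 * Note the ordering in cs_etm__get_trace(): the previous auxtrace_buffer is
 * only dropped once the next one has been fetched (and its data loaded if
 * need be), so the decoder never loses the block it is currently reading.
 */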
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
				    struct auxtrace_queue *queue)
{
	struct cs_etm_queue *etmq = queue->priv;

	/* CPU-wide tracing isn't supported yet */
	if (queue->tid == -1)
		return;

	if ((!etmq->thread) && (etmq->tid != -1))
		etmq->thread = machine__find_thread(etm->machine, -1,
						    etmq->tid);

	if (etmq->thread) {
		etmq->pid = etmq->thread->pid_;
		if (queue->cpu == -1)
			etmq->cpu = etmq->thread->cpu;
	}
}
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = etmq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = addr;
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = etmq->packet->cpu;
	sample.flags = 0;
	sample.insn_len = 1;
	sample.cpumode = event->sample.header.misc;

	if (etm->synth_opts.last_branch) {
		cs_etm__copy_last_branch_rb(etmq);
		sample.branch_stack = etmq->last_branch;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(etmq);

	return ret;
}
/*
 * The cs etm packet encodes an instruction range between a branch target
 * and the next taken branch. Generate sample accordingly.
 */
static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct perf_sample sample = {.ip = 0,};
	union perf_event *event = etmq->event_buf;
	struct dummy_branch_stack {
		u64			nr;
		struct branch_entry	entries;
	} dummy_bs;
	u64 ip;

	ip = cs_etm__last_executed_instr(etmq->prev_packet);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = ip;
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.addr = cs_etm__first_executed_instr(etmq->packet);
	sample.id = etmq->etm->branches_id;
	sample.stream_id = etmq->etm->branches_id;
	sample.period = 1;
	sample.cpu = etmq->packet->cpu;
	sample.flags = 0;
	sample.cpumode = event->sample.header.misc;

	/*
	 * perf report cannot handle events without a branch stack
	 */
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->branches_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
		"CS ETM Trace: failed to deliver branch event, error %d\n",
		ret);

	return ret;
}
struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
static int cs_etm__event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct cs_etm_synth *cs_etm_synth =
		      container_of(tool, struct cs_etm_synth, dummy_tool);

	return perf_session__deliver_synth_event(cs_etm_synth->session,
						 event, NULL);
}
static int cs_etm__synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct cs_etm_synth cs_etm_synth;

	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
	cs_etm_synth.session = session;

	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
					   &id, cs_etm__event_synth);
}
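/*
 * How the dummy_tool plumbing works: perf_event__synthesize_attr() invokes
 * cs_etm__event_synth() with the embedded dummy_tool, and container_of()
 * recovers the surrounding cs_etm_synth to reach the session the attribute
 * event must be delivered to.
 */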
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_branches = true;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch)
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_instructions = true;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
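/*
 * Note on the id scheme in cs_etm__synth_events(): synthesized events take
 * the first id of the traced evsel plus a fixed offset of 1000000000, so
 * decoder-generated samples cannot collide with ids the kernel allocated
 * for real events in this session.
 */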
static int cs_etm__sample(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;
	int ret;
	u64 instrs_executed;

	instrs_executed = cs_etm__instr_count(etmq->packet);
	etmq->period_instructions += instrs_executed;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
	    etmq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq);

	if (etm->sample_instructions &&
	    etmq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/* Get number of instructions executed after the sample point */
		u64 instrs_over = etmq->period_instructions -
			etm->instructions_sample_period;

		/*
		 * Calculate the address of the sampled instruction (-1 as
		 * sample is reported as though instruction has just been
		 * executed, but PC has not advanced to next instruction)
		 */
		u64 offset = (instrs_executed - instrs_over - 1);
		u64 addr = cs_etm__instr_addr(etmq->packet, offset);

		ret = cs_etm__synth_instruction_sample(
			etmq, addr, etm->instructions_sample_period);
		if (ret)
			return ret;

		/* Carry remaining instructions into next sample period */
		etmq->period_instructions = instrs_over;
	}

	if (etm->sample_branches && etmq->prev_packet) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (etmq->prev_packet->sample_type == CS_ETM_RANGE &&
		    etmq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq);
			if (ret)
				return ret;
		}
	}

	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return 0;
}
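/*
 * Worked example for the sampling arithmetic above (numbers assumed): with
 * instructions_sample_period = 1000, carried-over period_instructions = 600
 * and a packet executing 500 instructions, period_instructions reaches
 * 1100, instrs_over = 100, the sample lands on the instruction at offset
 * 500 - 100 - 1 = 399 within the packet, and 100 instructions carry into
 * the next period.
 */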
static int cs_etm__flush(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;

	if (!etmq->prev_packet)
		return 0;

	/* Handle start tracing packet */
	if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, addr,
			etmq->period_instructions);
		if (err)
			return err;

		etmq->period_instructions = 0;
	}

	if (etm->sample_branches &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq);
		if (err)
			return err;
	}

swap_packet:
	if (etmq->etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return err;
}
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_buffer buffer;
	size_t buffer_used, processed;
	int err = 0;

	if (!etm->kernel_start)
		etm->kernel_start = machine__kernel_start(etm->machine);

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		buffer_used = 0;
		memset(&buffer, 0, sizeof(buffer));
		err = cs_etm__get_trace(&buffer, etmq);
		if (err <= 0)
			return err;
		/*
		 * We cannot assume consecutive blocks in the data file are
		 * contiguous, reset the decoder to force re-sync.
		 */
		err = cs_etm_decoder__reset(etmq->decoder);
		if (err != 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			processed = 0;
			err = cs_etm_decoder__process_data_block(
				etmq->decoder,
				etmq->offset,
				&buffer.buf[buffer_used],
				buffer.len - buffer_used,
				&processed);
			if (err)
				return err;

			etmq->offset += processed;
			buffer_used += processed;

			/* Process each packet in this chunk */
			while (1) {
				err = cs_etm_decoder__get_packet(etmq->decoder,
								 etmq->packet);
				if (err <= 0)
					/*
					 * Stop processing this chunk on
					 * end of data or error
					 */
					break;

				switch (etmq->packet->sample_type) {
				case CS_ETM_RANGE:
					/*
					 * If the packet contains an instruction
					 * range, generate instruction sequence
					 * events.
					 */
					cs_etm__sample(etmq);
					break;
				case CS_ETM_TRACE_ON:
					/*
					 * Discontinuity in trace, flush
					 * previous branch stack
					 */
					cs_etm__flush(etmq);
					break;
				case CS_ETM_EMPTY:
					/*
					 * Should not receive empty packet,
					 * report error.
					 */
					pr_err("CS ETM Trace: empty packet\n");
					break;
				default:
					break;
				}
			}
		} while (buffer.len > buffer_used);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__flush(etmq);
	}

	return err;
}
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;

		if (etmq && ((tid == -1) || (etmq->tid == tid))) {
			etmq->time = time_;
			cs_etm__set_pid_tid_cpu(etm, queue);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}
static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (!etm->timeless_decoding)
		return -EINVAL;

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid,
						       sample->time);

	return 0;
}
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t  data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}
static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events and complain if we find one
	 * with the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}
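/*
 * When no traced event carries PERF_SAMPLE_TIME the trace cannot be
 * correlated by timestamp, so the session falls back to the "timeless"
 * path and queues are drained per-thread via
 * cs_etm__process_timeless_queues().
 */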
static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	 = "	Header version		       %llx\n",
	[CS_PMU_TYPE_CPUS]	 = "	PMU type/num cpus	       %llx\n",
	[CS_ETM_SNAPSHOT]	 = "	Snapshot		       %llx\n",
};

static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		 = "	Magic number		       %llx\n",
	[CS_ETM_CPU]		 = "	CPU			       %lld\n",
	[CS_ETM_ETMCR]		 = "	ETMCR			       %llx\n",
	[CS_ETM_ETMTRACEIDR]	 = "	ETMTRACEIDR		       %llx\n",
	[CS_ETM_ETMCCER]	 = "	ETMCCER			       %llx\n",
	[CS_ETM_ETMIDR]		 = "	ETMIDR			       %llx\n",
};

static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		 = "	Magic number		       %llx\n",
	[CS_ETM_CPU]		 = "	CPU			       %lld\n",
	[CS_ETMV4_TRCCONFIGR]	 = "	TRCCONFIGR		       %llx\n",
	[CS_ETMV4_TRCTRACEIDR]	 = "	TRCTRACEIDR		       %llx\n",
	[CS_ETMV4_TRCIDR0]	 = "	TRCIDR0			       %llx\n",
	[CS_ETMV4_TRCIDR1]	 = "	TRCIDR1			       %llx\n",
	[CS_ETMV4_TRCIDR2]	 = "	TRCIDR2			       %llx\n",
	[CS_ETMV4_TRCIDR8]	 = "	TRCIDR8			       %llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
};
static void cs_etm__print_auxtrace_info(u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX, cpu = 0; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure.. return */
			return;
	}
}
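/*
 * Example of the private area layout walked above (two ETMv4 CPUs assumed):
 * CS_HEADER_VERSION_0_MAX global double words come first, followed by one
 * block of CS_ETMV4_PRIV_MAX double words per CPU, each block starting with
 * the __perf_cs_etmv4_magic marker that selects the format strings used for
 * printing.
 */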
int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				    0xffffffff);

	/*
	 * Create an RB tree for traceID-CPU# tuple. Since the conversion has
	 * to be made for each packet that gets decoded, optimizing access in
	 * anything other than a sequential array is worth doing.
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = PTR_ERR(inode);
			goto err_free_metadata;
		}

		/*
		 * The node for that CPU should not be taken.
		 * Back out if that's the case.
		 */
		if (inode->priv) {
			err = -EINVAL;
			goto err_free_metadata;
		}
		/* All good, associate the traceID with the CPU# */
		inode->priv = &metadata[j][CS_ETM_CPU];
	}

	/*
	 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
	 * global metadata, and each cpu's metadata respectively.
	 * The following tests if the correct number of double words was
	 * present in the auxtrace info section.
	 */
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&etm->queues);
	if (err)
		goto err_free_etm;

	etm->session = session;
	etm->machine = &session->machines.host;

	etm->num_cpu = num_cpu;
	etm->pmu_type = pmu_type;
	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;
	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	session->auxtrace = &etm->auxtrace;

	etm->unknown_thread = thread__new(999999999, 999999999);
	if (!etm->unknown_thread)
		goto err_free_queues;

	/*
	 * Initialize list node so that at thread__zput() we can avoid
	 * segmentation fault at list_del_init().
	 */
	INIT_LIST_HEAD(&etm->unknown_thread->node);

	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;

	if (thread__init_map_groups(etm->unknown_thread, etm->machine))
		goto err_delete_thread;

	if (dump_trace) {
		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
		return 0;
	}

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&etm->queues, session);
	if (err)
		goto err_delete_thread;

	etm->data_queued = etm->queues.populated;

	return 0;

err_delete_thread:
	thread__zput(etm->unknown_thread);
err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
err_free_traceid_list:
	intlist__delete(traceid_list);
err_free_hdr:
	zfree(&hdr);

	return err;
}
);