// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include "thread-stack.h"
#include "callchain.h"
#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"

#include "../arch/x86/include/uapi/asm/perf_regs.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)
struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct perf_session *session;
	struct machine *machine;
	struct evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool use_thread_stack;
	unsigned int br_stack_sz;
	unsigned int br_stack_sz_plus;
	int have_sched_switch;
	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;
	struct itrace_synth_opts synth_opts;
	bool sample_instructions;
	u64 instructions_sample_type;
	u64 branches_sample_type;
	bool sample_transactions;
	u64 transactions_sample_type;
	bool sample_ptwrites;
	u64 ptwrites_sample_type;
	bool sample_pwr_events;
	u64 pwr_events_sample_type;
	struct evsel *pebs_evsel;
	unsigned max_non_turbo_ratio;
	unsigned long num_events;
	struct addr_filters filts;
	struct range *time_ranges;
	unsigned int range_cnt;
	struct ip_callchain *chain;
	struct branch_stack *br_stack;
};
enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};
struct intel_pt_queue {
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	struct auxtrace_buffer *old_buffer;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	union perf_event *event_buf;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	struct thread *thread;
	unsigned int sel_idx;
	u64 last_in_insn_cnt;
	u64 last_br_insn_cnt;
	unsigned int cbr_seen;
	char insn[INTEL_PT_INSN_BUF_SZ];
};
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
	struct intel_pt_pkt packet;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;
	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
	ret = intel_pt_get_packet(buf, len, &packet, &ctx);
	color_fprintf(stdout, color, " %08x: ", pos);
	for (i = 0; i < pkt_len; i++)
		color_fprintf(stdout, color, " %02x", buf[i]);
	color_fprintf(stdout, color, " ");
	ret = intel_pt_pkt_desc(&packet, desc,
				INTEL_PT_PKT_DESC_MAX);
	color_fprintf(stdout, color, " %s\n", desc);
	color_fprintf(stdout, color, " Bad packet!\n");

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
	intel_pt_dump(pt, buf, len);

static void intel_pt_log_event(union perf_event *event)
	FILE *f = intel_pt_log_fp();

	if (!intel_pt_enable_logging || !f)
	perf_event__fprintf(event, NULL, f);

static void intel_pt_dump_sample(struct perf_session *session,
				 struct perf_sample *sample)
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);

static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
	struct perf_time_interval *range = pt->synth_opts.ptime_range;
	int n = pt->synth_opts.range_num;

	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)

	/* perf_time__ranges_skip_sample does not work if time is zero */
	return !n || !perf_time__ranges_skip_sample(range, n, tm);

static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
	bool consecutive = false;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc, &consecutive);
	b->use_size = b->data + b->size - start;
	if (b->use_size && consecutive)
		b->consecutive = true;
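/*
 * Note on the overlap fix above: intel_pt_find_overlap() returns the first
 * byte of 'b' that is not a repeat of data already seen at the end of 'a'.
 * Everything before 'start' is skipped by setting b->use_size, and
 * 'consecutive' records whether the remaining data follows on directly
 * from 'a' without a gap.
 */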
static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
			       struct auxtrace_buffer *buffer,
			       struct auxtrace_buffer *old_buffer,
			       struct intel_pt_buffer *b)
	int fd = perf_data__fd(ptq->pt->session->data);

	buffer->data = auxtrace_buffer__get_data(buffer, fd);

	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
	if (might_overlap && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;

		b->len = buffer->size;
		b->buf = buffer->data;
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;

		b->consecutive = true;

/* Do not drop buffers with references - refer intel_pt_get_trace() */
static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
					   struct auxtrace_buffer *buffer)
	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
	auxtrace_buffer__drop_data(buffer);

/* Must be serialized with respect to intel_pt_get_trace() */
static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	struct intel_pt_buffer b = { .len = 0 };

	buffer = auxtrace_buffer__next(queue, buffer);
	err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
	intel_pt_lookahead_drop_buffer(ptq, old_buffer);
	intel_pt_lookahead_drop_buffer(ptq, buffer);
	err = cb(&b, cb_data);

	if (buffer != old_buffer)
		intel_pt_lookahead_drop_buffer(ptq, buffer);
	intel_pt_lookahead_drop_buffer(ptq, old_buffer);

/*
 * This function assumes data is processed sequentially only.
 * Must be serialized with respect to intel_pt_lookahead()
 */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	auxtrace_buffer__drop_data(old_buffer);

	ptq->buffer = buffer;

	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);

	if (ptq->step_through_buffers)

	auxtrace_buffer__drop_data(old_buffer);
	ptq->old_buffer = buffer;

	auxtrace_buffer__drop_data(buffer);
	return intel_pt_get_trace(b, data);

struct intel_pt_cache_entry {
	struct auxtrace_cache_entry entry;
	enum intel_pt_insn_op op;
	enum intel_pt_insn_branch branch;
	char insn[INTEL_PT_INSN_BUF_SZ];
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)

static int intel_pt_cache_divisor(void)
	perf_config(intel_pt_config_div, &d);

static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();

	if (size > (1 << 21))

	return 32 - __builtin_clz(size);
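/*
 * Worked example for the sizing above (assuming a cache divisor of 64):
 * a 16 MiB DSO gives size = 16 MiB / 64 = 0x40000, which is not above
 * (1 << 21), so the hash size is 32 - __builtin_clz(0x40000) = 19 bits.
 */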
static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
	struct auxtrace_cache *c;

	if (dso->auxtrace_cache)
		return dso->auxtrace_cache;

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso->auxtrace_cache = c;

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;

	e = auxtrace_cache__alloc_entry(c);

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;
	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);

	err = auxtrace_cache__add(c, offset, &e->entry);
	auxtrace_cache__free_entry(c, e);

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);

static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	auxtrace_cache__remove(dso->auxtrace_cache, offset);

static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
	return ip >= pt->kernel_start ?
	       PERF_RECORD_MISC_KERNEL :
	       PERF_RECORD_MISC_USER;

static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	u64 offset, start_offset, start_ip;

	intel_pt_insn->length = 0;

	if (to_ip && *ip == to_ip)

	cpumode = intel_pt_cpumode(ptq->pt, *ip);

	thread = ptq->thread;
	if (cpumode != PERF_RECORD_MISC_KERNEL)
		thread = ptq->pt->unknown_thread;

	if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)

	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(al.map->dso,
				  DSO_DATA_STATUS_SEEN_ITRACE))

	offset = al.map->map_ip(al.map, *ip);

	if (!to_ip && one_map) {
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(al.map->dso, machine, offset);
		    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
			*insn_cnt_ptr = e->insn_cnt;
			intel_pt_insn->op = e->op;
			intel_pt_insn->branch = e->branch;
			intel_pt_insn->length = e->length;
			intel_pt_insn->rel = e->rel;
			memcpy(intel_pt_insn->buf, e->insn,
			       INTEL_PT_INSN_BUF_SZ);
			intel_pt_log_insn_no_data(intel_pt_insn, *ip);

	start_offset = offset;

	/* Load maps to ensure dso->is_64_bit has been updated */
	x86_64 = al.map->dso->is_64_bit;

	len = dso__data_read_offset(al.map->dso, machine,
				    INTEL_PT_INSN_BUF_SZ);

	if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))

	intel_pt_log_insn(intel_pt_insn, *ip);

	if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)

	if (max_insn_cnt && insn_cnt >= max_insn_cnt)

	*ip += intel_pt_insn->length;

	if (to_ip && *ip == to_ip)

	if (*ip >= al.map->end)

	offset += intel_pt_insn->length;

	*insn_cnt_ptr = insn_cnt;

	/*
	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
	 */
	struct intel_pt_cache_entry *e;

	e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);

	/* Ignore cache errors */
	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
			   *ip - start_ip, intel_pt_insn);

	*insn_cnt_ptr = insn_cnt;
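/*
 * Summary of the walk above: instructions are decoded one at a time from the
 * DSO image until a branch, 'to_ip' or 'max_insn_cnt' is reached.  When the
 * walk is bounded only by a branch (the '!to_ip && one_map' case), the result
 * is cached per (dso, offset) so later passes over the same code can skip
 * straight to the branch instead of re-decoding every instruction.
 */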
static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
				  uint64_t offset, const char *filename)
	struct addr_filter *filt;
	bool have_filter = false;
	bool hit_tracestop = false;
	bool hit_filter = false;

	list_for_each_entry(filt, &pt->filts.head, list) {
		if ((filename && !filt->filename) ||
		    (!filename && filt->filename) ||
		    (filename && strcmp(filename, filt->filename)))

		if (!(offset >= filt->addr && offset < filt->addr + filt->size))

		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
			     ip, offset, filename ? filename : "[kernel]",
			     filt->start ? "filter" : "stop",
			     filt->addr, filt->size);

		hit_tracestop = true;

	if (!hit_tracestop && !hit_filter)
		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
			     ip, offset, filename ? filename : "[kernel]");

	return hit_tracestop || (have_filter && !hit_filter);
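/*
 * The return value above is true when a trace-stop region was hit, or when
 * address filters exist but the IP falls outside all of them - i.e. the cases
 * in which a TIP.PGD at this IP is explained by address filtering.
 */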
static int __intel_pt_pgd_ip(uint64_t ip, void *data)
	struct intel_pt_queue *ptq = data;
	struct thread *thread;
	struct addr_location al;

	if (ip >= ptq->pt->kernel_start)
		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);

	cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;

	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)

	offset = al.map->map_ip(al.map, ip);

	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
				     al.map->dso->long_name);

static bool intel_pt_pgd_ip(uint64_t ip, void *data)
	return __intel_pt_pgd_ip(ip, data) > 0;

static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
	if (attr->type == pt->pmu_type) {
		*config = attr->config;

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)

static bool intel_pt_return_compression(struct intel_pt *pt)
	if (!pt->noretcomp_bit)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & pt->noretcomp_bit))

static bool intel_pt_branch_enable(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & 1) && !(config & 0x2000))

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
	if (!pt->mtc_freq_bits)

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;
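/*
 * Hypothetical example for the MTC period calculation above: if mtc_freq_bits
 * were 0x3c00 (a 4-bit field starting at bit 10), the loop would compute
 * shift = 10, so a config with the value 3 in that field would return
 * (config & 0x3c00) >> 10 = 3.
 */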
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
	bool timeless_decoding = true;

	if (!pt->tsc_bit || !pt->cap_user_time_zero)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;

	return timeless_decoding;

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)

static bool intel_pt_have_tsc(struct intel_pt *pt)
	bool have_tsc = false;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)

static bool intel_pt_sampling_mode(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
		    evsel->core.attr.aux_sample_size)

static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
	quot = ns / pt->tc.time_mult;
	rem  = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
	       pt->tc.time_mult;
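/*
 * The conversion above is the inverse of the perf_event_mmap_page time math:
 * ns = (ticks * time_mult) >> time_shift, so
 * ticks ~= (ns << time_shift) / time_mult, computed as quotient and remainder
 * separately to avoid overflowing the 64-bit shift.
 */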
static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
	size_t sz = sizeof(struct ip_callchain);

	/* Add 1 to callchain_sz for callchain context */
	sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);

static int intel_pt_callchain_init(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;

	pt->chain = intel_pt_alloc_chain(pt);

static void intel_pt_add_callchain(struct intel_pt *pt,
				   struct perf_sample *sample)
	struct thread *thread = machine__findnew_thread(pt->machine,

	thread_stack__sample_late(thread, sample->cpu, pt->chain,
				  pt->synth_opts.callchain_sz + 1, sample->ip,

	sample->callchain = pt->chain;

static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
	size_t sz = sizeof(struct branch_stack);

	sz += entry_cnt * sizeof(struct branch_entry);

static int intel_pt_br_stack_init(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;

	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);

static void intel_pt_add_br_stack(struct intel_pt *pt,
				  struct perf_sample *sample)
	struct thread *thread = machine__findnew_thread(pt->machine,

	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
				     pt->br_stack_sz, sample->ip,

	sample->branch_stack = pt->br_stack;

/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
	struct intel_pt_params params = { .get_trace = 0, };
	struct perf_env *env = pt->machine->env;
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));

	if (pt->synth_opts.callchain) {
		ptq->chain = intel_pt_alloc_chain(pt);

	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);

		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
		if (!ptq->last_branch)

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)

	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.lookahead = intel_pt_lookahead;
	params.return_compression = intel_pt_return_compression(pt);
	params.branch_enable = intel_pt_branch_enable(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
	params.quick = pt->synth_opts.quick;

	if (pt->filts.cnt > 0)
		params.pgd_ip = intel_pt_pgd_ip;

	if (pt->synth_opts.instructions) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
						INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;

	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
		params.flags |= INTEL_PT_FUP_WITH_NLIP;

	ptq->decoder = intel_pt_decoder_new(&params);

	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
static void intel_pt_free_queue(void *priv)
	struct intel_pt_queue *ptq = priv;

	thread__zput(ptq->thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		thread__zput(ptq->thread);

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

	ptq->pid = ptq->thread->pid_;
	if (queue->cpu == -1)
		ptq->cpu = ptq->thread->cpu;

static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
		if (ptq->state->to_ip)
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_ASYNC |
				     PERF_IP_FLAG_INTERRUPT;

			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;

		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);

			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);

	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
	if (ptq->state->type & INTEL_PT_TRACE_END)
		ptq->flags |= PERF_IP_FLAG_TRACE_END;
static void intel_pt_setup_time_range(struct intel_pt *pt,
				      struct intel_pt_queue *ptq)
	ptq->sel_timestamp = pt->time_ranges[0].start;

	if (ptq->sel_timestamp) {
		ptq->sel_start = true;

		ptq->sel_timestamp = pt->time_ranges[0].end;
		ptq->sel_start = false;

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))

	ptq = intel_pt_alloc_queue(pt, queue_nr);

	if (queue->cpu != -1)
		ptq->cpu = queue->cpu;
	ptq->tid = queue->tid;

	ptq->cbr_seen = UINT_MAX;

	if (pt->sampling_mode && !pt->snapshot_mode &&
	    pt->timeless_decoding)
		ptq->step_through_buffers = true;

	ptq->sync_switch = pt->sync_switch;

	intel_pt_setup_time_range(pt, ptq);

	if (!ptq->on_heap &&
	    (!ptq->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;

		if (pt->timeless_decoding)

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);

		if (ptq->sel_start && ptq->sel_timestamp) {
			ret = intel_pt_fast_forward(ptq->decoder,
						    ptq->sel_timestamp);

		state = intel_pt_decode(ptq->decoder);
		if (state->err == INTEL_PT_ERR_NODATA) {
			intel_pt_log("queue %u has no timestamp\n",

		if (state->timestamp)

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->have_sample = true;
		if (ptq->sel_start && ptq->sel_timestamp &&
		    ptq->timestamp < ptq->sel_timestamp)
			ptq->have_sample = false;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		ptq->on_heap = true;
static int intel_pt_setup_queues(struct intel_pt *pt)
	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);

static inline bool intel_pt_skip_event(struct intel_pt *pt)
	return pt->synth_opts.initial_skip &&
	       pt->num_events++ < pt->synth_opts.initial_skip;

/*
 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
 * from this decoder state.
 */
static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
	return pt->synth_opts.initial_skip &&
	       pt->num_events + 4 < pt->synth_opts.initial_skip;

static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample->pid = ptq->pid;
	sample->tid = ptq->tid;
	sample->cpu = ptq->cpu;
	sample->insn_len = ptq->insn_len;
	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
static void intel_pt_prep_b_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	intel_pt_prep_a_sample(ptq, event, sample);

	if (!pt->timeless_decoding)
		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample->ip = ptq->state->from_ip;
	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
	sample->addr = ptq->state->to_ip;
	sample->flags = ptq->flags;

	event->sample.header.misc = sample->cpumode;

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type)
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);

static inline int intel_pt_opt_inject(struct intel_pt *pt,
				      union perf_event *event,
				      struct perf_sample *sample, u64 type)
	if (!pt->synth_opts.inject)

	return intel_pt_inject_event(event, sample, type);

static int intel_pt_deliver_synth_event(struct intel_pt *pt,
					union perf_event *event,
					struct perf_sample *sample, u64 type)
	ret = intel_pt_opt_inject(pt, event, sample, type);

	ret = perf_session__deliver_synth_event(pt->session, event, sample);
		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		struct branch_entry entries;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))

	if (intel_pt_skip_event(pt))

	intel_pt_prep_b_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
	 */
	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
		dummy_bs = (struct dummy_branch_stack){
		sample.branch_stack = (struct branch_stack *)&dummy_bs;

	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->branches_sample_type);
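/*
 * The cyc_cnt/insn_cnt fields set above are the cycle and instruction deltas
 * accumulated since the previous branch sample; consumers can divide them to
 * obtain an IPC value for the interval covered by this sample.
 */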
static void intel_pt_prep_sample(struct intel_pt *pt,
				 struct intel_pt_queue *ptq,
				 union perf_event *event,
				 struct perf_sample *sample)
	intel_pt_prep_b_sample(pt, ptq, event, sample);

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz + 1,
				     sample->ip, pt->kernel_start);
		sample->callchain = ptq->chain;

	if (pt->synth_opts.last_branch) {
		thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
		sample->branch_stack = ptq->last_branch;

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	if (pt->synth_opts.quick)

		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;

	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->instructions_sample_type);

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->transactions_sample_type);
static void intel_pt_prep_p_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	intel_pt_prep_sample(pt, ptq, event, sample);

	/*
	 * Zero IP is used to mean "trace start" but that is not the case for
	 * power or PTWRITE events with no IP, so clear the flags.
	 */

static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_ptwrite raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->ptwrites_id;
	sample.stream_id = ptq->pt->ptwrites_id;

	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
	raw.payload = cpu_to_le64(ptq->state->ptw_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->ptwrites_sample_type);

static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_cbr raw;

	if (intel_pt_skip_cbr_event(pt))

	ptq->cbr_seen = ptq->state->cbr;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->cbr_id;
	sample.stream_id = ptq->pt->cbr_id;

	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
	raw.flags = cpu_to_le32(flags);
	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_mwait raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->mwait_id;
	sample.stream_id = ptq->pt->mwait_id;

	raw.payload = cpu_to_le64(ptq->state->mwait_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwre raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwre_id;
	sample.stream_id = ptq->pt->pwre_id;

	raw.payload = cpu_to_le64(ptq->state->pwre_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_exstop raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->exstop_id;
	sample.stream_id = ptq->pt->exstop_id;

	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwrx raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwrx_id;
	sample.stream_id = ptq->pt->pwrx_id;

	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
/*
 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
 * intel_pt_add_gp_regs().
 */
static const int pebs_gp_regs[] = {
	[PERF_REG_X86_FLAGS]	= 1,
	[PERF_REG_X86_IP]	= 2,
	[PERF_REG_X86_AX]	= 3,
	[PERF_REG_X86_CX]	= 4,
	[PERF_REG_X86_DX]	= 5,
	[PERF_REG_X86_BX]	= 6,
	[PERF_REG_X86_SP]	= 7,
	[PERF_REG_X86_BP]	= 8,
	[PERF_REG_X86_SI]	= 9,
	[PERF_REG_X86_DI]	= 10,
	[PERF_REG_X86_R8]	= 11,
	[PERF_REG_X86_R9]	= 12,
	[PERF_REG_X86_R10]	= 13,
	[PERF_REG_X86_R11]	= 14,
	[PERF_REG_X86_R12]	= 15,
	[PERF_REG_X86_R13]	= 16,
	[PERF_REG_X86_R14]	= 17,
	[PERF_REG_X86_R15]	= 18,
};
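/*
 * Example of the "plus 1" encoding above: pebs_gp_regs[PERF_REG_X86_SP] is 7,
 * so the SP value lives at index 6 of the raw PEBS gp_regs block, while an
 * entry of 0 (any register not listed) means the register is not present.
 */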
static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
				 const struct intel_pt_blk_items *items,
	const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
	u32 mask = items->mask[INTEL_PT_GP_REGS_POS];

	for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
		/* Get the PEBS gp_regs array index */
		int n = pebs_gp_regs[i] - 1;

		/*
		 * Add only registers that were requested (i.e. 'regs_mask') and
		 * that were provided (i.e. 'mask'), and update the resulting
		 * mask (i.e. 'intr_regs->mask') accordingly.
		 */
		if (mask & 1 << n && regs_mask & bit) {
			intr_regs->mask |= bit;
			*pos++ = gp_regs[n];
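/*
 * In the loop above, 'bit' is the perf_regs bit for register i, while 'n'
 * (pebs_gp_regs[i] - 1) is where that register sits in the raw PEBS gp_regs
 * block, so the two index spaces are walked in lock step.
 */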
#ifndef PERF_REG_X86_XMM0
#define PERF_REG_X86_XMM0 32
#endif

static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
			     const struct intel_pt_blk_items *items,
	u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
	const u64 *xmm = items->xmm;

	/*
	 * If there are any XMM registers, then there should be all of them.
	 * Nevertheless, follow the logic to add only registers that were
	 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
	 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
	 */
	intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;

	for (; mask; mask >>= 1, xmm++) {

#define LBR_INFO_MISPRED	(1ULL << 63)
#define LBR_INFO_IN_TX		(1ULL << 62)
#define LBR_INFO_ABORT		(1ULL << 61)
#define LBR_INFO_CYCLES		0xffff

/* Refer kernel's intel_pmu_store_pebs_lbrs() */
static u64 intel_pt_lbr_flags(u64 info)
	struct branch_flags flags;

	u.flags.mispred	  = !!(info & LBR_INFO_MISPRED);
	u.flags.predicted = !(info & LBR_INFO_MISPRED);
	u.flags.in_tx	  = !!(info & LBR_INFO_IN_TX);
	u.flags.abort	  = !!(info & LBR_INFO_ABORT);
	u.flags.cycles	  = info & LBR_INFO_CYCLES;
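/*
 * The LBR_INFO_* masks above mirror the layout of the MSR_LBR_INFO_x
 * registers: bit 63 mispredict, bit 62 in-transaction, bit 61 TSX abort, and
 * the low 16 bits the elapsed core cycles for the branch.
 */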
static void intel_pt_add_lbrs(struct branch_stack *br_stack,
			      const struct intel_pt_blk_items *items)
	to = &br_stack->entries[0].from;

	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
		u32 mask = items->mask[i];
		const u64 *from = items->val[i];

		for (; mask; mask >>= 3, from += 3) {
			if ((mask & 7) == 7) {
				*to++ = intel_pt_lbr_flags(from[2]);
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
	const struct intel_pt_blk_items *items = &ptq->state->items;
	struct perf_sample sample = { .ip = 0, };
	union perf_event *event = ptq->event_buf;
	struct intel_pt *pt = ptq->pt;
	struct evsel *evsel = pt->pebs_evsel;
	u64 sample_type = evsel->core.attr.sample_type;
	u64 id = evsel->core.id[0];
	u64 regs[8 * sizeof(sample.intr_regs.mask)];

	if (intel_pt_skip_event(pt))

	intel_pt_prep_a_sample(ptq, event, &sample);

	sample.stream_id = id;

	if (!evsel->core.attr.freq)
		sample.period = evsel->core.attr.sample_period;

	/* No support for non-zero CS base */
		sample.ip = items->ip;
	else if (items->has_rip)
		sample.ip = items->rip;

		sample.ip = ptq->state->from_ip;

	/* No support for guest mode at this time */
	cpumode = sample.ip < ptq->pt->kernel_start ?
		  PERF_RECORD_MISC_USER :
		  PERF_RECORD_MISC_KERNEL;

	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;

	sample.cpumode = cpumode;

	if (sample_type & PERF_SAMPLE_TIME) {
		if (items->has_timestamp)
			timestamp = items->timestamp;
		else if (!pt->timeless_decoding)
			timestamp = ptq->timestamp;
			sample.time = tsc_to_perf_time(timestamp, &pt->tc);

	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
	    pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip,
		sample.callchain = ptq->chain;

	if (sample_type & PERF_SAMPLE_REGS_INTR &&
	    (items->mask[INTEL_PT_GP_REGS_POS] ||
	     items->mask[INTEL_PT_XMM_POS])) {
		u64 regs_mask = evsel->core.attr.sample_regs_intr;

		sample.intr_regs.abi = items->is_32_bit ?
				       PERF_SAMPLE_REGS_ABI_32 :
				       PERF_SAMPLE_REGS_ABI_64;
		sample.intr_regs.regs = regs;

		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);

		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);

	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
		if (items->mask[INTEL_PT_LBR_0_POS] ||
		    items->mask[INTEL_PT_LBR_1_POS] ||
		    items->mask[INTEL_PT_LBR_2_POS]) {
			intel_pt_add_lbrs(ptq->last_branch, items);
		} else if (pt->synth_opts.last_branch) {
			thread_stack__br_sample(ptq->thread, ptq->cpu,

			ptq->last_branch->nr = 0;
		sample.branch_stack = ptq->last_branch;

	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
		sample.addr = items->mem_access_address;

	if (sample_type & PERF_SAMPLE_WEIGHT) {
		/*
		 * Refer kernel's setup_pebs_adaptive_sample_data() and
		 * intel_hsw_weight().
		 */
		if (items->has_mem_access_latency)
			sample.weight = items->mem_access_latency;
		if (!sample.weight && items->has_tsx_aux_info) {
			/* Cycles last block */
			sample.weight = (u32)items->tsx_aux_info;

	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
		u64 ax = items->has_rax ? items->rax : 0;
		/* Refer kernel's intel_hsw_transaction() */
		u64 txn = (u8)(items->tsx_aux_info >> 32);

		/* For RTM XABORTs also log the abort code from AX */
		if (txn & PERF_TXN_TRANSACTION && ax & 1)
			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
		sample.transaction = txn;

	return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
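/*
 * Example of the transaction decoding near the end of the function above:
 * for an RTM abort with AX = 0x05000001 and PERF_TXN_TRANSACTION set in txn,
 * the low bit of AX indicates an explicit XABORT, so the abort code
 * (AX >> 24) & 0xff = 0x05 is merged into 'txn' at PERF_TXN_ABORT_SHIFT.
 */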
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
				pid_t pid, pid_t tid, u64 ip, u64 timestamp)
	union perf_event event;
	char msg[MAX_AUXTRACE_ERROR_MSG];

	if (pt->synth_opts.error_minus_flags) {
		if (code == INTEL_PT_ERR_OVR &&
		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
		if (code == INTEL_PT_ERR_LOST &&
		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)

	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     code, cpu, pid, tid, ip, msg, timestamp);

	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",

static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
				 const struct intel_pt_state *state)
	struct intel_pt *pt = ptq->pt;
	u64 tm = ptq->timestamp;

	tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);

	return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
				    ptq->tid, state->from_ip, tm);

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
	struct auxtrace_queue *queue;
	pid_t tid = ptq->next_tid;

	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

	queue = &pt->queues.queue_array[ptq->queue_nr];
	intel_pt_set_pid_tid_cpu(pt, queue);

static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
	struct intel_pt *pt = ptq->pt;

	return ip == pt->switch_ip &&
	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
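/*
 * The check above treats a branch to the resolved switch IP (the kernel's
 * __switch_to, see intel_pt_switch_ip() below) as a context switch point,
 * but only for a plain unconditional, synchronous branch, so asynchronous
 * branches such as interrupts that land there do not count.
 */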
#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)

static int intel_pt_sample(struct intel_pt_queue *ptq)
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;

	if (!ptq->have_sample)

	ptq->have_sample = false;

	if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
		/*
		 * Cycle count and instruction count only go together to create
		 * a valid IPC ratio when the cycle count changes.
		 */
		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;

	/*
	 * Do PEBS first to allow for the possibility that the PEBS timestamp
	 * precedes the current timestamp.
	 */
	if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
		err = intel_pt_synth_pebs_sample(ptq);

	if (pt->sample_pwr_events) {
		if (ptq->state->cbr != ptq->cbr_seen) {
			err = intel_pt_synth_cbr_sample(ptq);
		if (state->type & INTEL_PT_PWR_EVT) {
			if (state->type & INTEL_PT_MWAIT_OP) {
				err = intel_pt_synth_mwait_sample(ptq);
			if (state->type & INTEL_PT_PWR_ENTRY) {
				err = intel_pt_synth_pwre_sample(ptq);
			if (state->type & INTEL_PT_EX_STOP) {
				err = intel_pt_synth_exstop_sample(ptq);
			if (state->type & INTEL_PT_PWR_EXIT) {
				err = intel_pt_synth_pwrx_sample(ptq);

	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
		err = intel_pt_synth_instruction_sample(ptq);

	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
		err = intel_pt_synth_transaction_sample(ptq);

	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
		err = intel_pt_synth_ptwrite_sample(ptq);

	if (!(state->type & INTEL_PT_BRANCH))

	if (pt->use_thread_stack) {
		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
				    state->from_ip, state->to_ip, ptq->insn_len,
				    state->trace_nr, pt->callstack,
				    pt->br_stack_sz_plus,

		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);

	if (pt->sample_branches) {
		err = intel_pt_synth_branch_sample(ptq);

	if (!ptq->sync_switch)

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
		switch (ptq->switch_state) {
		case INTEL_PT_SS_NOT_TRACING:
		case INTEL_PT_SS_UNKNOWN:
		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
			err = intel_pt_next_tid(pt, ptq);
			ptq->switch_state = INTEL_PT_SS_TRACING;
			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
	} else if (!state->to_ip) {
		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   state->to_ip == pt->ptss_ip &&
		   (ptq->flags & PERF_IP_FLAG_CALL)) {
		ptq->switch_state = INTEL_PT_SS_TRACING;
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
	struct machine *machine = pt->machine;
	struct symbol *sym, *start;
	u64 ip, switch_ip = 0;

	map = machine__kernel_map(machine);

	start = dso__first_symbol(map->dso);

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding == STB_GLOBAL &&
		    !strcmp(sym->name, "__switch_to")) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {

	if (!switch_ip || !ptss_ip)

	if (pt->have_sched_switch == 1)
		ptss = "perf_trace_sched_switch";

		ptss = "__perf_event_task_sched_out";

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (!strcmp(sym->name, ptss)) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {

static void intel_pt_enable_sync_switch(struct intel_pt *pt)
	pt->sync_switch = true;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		ptq->sync_switch = true;

/*
 * To filter against time ranges, it is only necessary to look at the next start
 */
static bool intel_pt_next_time(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;

	if (ptq->sel_start) {
		/* Next time is an end time */
		ptq->sel_start = false;
		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
		/* Next time is a start time */
		ptq->sel_start = true;
		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
	if (ptq->sel_start) {
		if (ptq->timestamp >= ptq->sel_timestamp) {
			/* After start time, so consider next time */
			intel_pt_next_time(ptq);
			if (!ptq->sel_timestamp) {
			/* Check against end time */

			/* Before start time, so fast forward */
			ptq->have_sample = false;
			if (ptq->sel_timestamp > *ff_timestamp) {
				if (ptq->sync_switch) {
					intel_pt_next_tid(ptq->pt, ptq);
					ptq->switch_state = INTEL_PT_SS_UNKNOWN;
				*ff_timestamp = ptq->sel_timestamp;
				err = intel_pt_fast_forward(ptq->decoder,
							    ptq->sel_timestamp);
	} else if (ptq->timestamp > ptq->sel_timestamp) {
		/* After end time, so consider next time */
		if (!intel_pt_next_time(ptq)) {
			/* No next time range, so stop decoding */
			ptq->have_sample = false;
			ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
		/* Check against next start time */

		/* Before end time */
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	u64 ff_timestamp = 0;

	if (!pt->kernel_start) {
		pt->kernel_start = machine__kernel_start(pt->machine);
		if (pt->per_cpu_mmaps &&
		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
		    !pt->sampling_mode) {
			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
				intel_pt_enable_sync_switch(pt);

	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	err = intel_pt_sample(ptq);

	state = intel_pt_decode(ptq->decoder);
	if (state->err == INTEL_PT_ERR_NODATA)
	if (ptq->sync_switch &&
	    state->from_ip >= pt->kernel_start) {
		ptq->sync_switch = false;
		intel_pt_next_tid(pt, ptq);
	if (pt->synth_opts.errors) {
		err = intel_ptq_synth_error(ptq, state);

	ptq->have_sample = true;
	intel_pt_sample_flags(ptq);

	/* Use estimated TSC upon return to user space */
	    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
	    state->to_ip && state->to_ip < pt->kernel_start) {
		intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
			     state->timestamp, state->est_timestamp);
		ptq->timestamp = state->est_timestamp;
	/* Use estimated TSC in unknown switch state */
	} else if (ptq->sync_switch &&
		   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   intel_pt_is_switch_ip(ptq, state->to_ip) &&
		   ptq->next_tid == -1) {
		intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
			     state->timestamp, state->est_timestamp);
		ptq->timestamp = state->est_timestamp;
	} else if (state->timestamp > ptq->timestamp) {
		ptq->timestamp = state->timestamp;

	if (ptq->sel_timestamp) {
		err = intel_pt_time_filter(ptq, &ff_timestamp);

	if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
		*timestamp = ptq->timestamp;

static inline int intel_pt_update_queues(struct intel_pt *pt)
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);

static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
	unsigned int queue_nr;

	struct auxtrace_queue *queue;
	struct intel_pt_queue *ptq;

	if (!pt->heap.heap_cnt)

	if (pt->heap.heap_array[0].ordinal >= timestamp)

	queue_nr = pt->heap.heap_array[0].queue_nr;
	queue = &pt->queues.queue_array[queue_nr];

	intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
		     queue_nr, pt->heap.heap_array[0].ordinal,

	auxtrace_heap__pop(&pt->heap);

	if (pt->heap.heap_cnt) {
		ts = pt->heap.heap_array[0].ordinal + 1;

	intel_pt_set_pid_tid_cpu(pt, queue);

	ret = intel_pt_run_decoder(ptq, &ts);
		auxtrace_heap__add(&pt->heap, queue_nr, ts);

		ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);

		ptq->on_heap = false;

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
	struct auxtrace_queues *queues = &pt->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);

static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
					    struct auxtrace_queue *queue,
					    struct perf_sample *sample)
	struct machine *m = ptq->pt->machine;

	ptq->pid = sample->pid;
	ptq->tid = sample->tid;
	ptq->cpu = queue->cpu;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->pid == -1) {
		ptq->thread = machine__find_thread(m, -1, ptq->tid);
			ptq->pid = ptq->thread->pid_;

	ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);

static int intel_pt_process_timeless_sample(struct intel_pt *pt,
					    struct perf_sample *sample)
	struct auxtrace_queue *queue;
	struct intel_pt_queue *ptq;

	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);

	ptq->time = sample->time;
	intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
	intel_pt_run_decoder(ptq, &ts);

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0, sample->time);

static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
	if (cpu < 0 || !pt->queues.nr_queues)

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;

static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
	struct intel_pt_queue *ptq;

	if (!pt->sync_switch)

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq || !ptq->sync_switch)

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
			ptq->on_heap = true;
		ptq->switch_state = INTEL_PT_SS_TRACING;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);

static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
	struct evsel *evsel;

	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
	if (evsel != pt->switch_evsel)

	tid = evsel__intval(evsel, sample, "next_pid");

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);

	return machine__set_current_tid(pt->machine, cpu, -1, tid);

static int intel_pt_context_switch_in(struct intel_pt *pt,
				      struct perf_sample *sample)
	pid_t pid = sample->pid;
	pid_t tid = sample->tid;
	int cpu = sample->cpu;

	if (pt->sync_switch) {
		struct intel_pt_queue *ptq;

		ptq = intel_pt_cpu_to_ptq(pt, cpu);
		if (ptq && ptq->sync_switch) {
			switch (ptq->switch_state) {
			case INTEL_PT_SS_NOT_TRACING:
			case INTEL_PT_SS_UNKNOWN:
			case INTEL_PT_SS_TRACING:
			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
				ptq->switch_state = INTEL_PT_SS_TRACING;

	/*
	 * If the current tid has not been updated yet, ensure it is now that
	 * a "switch in" event has occurred.
	 */
	if (machine__get_current_tid(pt->machine, cpu) == tid)

	return machine__set_current_tid(pt->machine, cpu, pid, tid);

static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	if (pt->have_sched_switch == 3) {
			return intel_pt_context_switch_in(pt, sample);
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;

		pr_err("context_switch event has no tid\n");

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);

	return machine__set_current_tid(pt->machine, cpu, pid, tid);

static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
	if (!pt->per_cpu_mmaps)

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);

static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
			     struct addr_location *al)
	if (!al->map || addr < al->map->start || addr >= al->map->end) {
		if (!thread__find_map(thread, cpumode, addr, al))

/* Invalidate all instruction cache entries that overlap the text poke */
static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
	/* Assume text poke begins in a basic block no more than 4096 bytes */
	int cnt = 4096 + event->text_poke.new_len;
	struct thread *thread = pt->unknown_thread;
	struct addr_location al = { .map = NULL };
	struct machine *machine = pt->machine;
	struct intel_pt_cache_entry *e;

	if (!event->text_poke.new_len)

	for (; cnt; cnt--, addr--) {
		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
			if (addr < event->text_poke.addr)

		if (!al.map->dso || !al.map->dso->auxtrace_cache)

		offset = al.map->map_ip(al.map, addr);

		e = intel_pt_cache_lookup(al.map->dso, machine, offset);

		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
			/*
			 * No overlap. Working backwards there cannot be another
			 * basic block that overlaps the text poke if there is a
			 * branch instruction before the text poke address.
			 */
			if (e->branch != INTEL_PT_BR_NO_BRANCH)

			intel_pt_cache_invalidate(al.map->dso, machine, offset);
			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
				     al.map->dso->long_name, addr);

static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  struct perf_tool *tool)
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);

	if (pt->timeless_decoding) {
		if (pt->sampling_mode
2305 static inline int intel_pt_update_queues(struct intel_pt
*pt
)
2307 if (pt
->queues
.new_data
) {
2308 pt
->queues
.new_data
= false;
2309 return intel_pt_setup_queues(pt
);
2314 static int intel_pt_process_queues(struct intel_pt
*pt
, u64 timestamp
)
2316 unsigned int queue_nr
;
2321 struct auxtrace_queue
*queue
;
2322 struct intel_pt_queue
*ptq
;
2324 if (!pt
->heap
.heap_cnt
)
2327 if (pt
->heap
.heap_array
[0].ordinal
>= timestamp
)
2330 queue_nr
= pt
->heap
.heap_array
[0].queue_nr
;
2331 queue
= &pt
->queues
.queue_array
[queue_nr
];
2334 intel_pt_log("queue %u processing 0x%" PRIx64
" to 0x%" PRIx64
"\n",
2335 queue_nr
, pt
->heap
.heap_array
[0].ordinal
,
2338 auxtrace_heap__pop(&pt
->heap
);
2340 if (pt
->heap
.heap_cnt
) {
2341 ts
= pt
->heap
.heap_array
[0].ordinal
+ 1;
2348 intel_pt_set_pid_tid_cpu(pt
, queue
);
2350 ret
= intel_pt_run_decoder(ptq
, &ts
);
2353 auxtrace_heap__add(&pt
->heap
, queue_nr
, ts
);
2358 ret
= auxtrace_heap__add(&pt
->heap
, queue_nr
, ts
);
2362 ptq
->on_heap
= false;
2369 static int intel_pt_process_timeless_queues(struct intel_pt
*pt
, pid_t tid
,
2372 struct auxtrace_queues
*queues
= &pt
->queues
;
2376 for (i
= 0; i
< queues
->nr_queues
; i
++) {
2377 struct auxtrace_queue
*queue
= &pt
->queues
.queue_array
[i
];
2378 struct intel_pt_queue
*ptq
= queue
->priv
;
2380 if (ptq
&& (tid
== -1 || ptq
->tid
== tid
)) {
2382 intel_pt_set_pid_tid_cpu(pt
, queue
);
2383 intel_pt_run_decoder(ptq
, &ts
);
2389 static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue
*ptq
,
2390 struct auxtrace_queue
*queue
,
2391 struct perf_sample
*sample
)
2393 struct machine
*m
= ptq
->pt
->machine
;
2395 ptq
->pid
= sample
->pid
;
2396 ptq
->tid
= sample
->tid
;
2397 ptq
->cpu
= queue
->cpu
;
2399 intel_pt_log("queue %u cpu %d pid %d tid %d\n",
2400 ptq
->queue_nr
, ptq
->cpu
, ptq
->pid
, ptq
->tid
);
2402 thread__zput(ptq
->thread
);
2407 if (ptq
->pid
== -1) {
2408 ptq
->thread
= machine__find_thread(m
, -1, ptq
->tid
);
2410 ptq
->pid
= ptq
->thread
->pid_
;
2414 ptq
->thread
= machine__findnew_thread(m
, ptq
->pid
, ptq
->tid
);
2417 static int intel_pt_process_timeless_sample(struct intel_pt
*pt
,
2418 struct perf_sample
*sample
)
2420 struct auxtrace_queue
*queue
;
2421 struct intel_pt_queue
*ptq
;
2424 queue
= auxtrace_queues__sample_queue(&pt
->queues
, sample
, pt
->session
);
2433 ptq
->time
= sample
->time
;
2434 intel_pt_sample_set_pid_tid_cpu(ptq
, queue
, sample
);
2435 intel_pt_run_decoder(ptq
, &ts
);
2439 static int intel_pt_lost(struct intel_pt
*pt
, struct perf_sample
*sample
)
2441 return intel_pt_synth_error(pt
, INTEL_PT_ERR_LOST
, sample
->cpu
,
2442 sample
->pid
, sample
->tid
, 0, sample
->time
);
2445 static struct intel_pt_queue
*intel_pt_cpu_to_ptq(struct intel_pt
*pt
, int cpu
)
2449 if (cpu
< 0 || !pt
->queues
.nr_queues
)
2452 if ((unsigned)cpu
>= pt
->queues
.nr_queues
)
2453 i
= pt
->queues
.nr_queues
- 1;
2457 if (pt
->queues
.queue_array
[i
].cpu
== cpu
)
2458 return pt
->queues
.queue_array
[i
].priv
;
2460 for (j
= 0; i
> 0; j
++) {
2461 if (pt
->queues
.queue_array
[--i
].cpu
== cpu
)
2462 return pt
->queues
.queue_array
[i
].priv
;
2465 for (; j
< pt
->queues
.nr_queues
; j
++) {
2466 if (pt
->queues
.queue_array
[j
].cpu
== cpu
)
2467 return pt
->queues
.queue_array
[j
].priv
;
2473 static int intel_pt_sync_switch(struct intel_pt
*pt
, int cpu
, pid_t tid
,
2476 struct intel_pt_queue
*ptq
;
2479 if (!pt
->sync_switch
)
2482 ptq
= intel_pt_cpu_to_ptq(pt
, cpu
);
2483 if (!ptq
|| !ptq
->sync_switch
)
2486 switch (ptq
->switch_state
) {
2487 case INTEL_PT_SS_NOT_TRACING
:
2489 case INTEL_PT_SS_UNKNOWN
:
2490 case INTEL_PT_SS_TRACING
:
2491 ptq
->next_tid
= tid
;
2492 ptq
->switch_state
= INTEL_PT_SS_EXPECTING_SWITCH_IP
;
2494 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT
:
2495 if (!ptq
->on_heap
) {
2496 ptq
->timestamp
= perf_time_to_tsc(timestamp
,
2498 err
= auxtrace_heap__add(&pt
->heap
, ptq
->queue_nr
,
2502 ptq
->on_heap
= true;
2504 ptq
->switch_state
= INTEL_PT_SS_TRACING
;
2506 case INTEL_PT_SS_EXPECTING_SWITCH_IP
:
2507 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu
);
2518 static int intel_pt_process_switch(struct intel_pt
*pt
,
2519 struct perf_sample
*sample
)
2521 struct evsel
*evsel
;
2525 evsel
= perf_evlist__id2evsel(pt
->session
->evlist
, sample
->id
);
2526 if (evsel
!= pt
->switch_evsel
)
2529 tid
= evsel__intval(evsel
, sample
, "next_pid");
2532 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64
" tsc %#"PRIx64
"\n",
2533 cpu
, tid
, sample
->time
, perf_time_to_tsc(sample
->time
,
2536 ret
= intel_pt_sync_switch(pt
, cpu
, tid
, sample
->time
);
2540 return machine__set_current_tid(pt
->machine
, cpu
, -1, tid
);
static int intel_pt_context_switch_in(struct intel_pt *pt,
				      struct perf_sample *sample)
{
	pid_t pid = sample->pid;
	pid_t tid = sample->tid;
	int cpu = sample->cpu;

	if (pt->sync_switch) {
		struct intel_pt_queue *ptq;

		ptq = intel_pt_cpu_to_ptq(pt, cpu);
		if (ptq && ptq->sync_switch) {
			ptq->next_tid = -1;
			switch (ptq->switch_state) {
			case INTEL_PT_SS_NOT_TRACING:
			case INTEL_PT_SS_UNKNOWN:
			case INTEL_PT_SS_TRACING:
				break;
			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
				ptq->switch_state = INTEL_PT_SS_TRACING;
				break;
			default:
				break;
			}
		}
	}

	/*
	 * If the current tid has not been updated yet, ensure it is now that
	 * a "switch in" event has occurred.
	 */
	if (machine__get_current_tid(pt->machine, cpu) == tid)
		return 0;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}
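
/*
 * Handle PERF_RECORD_SWITCH / PERF_RECORD_SWITCH_CPU_WIDE. When
 * have_sched_switch == 3, switch-in events are handled above and the next
 * pid/tid are taken from the CPU-wide switch-out event; otherwise only
 * switch-in events are used and the pid/tid come from the sample itself.
 */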
static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	pid_t pid, tid;
	int cpu, ret;

	cpu = sample->cpu;

	if (pt->have_sched_switch == 3) {
		if (!out)
			return intel_pt_context_switch_in(pt, sample);
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
			return -EINVAL;
		}
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;
	} else {
		if (out)
			return 0;
		pid = sample->pid;
		tid = sample->tid;
	}

	if (tid == -1) {
		pr_err("context_switch event has no tid\n");
		return -EINVAL;
	}

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}
static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if (!pt->per_cpu_mmaps)
		return 0;

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}
static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
			     struct addr_location *al)
{
	if (!al->map || addr < al->map->start || addr >= al->map->end) {
		if (!thread__find_map(thread, cpumode, addr, al))
			return -1;
	}

	return 0;
}
/* Invalidate all instruction cache entries that overlap the text poke */
static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
{
	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
	/* Assume text poke begins in a basic block no more than 4096 bytes */
	int cnt = 4096 + event->text_poke.new_len;
	struct thread *thread = pt->unknown_thread;
	struct addr_location al = { .map = NULL };
	struct machine *machine = pt->machine;
	struct intel_pt_cache_entry *e;
	u64 offset;

	if (!event->text_poke.new_len)
		return 0;

	for (; cnt; cnt--, addr--) {
		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
			if (addr < event->text_poke.addr)
				return 0;
			continue;
		}

		if (!al.map->dso || !al.map->dso->auxtrace_cache)
			continue;

		offset = al.map->map_ip(al.map, addr);

		e = intel_pt_cache_lookup(al.map->dso, machine, offset);
		if (!e)
			continue;

		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
			/*
			 * No overlap. Working backwards there cannot be another
			 * basic block that overlaps the text poke if there is a
			 * branch instruction before the text poke address.
			 */
			if (e->branch != INTEL_PT_BR_NO_BRANCH)
				return 0;
		} else {
			intel_pt_cache_invalidate(al.map->dso, machine, offset);
			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
				     al.map->dso->long_name, addr);
		}
	}

	return 0;
}
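
/*
 * Main perf event callback: convert the sample time to TSC, decode any
 * queued trace data up to that point, then handle side-band events (lost
 * data, context switches, itrace start, text pokes) and attach synthesized
 * callchains / branch stacks to regular samples where requested.
 */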
static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;
	int err = 0;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);
		if (err)
			return err;
	}

	if (pt->timeless_decoding) {
		if (pt->sampling_mode) {
			if (sample->aux_sample.size)
				err = intel_pt_process_timeless_sample(pt,
								       sample);
		} else if (event->header.type == PERF_RECORD_EXIT) {
			err = intel_pt_process_timeless_queues(pt,
							       event->fork.tid,
							       sample->time);
		}
	} else if (timestamp) {
		err = intel_pt_process_queues(pt, timestamp);
	}
	if (err)
		return err;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (pt->synth_opts.add_callchain && !sample->callchain)
			intel_pt_add_callchain(pt, sample);
		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
			intel_pt_add_br_stack(pt, sample);
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    pt->synth_opts.errors) {
		err = intel_pt_lost(pt, sample);
		if (err)
			return err;
	}

	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
		err = intel_pt_process_switch(pt, sample);
	else if (event->header.type == PERF_RECORD_ITRACE_START)
		err = intel_pt_process_itrace_start(pt, event, sample);
	else if (event->header.type == PERF_RECORD_SWITCH ||
		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		err = intel_pt_context_switch(pt, event, sample);

	if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
		err = intel_pt_text_poke(pt, event);

	if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
		intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
			     event->header.type, sample->cpu, sample->time,
			     timestamp);
		intel_pt_log_event(event);
	}

	return err;
}
static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_pt_update_queues(pt);
	if (ret < 0)
		return ret;

	if (pt->timeless_decoding)
		return intel_pt_process_timeless_queues(pt, -1,
							MAX_TIMESTAMP - 1);

	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}
static void intel_pt_free_events(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_pt_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	intel_pt_log_disable();
	auxtrace_queues__free(queues);
}
static void intel_pt_free(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	auxtrace_heap__free(&pt->heap);
	intel_pt_free_events(session);
	session->auxtrace = NULL;
	thread__put(pt->unknown_thread);
	addr_filters__exit(&pt->filts);
	zfree(&pt->chain);
	zfree(&pt->filter);
	zfree(&pt->time_ranges);
	free(pt);
}
static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
				       struct evsel *evsel)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	return evsel->core.attr.type == pt->pmu_type;
}
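
/*
 * PERF_RECORD_AUXTRACE handler: when the trace data was not already queued
 * from the file's auxtrace index (e.g. piped input), add each AUXTRACE event
 * to the queues as it arrives, and dump the raw packets if dump_trace is set.
 */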
static int intel_pt_process_auxtrace_event(struct perf_session *session,
					   union perf_event *event,
					   struct perf_tool *tool __maybe_unused)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	if (!pt->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&pt->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_pt_dump_event(pt, buffer->data,
						    buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}
static int intel_pt_queue_data(struct perf_session *session,
			       struct perf_sample *sample,
			       union perf_event *event, u64 data_offset)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;

	if (event) {
		return auxtrace_queues__add_event(&pt->queues, session, event,
						  data_offset, NULL);
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	return auxtrace_queues__add_sample(&pt->queues, session, sample,
					   data_offset, timestamp);
}
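
/*
 * perf_event__synthesize_attr() delivers the synthesized attribute through a
 * perf_tool callback, so wrap a dummy tool together with the session; the
 * callback below recovers the session via container_of() and delivers the
 * event to it.
 */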
struct intel_pt_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
static int intel_pt_event_synth(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample __maybe_unused,
				struct machine *machine __maybe_unused)
{
	struct intel_pt_synth *intel_pt_synth =
			container_of(tool, struct intel_pt_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
						 NULL);
}
static int intel_pt_synth_event(struct perf_session *session, const char *name,
				struct perf_event_attr *attr, u64 id)
{
	struct intel_pt_synth intel_pt_synth;
	int err;

	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
		 name, id, (u64)attr->sample_type);

	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
	intel_pt_synth.session = session;

	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
					  &id, intel_pt_event_synth);
	if (err)
		pr_err("%s: failed to synthesize '%s' event type\n",
		       __func__, name);

	return err;
}
static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}
static struct evsel *intel_pt_evsel(struct intel_pt *pt,
				    struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
			return evsel;
	}

	return NULL;
}
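
/*
 * Create the event types that will be synthesized from the trace (branches,
 * instructions, transactions, ptwrite and power events), basing their
 * attributes on the Intel PT event itself. Synthesized ids start at the PT
 * event's first id plus 1000000000, keeping them clear of existing sample ids.
 */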
static int intel_pt_synth_events(struct intel_pt *pt,
				 struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel = intel_pt_evsel(pt, evlist);
	struct perf_event_attr attr;
	u64 id;
	int err;

	if (!evsel) {
		pr_debug("There are no selected events with Intel Processor Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (pt->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;
	if (!pt->per_cpu_mmaps)
		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	id = evsel->core.id[0] + 1000000000;
	if (!id)
		id = 1;

	if (pt->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = intel_pt_synth_event(session, "branches", &attr, id);
		if (err)
			return err;
		pt->sample_branches = true;
		pt->branches_sample_type = attr.sample_type;
		pt->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (pt->synth_opts.callchain)
		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
	if (pt->synth_opts.last_branch)
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (pt->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
			attr.sample_period =
				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
		else
			attr.sample_period = pt->synth_opts.period;
		err = intel_pt_synth_event(session, "instructions", &attr, id);
		if (err)
			return err;
		pt->sample_instructions = true;
		pt->instructions_sample_type = attr.sample_type;
		pt->instructions_id = id;
		id += 1;
	}

	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
	attr.sample_period = 1;

	if (pt->synth_opts.transactions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		err = intel_pt_synth_event(session, "transactions", &attr, id);
		if (err)
			return err;
		pt->sample_transactions = true;
		pt->transactions_sample_type = attr.sample_type;
		pt->transactions_id = id;
		intel_pt_set_event_name(evlist, id, "transactions");
		id += 1;
	}

	attr.type = PERF_TYPE_SYNTH;
	attr.sample_type |= PERF_SAMPLE_RAW;

	if (pt->synth_opts.ptwrites) {
		attr.config = PERF_SYNTH_INTEL_PTWRITE;
		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
		if (err)
			return err;
		pt->sample_ptwrites = true;
		pt->ptwrites_sample_type = attr.sample_type;
		pt->ptwrites_id = id;
		intel_pt_set_event_name(evlist, id, "ptwrite");
		id += 1;
	}

	if (pt->synth_opts.pwr_events) {
		pt->sample_pwr_events = true;
		pt->pwr_events_sample_type = attr.sample_type;

		attr.config = PERF_SYNTH_INTEL_CBR;
		err = intel_pt_synth_event(session, "cbr", &attr, id);
		if (err)
			return err;
		pt->cbr_id = id;
		intel_pt_set_event_name(evlist, id, "cbr");
		id += 1;
	}

	if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
		attr.config = PERF_SYNTH_INTEL_MWAIT;
		err = intel_pt_synth_event(session, "mwait", &attr, id);
		if (err)
			return err;
		pt->mwait_id = id;
		intel_pt_set_event_name(evlist, id, "mwait");
		id += 1;

		attr.config = PERF_SYNTH_INTEL_PWRE;
		err = intel_pt_synth_event(session, "pwre", &attr, id);
		if (err)
			return err;
		pt->pwre_id = id;
		intel_pt_set_event_name(evlist, id, "pwre");
		id += 1;

		attr.config = PERF_SYNTH_INTEL_EXSTOP;
		err = intel_pt_synth_event(session, "exstop", &attr, id);
		if (err)
			return err;
		pt->exstop_id = id;
		intel_pt_set_event_name(evlist, id, "exstop");
		id += 1;

		attr.config = PERF_SYNTH_INTEL_PWRX;
		err = intel_pt_synth_event(session, "pwrx", &attr, id);
		if (err)
			return err;
		pt->pwrx_id = id;
		intel_pt_set_event_name(evlist, id, "pwrx");
		id += 1;
	}

	return 0;
}
static void intel_pt_setup_pebs_events(struct intel_pt *pt)
{
	struct evsel *evsel;

	if (!pt->synth_opts.other_events)
		return;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (evsel->core.attr.aux_output && evsel->core.id) {
			pt->sample_pebs = true;
			pt->pebs_evsel = evsel;
			return;
		}
	}
}
static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel) {
		const char *name = evsel__name(evsel);

		if (!strcmp(name, "sched:sched_switch"))
			return evsel;
	}

	return NULL;
}
static bool intel_pt_find_switch(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.context_switch)
			return true;
	}

	return false;
}
static int intel_pt_perf_config(const char *var, const char *value, void *data)
{
	struct intel_pt *pt = data;

	if (!strcmp(var, "intel-pt.mispred-all"))
		pt->mispred_all = perf_config_bool(var, value);

	return 0;
}
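
/*
 * perf_time_to_tsc() and tsc_to_perf_time() round, so the conversion is not
 * exactly invertible. The two helpers below step the TSC value until it
 * converts back to the requested nanosecond boundary, giving TSC limits that
 * match the perf-time limits.
 */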
/* Find least TSC which converts to ns or later */
static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
{
	u64 tsc, tm;

	tsc = perf_time_to_tsc(ns, &pt->tc);

	while (1) {
		tm = tsc_to_perf_time(tsc, &pt->tc);
		if (tm < ns)
			break;
		tsc -= 1;
	}

	while (tm < ns)
		tm = tsc_to_perf_time(++tsc, &pt->tc);

	return tsc;
}
/* Find greatest TSC which converts to ns or earlier */
static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
{
	u64 tsc, tm;

	tsc = perf_time_to_tsc(ns, &pt->tc);

	while (1) {
		tm = tsc_to_perf_time(tsc, &pt->tc);
		if (tm > ns)
			break;
		tsc += 1;
	}

	while (tm > ns)
		tm = tsc_to_perf_time(--tsc, &pt->tc);

	return tsc;
}
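
/*
 * Convert the --time perf-time ranges selected by the user into TSC ranges
 * for the decoder. A zero start or end is kept as zero, i.e. the range is
 * open-ended on that side.
 */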
static int intel_pt_setup_time_ranges(struct intel_pt *pt,
				      struct itrace_synth_opts *opts)
{
	struct perf_time_interval *p = opts->ptime_range;
	int n = opts->range_num;
	int i;

	if (!n || !p || pt->timeless_decoding)
		return 0;

	pt->time_ranges = calloc(n, sizeof(struct range));
	if (!pt->time_ranges)
		return -ENOMEM;

	pt->range_cnt = n;

	intel_pt_log("%s: %u range(s)\n", __func__, n);

	for (i = 0; i < n; i++) {
		struct range *r = &pt->time_ranges[i];
		u64 ts = p[i].start;
		u64 te = p[i].end;

		/*
		 * Take care to ensure the TSC range matches the perf-time range
		 * when converted back to perf-time.
		 */
		r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
		r->end   = te ? intel_pt_tsc_end(te, pt) : 0;

		intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
			     i, ts, te);
		intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
			     i, r->start, r->end);
	}

	return 0;
}
static const char * const intel_pt_info_fmts[] = {
	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
};
static void intel_pt_print_info(__u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}
static void intel_pt_print_info_str(const char *name, const char *str)
{
	if (!dump_trace)
		return;

	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
}
static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos)
{
	return auxtrace_info->header.size >=
		sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1));
}
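
/*
 * Set-up entry point, called for the PERF_RECORD_AUXTRACE_INFO event:
 * validate the event size, populate struct intel_pt from the priv[] array,
 * register the auxtrace callbacks and apply the itrace synthesis options.
 */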
int intel_pt_process_auxtrace_info(union perf_event *event,
				   struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
	struct intel_pt *pt;
	void *info_end;
	__u64 *info;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	pt = zalloc(sizeof(struct intel_pt));
	if (!pt)
		return -ENOMEM;

	addr_filters__init(&pt->filts);

	err = perf_config(intel_pt_perf_config, pt);
	if (err)
		goto err_free;

	err = auxtrace_queues__init(&pt->queues);
	if (err)
		goto err_free;

	intel_pt_log_set_name(INTEL_PT_PMU_NAME);

	pt->session = session;
	pt->machine = &session->machines.host; /* No kvm support */
	pt->auxtrace_type = auxtrace_info->type;
	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
			    INTEL_PT_PER_CPU_MMAPS);

	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
				    INTEL_PT_CYC_BIT);
	}

	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
		pt->max_non_turbo_ratio =
			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
		intel_pt_print_info(&auxtrace_info->priv[0],
				    INTEL_PT_MAX_NONTURBO_RATIO,
				    INTEL_PT_MAX_NONTURBO_RATIO);
	}

	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
	info_end = (void *)info + auxtrace_info->header.size;

	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
		size_t len;

		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
		intel_pt_print_info(&auxtrace_info->priv[0],
				    INTEL_PT_FILTER_STR_LEN,
				    INTEL_PT_FILTER_STR_LEN);
		if (len) {
			const char *filter = (const char *)info;

			len = roundup(len + 1, 8);
			info += len >> 3;
			if ((void *)info > info_end) {
				pr_err("%s: bad filter string length\n", __func__);
				err = -EINVAL;
				goto err_free_queues;
			}
			pt->filter = memdup(filter, len);
			if (!pt->filter) {
				err = -ENOMEM;
				goto err_free_queues;
			}
			if (session->header.needs_swap)
				mem_bswap_64(pt->filter, len);
			if (pt->filter[len - 1]) {
				pr_err("%s: filter string not null terminated\n", __func__);
				err = -EINVAL;
				goto err_free_queues;
			}
			err = addr_filters__parse_bare_filter(&pt->filts,
							      filter);
			if (err)
				goto err_free_queues;
		}
		intel_pt_print_info_str("Filter string", pt->filter);
	}

	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
	if (pt->timeless_decoding && !pt->tc.time_mult)
		pt->tc.time_mult = 1;
	pt->have_tsc = intel_pt_have_tsc(pt);
	pt->sampling_mode = intel_pt_sampling_mode(pt);
	pt->est_tsc = !pt->timeless_decoding;

	pt->unknown_thread = thread__new(999999999, 999999999);
	if (!pt->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

	/*
	 * Since this thread will not be kept in any rbtree nor in a
	 * list, initialize its list node so that at thread__put() the
	 * current thread lifetime assumption is kept and we don't segfault
	 * at list_del_init().
	 */
	INIT_LIST_HEAD(&pt->unknown_thread->node);

	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;
	if (thread__init_maps(pt->unknown_thread, pt->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	pt->auxtrace.process_event = intel_pt_process_event;
	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
	pt->auxtrace.queue_data = intel_pt_queue_data;
	pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
	pt->auxtrace.flush_events = intel_pt_flush;
	pt->auxtrace.free_events = intel_pt_free_events;
	pt->auxtrace.free = intel_pt_free;
	pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
	session->auxtrace = &pt->auxtrace;

	if (dump_trace)
		return 0;

	if (pt->have_sched_switch == 1) {
		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
		if (!pt->switch_evsel) {
			pr_err("%s: missing sched_switch event\n", __func__);
			err = -EINVAL;
			goto err_delete_thread;
		}
	} else if (pt->have_sched_switch == 2 &&
		   !intel_pt_find_switch(session->evlist)) {
		pr_err("%s: missing context_switch attribute flag\n", __func__);
		err = -EINVAL;
		goto err_delete_thread;
	}

	if (session->itrace_synth_opts->set) {
		pt->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&pt->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		if (!session->itrace_synth_opts->default_no_sample &&
		    !session->itrace_synth_opts->inject) {
			pt->synth_opts.branches = false;
			pt->synth_opts.callchain = true;
			pt->synth_opts.add_callchain = true;
		}
		pt->synth_opts.thread_stack =
				session->itrace_synth_opts->thread_stack;
	}

	if (pt->synth_opts.log)
		intel_pt_log_enable();

	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
	if (pt->tc.time_mult) {
		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

		if (!pt->max_non_turbo_ratio)
			pt->max_non_turbo_ratio =
					(tsc_freq + 50000000) / 100000000;
		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
		intel_pt_log("Maximum non-turbo ratio %u\n",
			     pt->max_non_turbo_ratio);
		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
	}

	err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
	if (err)
		goto err_delete_thread;

	if (pt->synth_opts.calls)
		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
				       PERF_IP_FLAG_TRACE_END;
	if (pt->synth_opts.returns)
		pt->branches_filter |= PERF_IP_FLAG_RETURN |
				       PERF_IP_FLAG_TRACE_BEGIN;

	if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
	    !symbol_conf.use_callchain) {
		symbol_conf.use_callchain = true;
		if (callchain_register_param(&callchain_param) < 0) {
			symbol_conf.use_callchain = false;
			pt->synth_opts.callchain = false;
			pt->synth_opts.add_callchain = false;
		}
	}

	if (pt->synth_opts.add_callchain) {
		err = intel_pt_callchain_init(pt);
		if (err)
			goto err_delete_thread;
	}

	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
		pt->br_stack_sz_plus = pt->br_stack_sz;
	}

	if (pt->synth_opts.add_last_branch) {
		err = intel_pt_br_stack_init(pt);
		if (err)
			goto err_delete_thread;
		/*
		 * Additional branch stack size to cater for tracing from the
		 * actual sample ip to where the sample time is recorded.
		 * Measured at about 200 branches, but generously set to 1024.
		 * If kernel space is not being traced, then add just 1 for the
		 * branch to kernel space.
		 */
		if (intel_pt_tracing_kernel(pt))
			pt->br_stack_sz_plus += 1024;
		else
			pt->br_stack_sz_plus += 1;
	}

	pt->use_thread_stack = pt->synth_opts.callchain ||
			       pt->synth_opts.add_callchain ||
			       pt->synth_opts.thread_stack ||
			       pt->synth_opts.last_branch ||
			       pt->synth_opts.add_last_branch;

	pt->callstack = pt->synth_opts.callchain ||
			pt->synth_opts.add_callchain ||
			pt->synth_opts.thread_stack;

	err = intel_pt_synth_events(pt, session);
	if (err)
		goto err_delete_thread;

	intel_pt_setup_pebs_events(pt);

	if (pt->sampling_mode || list_empty(&session->auxtrace_index))
		err = auxtrace_queue_data(session, true, true);
	else
		err = auxtrace_queues__process_index(&pt->queues, session);
	if (err)
		goto err_delete_thread;

	if (pt->queues.populated)
		pt->data_queued = true;

	if (pt->timeless_decoding)
		pr_debug2("Intel PT decoding without timestamps\n");

	return 0;

err_delete_thread:
	zfree(&pt->chain);
	thread__zput(pt->unknown_thread);
err_free_queues:
	intel_pt_log_disable();
	auxtrace_queues__free(&pt->queues);
	session->auxtrace = NULL;
err_free:
	addr_filters__exit(&pt->filts);
	zfree(&pt->filter);
	zfree(&pt->time_ranges);