// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include "thread-stack.h"
#include "callchain.h"
#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"

#include "../arch/x86/include/uapi/asm/perf_regs.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct perf_session *session;
	struct machine *machine;
	struct evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool use_thread_stack;
	unsigned int br_stack_sz;
	unsigned int br_stack_sz_plus;
	int have_sched_switch;
	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;
	struct itrace_synth_opts synth_opts;
	bool sample_instructions;
	u64 instructions_sample_type;
	u64 branches_sample_type;
	bool sample_transactions;
	u64 transactions_sample_type;
	bool sample_ptwrites;
	u64 ptwrites_sample_type;
	bool sample_pwr_events;
	u64 pwr_events_sample_type;
	struct evsel *pebs_evsel;
	unsigned max_non_turbo_ratio;
	unsigned long num_events;
	struct addr_filters filts;
	struct range *time_ranges;
	unsigned int range_cnt;
	struct ip_callchain *chain;
	struct branch_stack *br_stack;

	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,

struct intel_pt_queue {
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	struct auxtrace_buffer *old_buffer;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	union perf_event *event_buf;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	struct thread *thread;
	unsigned int sel_idx;
	u64 last_in_insn_cnt;
	u64 last_br_insn_cnt;
	unsigned int cbr_seen;
	char insn[INTEL_PT_INSN_BUF_SZ];

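/* Pretty-print a block of raw Intel PT trace data, one decoded packet per line */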
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
	struct intel_pt_pkt packet;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;
	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
		color_fprintf(stdout, color, " %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
			color_fprintf(stdout, color, " ");
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
				color_fprintf(stdout, color, " %s\n", desc);
			color_fprintf(stdout, color, " Bad packet!\n");

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
	intel_pt_dump(pt, buf, len);

static void intel_pt_log_event(union perf_event *event)
	FILE *f = intel_pt_log_fp();

	if (!intel_pt_enable_logging || !f)
	perf_event__fprintf(event, NULL, f);

static void intel_pt_dump_sample(struct perf_session *session,
				 struct perf_sample *sample)
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);

static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
	struct perf_time_interval *range = pt->synth_opts.ptime_range;
	int n = pt->synth_opts.range_num;

	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)

	/* perf_time__ranges_skip_sample does not work if time is zero */
	return !n || !perf_time__ranges_skip_sample(range, n, tm);

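/* Trim buffer 'b' where it repeats data already seen at the end of buffer 'a' */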
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
	bool consecutive = false;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc, &consecutive);
	b->use_size = b->data + b->size - start;
	if (b->use_size && consecutive)
		b->consecutive = true;

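/*
 * Fill in the decoder's view of an auxtrace buffer, fixing up any overlap with
 * the previous buffer when snapshot or sampling mode may repeat data.
 */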
static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
			       struct auxtrace_buffer *buffer,
			       struct auxtrace_buffer *old_buffer,
			       struct intel_pt_buffer *b)
	int fd = perf_data__fd(ptq->pt->session->data);

	buffer->data = auxtrace_buffer__get_data(buffer, fd);

	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
	if (might_overlap && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
		b->len = buffer->size;
		b->buf = buffer->data;
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
		b->consecutive = true;

/* Do not drop buffers with references - refer intel_pt_get_trace() */
static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
					   struct auxtrace_buffer *buffer)
	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
	auxtrace_buffer__drop_data(buffer);

/* Must be serialized with respect to intel_pt_get_trace() */
static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

		struct intel_pt_buffer b = { .len = 0 };

		buffer = auxtrace_buffer__next(queue, buffer);

		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
			intel_pt_lookahead_drop_buffer(ptq, buffer);

		err = cb(&b, cb_data);

	if (buffer != old_buffer)
		intel_pt_lookahead_drop_buffer(ptq, buffer);
	intel_pt_lookahead_drop_buffer(ptq, old_buffer);

/*
 * This function assumes data is processed sequentially only.
 * Must be serialized with respect to intel_pt_lookahead()
 */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
			auxtrace_buffer__drop_data(old_buffer);

	ptq->buffer = buffer;

	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);

	if (ptq->step_through_buffers)
			auxtrace_buffer__drop_data(old_buffer);
		ptq->old_buffer = buffer;
		auxtrace_buffer__drop_data(buffer);
		return intel_pt_get_trace(b, data);

struct intel_pt_cache_entry {
	struct auxtrace_cache_entry entry;
	enum intel_pt_insn_op op;
	enum intel_pt_insn_branch branch;
	char insn[INTEL_PT_INSN_BUF_SZ];

static int intel_pt_config_div(const char *var, const char *value, void *data)
	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)

static int intel_pt_cache_divisor(void)
	perf_config(intel_pt_config_div, &d);

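/* Number of hash bits for the per-dso instruction cache, scaled to the dso size and the configured divisor */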
static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();
	if (size > (1 << 21))
	return 32 - __builtin_clz(size);

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
	struct auxtrace_cache *c;

	if (dso->auxtrace_cache)
		return dso->auxtrace_cache;

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso->auxtrace_cache = c;

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;

	e = auxtrace_cache__alloc_entry(c);

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;
	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);

	err = auxtrace_cache__add(c, offset, &e->entry);
		auxtrace_cache__free_entry(c, e);

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);

static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	auxtrace_cache__remove(dso->auxtrace_cache, offset);

static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
	return ip >= pt->kernel_start ?
	       PERF_RECORD_MISC_KERNEL :
	       PERF_RECORD_MISC_USER;

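/*
 * Decoder callback: walk instructions starting at *ip, reading instruction
 * bytes from the dso, until a branch, 'to_ip' or 'max_insn_cnt' is reached.
 * Results are cached per dso offset to speed up repeated walks.
 */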
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	u64 offset, start_offset, start_ip;

	intel_pt_insn->length = 0;

	if (to_ip && *ip == to_ip)

	cpumode = intel_pt_cpumode(ptq->pt, *ip);

	thread = ptq->thread;
	if (cpumode != PERF_RECORD_MISC_KERNEL)
			thread = ptq->pt->unknown_thread;

		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)

		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
		    dso__data_status_seen(al.map->dso,
					  DSO_DATA_STATUS_SEEN_ITRACE))

		offset = al.map->map_ip(al.map, *ip);

		if (!to_ip && one_map) {
			struct intel_pt_cache_entry *e;

			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
				*insn_cnt_ptr = e->insn_cnt;
				intel_pt_insn->op = e->op;
				intel_pt_insn->branch = e->branch;
				intel_pt_insn->length = e->length;
				intel_pt_insn->rel = e->rel;
				memcpy(intel_pt_insn->buf, e->insn,
				       INTEL_PT_INSN_BUF_SZ);
				intel_pt_log_insn_no_data(intel_pt_insn, *ip);

		start_offset = offset;

		/* Load maps to ensure dso->is_64_bit has been updated */

		x86_64 = al.map->dso->is_64_bit;

			len = dso__data_read_offset(al.map->dso, machine,
						    INTEL_PT_INSN_BUF_SZ);

			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))

			intel_pt_log_insn(intel_pt_insn, *ip);

			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)

			if (max_insn_cnt && insn_cnt >= max_insn_cnt)

			*ip += intel_pt_insn->length;

			if (to_ip && *ip == to_ip)

			if (*ip >= al.map->end)

			offset += intel_pt_insn->length;

	*insn_cnt_ptr = insn_cnt;

	/*
	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
	 */
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);

		/* Ignore cache errors */
		intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
				   *ip - start_ip, intel_pt_insn);

	*insn_cnt_ptr = insn_cnt;

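/*
 * Check a TIP.PGD address against the address filters to decide whether
 * tracing stopped because of a filter or trace-stop region.
 */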
static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
				  uint64_t offset, const char *filename)
	struct addr_filter *filt;
	bool have_filter = false;
	bool hit_tracestop = false;
	bool hit_filter = false;

	list_for_each_entry(filt, &pt->filts.head, list) {
		if ((filename && !filt->filename) ||
		    (!filename && filt->filename) ||
		    (filename && strcmp(filename, filt->filename)))

		if (!(offset >= filt->addr && offset < filt->addr + filt->size))

		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
			     ip, offset, filename ? filename : "[kernel]",
			     filt->start ? "filter" : "stop",
			     filt->addr, filt->size);

			hit_tracestop = true;

	if (!hit_tracestop && !hit_filter)
		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
			     ip, offset, filename ? filename : "[kernel]");

	return hit_tracestop || (have_filter && !hit_filter);

static int __intel_pt_pgd_ip(uint64_t ip, void *data)
	struct intel_pt_queue *ptq = data;
	struct thread *thread;
	struct addr_location al;

	if (ip >= ptq->pt->kernel_start)
		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);

	cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;

	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)

	offset = al.map->map_ip(al.map, ip);

	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
				     al.map->dso->long_name);

static bool intel_pt_pgd_ip(uint64_t ip, void *data)
	return __intel_pt_pgd_ip(ip, data) > 0;

static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
	if (attr->type == pt->pmu_type) {
			*config = attr->config;

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)

static bool intel_pt_return_compression(struct intel_pt *pt)
	if (!pt->noretcomp_bit)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & pt->noretcomp_bit))

static bool intel_pt_branch_enable(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & 1) && !(config & 0x2000))

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
	if (!pt->mtc_freq_bits)

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;

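/* "Timeless" decoding means decoding without timestamps, e.g. when TSC was not enabled or time conversion information is missing */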
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
	bool timeless_decoding = true;

	if (!pt->tsc_bit || !pt->cap_user_time_zero)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;

	return timeless_decoding;

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)

static bool intel_pt_have_tsc(struct intel_pt *pt)
	bool have_tsc = false;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)

static bool intel_pt_sampling_mode(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
		    evsel->core.attr.aux_sample_size)

static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
	quot = ns / pt->tc.time_mult;
	rem  = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /

static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
	size_t sz = sizeof(struct ip_callchain);

	/* Add 1 to callchain_sz for callchain context */
	sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);

static int intel_pt_callchain_init(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;

	pt->chain = intel_pt_alloc_chain(pt);

static void intel_pt_add_callchain(struct intel_pt *pt,
				   struct perf_sample *sample)
	struct thread *thread = machine__findnew_thread(pt->machine,

	thread_stack__sample_late(thread, sample->cpu, pt->chain,
				  pt->synth_opts.callchain_sz + 1, sample->ip,

	sample->callchain = pt->chain;

static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
	size_t sz = sizeof(struct branch_stack);

	sz += entry_cnt * sizeof(struct branch_entry);

static int intel_pt_br_stack_init(struct intel_pt *pt)
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;

	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);

static void intel_pt_add_br_stack(struct intel_pt *pt,
				  struct perf_sample *sample)
	struct thread *thread = machine__findnew_thread(pt->machine,

	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
				     pt->br_stack_sz, sample->ip,

	sample->branch_stack = pt->br_stack;

/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)

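/* Allocate per-queue decoding state and configure the decoder parameters from the recorded configuration */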
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
	struct intel_pt_params params = { .get_trace = 0, };
	struct perf_env *env = pt->machine->env;
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));

	if (pt->synth_opts.callchain) {
		ptq->chain = intel_pt_alloc_chain(pt);

	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);

		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
		if (!ptq->last_branch)

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)

	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.lookahead = intel_pt_lookahead;
	params.return_compression = intel_pt_return_compression(pt);
	params.branch_enable = intel_pt_branch_enable(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
	params.quick = pt->synth_opts.quick;

	if (pt->filts.cnt > 0)
		params.pgd_ip = intel_pt_pgd_ip;

	if (pt->synth_opts.instructions) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
					INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;

	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
		params.flags |= INTEL_PT_FUP_WITH_NLIP;

	ptq->decoder = intel_pt_decoder_new(&params);

	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);

static void intel_pt_free_queue(void *priv)
	struct intel_pt_queue *ptq = priv;

	thread__zput(ptq->thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		thread__zput(ptq->thread);

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

		ptq->pid = ptq->thread->pid_;
		if (queue->cpu == -1)
			ptq->cpu = ptq->thread->cpu;

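/* Derive perf branch flags (call, async, tx-abort, trace begin/end, etc.) from the current decoder state */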
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
		if (ptq->state->to_ip)
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_ASYNC |
				     PERF_IP_FLAG_INTERRUPT;
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);

	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
	if (ptq->state->type & INTEL_PT_TRACE_END)
		ptq->flags |= PERF_IP_FLAG_TRACE_END;

static void intel_pt_setup_time_range(struct intel_pt *pt,
				      struct intel_pt_queue *ptq)
	ptq->sel_timestamp = pt->time_ranges[0].start;

	if (ptq->sel_timestamp) {
		ptq->sel_start = true;
		ptq->sel_timestamp = pt->time_ranges[0].end;
		ptq->sel_start = false;

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))

		ptq = intel_pt_alloc_queue(pt, queue_nr);

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		ptq->cbr_seen = UINT_MAX;

		if (pt->sampling_mode && !pt->snapshot_mode &&
		    pt->timeless_decoding)
			ptq->step_through_buffers = true;

		ptq->sync_switch = pt->sync_switch;

		intel_pt_setup_time_range(pt, ptq);

	if (!ptq->on_heap &&
	    (!ptq->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;

		if (pt->timeless_decoding)

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);

		if (ptq->sel_start && ptq->sel_timestamp) {
			ret = intel_pt_fast_forward(ptq->decoder,
						    ptq->sel_timestamp);

			state = intel_pt_decode(ptq->decoder);
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
			if (state->timestamp)

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);

		ptq->have_sample = true;
		if (ptq->sel_start && ptq->sel_timestamp &&
		    ptq->timestamp < ptq->sel_timestamp)
			ptq->have_sample = false;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		ptq->on_heap = true;

static int intel_pt_setup_queues(struct intel_pt *pt)
	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);

static inline bool intel_pt_skip_event(struct intel_pt *pt)
	return pt->synth_opts.initial_skip &&
	       pt->num_events++ < pt->synth_opts.initial_skip;

/*
 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
 * from this decoder state.
 */
static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
	return pt->synth_opts.initial_skip &&
	       pt->num_events + 4 < pt->synth_opts.initial_skip;

static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample->pid = ptq->pid;
	sample->tid = ptq->tid;
	sample->cpu = ptq->cpu;
	sample->insn_len = ptq->insn_len;
	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);

static void intel_pt_prep_b_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	intel_pt_prep_a_sample(ptq, event, sample);

	if (!pt->timeless_decoding)
		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample->ip = ptq->state->from_ip;
	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
	sample->addr = ptq->state->to_ip;
	sample->flags = ptq->flags;

	event->sample.header.misc = sample->cpumode;

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type)
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);

static inline int intel_pt_opt_inject(struct intel_pt *pt,
				      union perf_event *event,
				      struct perf_sample *sample, u64 type)
	if (!pt->synth_opts.inject)

	return intel_pt_inject_event(event, sample, type);

static int intel_pt_deliver_synth_event(struct intel_pt *pt,
					union perf_event *event,
					struct perf_sample *sample, u64 type)
	ret = intel_pt_opt_inject(pt, event, sample, type);

	ret = perf_session__deliver_synth_event(pt->session, event, sample);
		pr_err("Intel PT: failed to deliver event, error %d\n", ret);

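/* Synthesize a branch sample, adding a dummy branch stack when perf report's branch sort mode needs one */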
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		struct branch_entry entries;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))

	if (intel_pt_skip_event(pt))

	intel_pt_prep_b_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
	 */
	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
		dummy_bs = (struct dummy_branch_stack){
		sample.branch_stack = (struct branch_stack *)&dummy_bs;

	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->branches_sample_type);

static void intel_pt_prep_sample(struct intel_pt *pt,
				 struct intel_pt_queue *ptq,
				 union perf_event *event,
				 struct perf_sample *sample)
	intel_pt_prep_b_sample(pt, ptq, event, sample);

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz + 1,
				     sample->ip, pt->kernel_start);
		sample->callchain = ptq->chain;

	if (pt->synth_opts.last_branch) {
		thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
		sample->branch_stack = ptq->last_branch;

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	if (pt->synth_opts.quick)
		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;

	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->instructions_sample_type);

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->transactions_sample_type);

static void intel_pt_prep_p_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	intel_pt_prep_sample(pt, ptq, event, sample);

	/*
	 * Zero IP is used to mean "trace start" but that is not the case for
	 * power or PTWRITE events with no IP, so clear the flags.
	 */

static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_ptwrite raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->ptwrites_id;
	sample.stream_id = ptq->pt->ptwrites_id;

	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
	raw.payload = cpu_to_le64(ptq->state->ptw_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->ptwrites_sample_type);

static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_cbr raw;

	if (intel_pt_skip_cbr_event(pt))

	ptq->cbr_seen = ptq->state->cbr;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->cbr_id;
	sample.stream_id = ptq->pt->cbr_id;

	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
	raw.flags = cpu_to_le32(flags);
	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_mwait raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->mwait_id;
	sample.stream_id = ptq->pt->mwait_id;

	raw.payload = cpu_to_le64(ptq->state->mwait_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwre raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwre_id;
	sample.stream_id = ptq->pt->pwre_id;

	raw.payload = cpu_to_le64(ptq->state->pwre_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_exstop raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->exstop_id;
	sample.stream_id = ptq->pt->exstop_id;

	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwrx raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwrx_id;
	sample.stream_id = ptq->pt->pwrx_id;

	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);

/*
 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
 * intel_pt_add_gp_regs().
 */
static const int pebs_gp_regs[] = {
	[PERF_REG_X86_FLAGS]	= 1,
	[PERF_REG_X86_IP]	= 2,
	[PERF_REG_X86_AX]	= 3,
	[PERF_REG_X86_CX]	= 4,
	[PERF_REG_X86_DX]	= 5,
	[PERF_REG_X86_BX]	= 6,
	[PERF_REG_X86_SP]	= 7,
	[PERF_REG_X86_BP]	= 8,
	[PERF_REG_X86_SI]	= 9,
	[PERF_REG_X86_DI]	= 10,
	[PERF_REG_X86_R8]	= 11,
	[PERF_REG_X86_R9]	= 12,
	[PERF_REG_X86_R10]	= 13,
	[PERF_REG_X86_R11]	= 14,
	[PERF_REG_X86_R12]	= 15,
	[PERF_REG_X86_R13]	= 16,
	[PERF_REG_X86_R14]	= 17,
	[PERF_REG_X86_R15]	= 18,

static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
				 const struct intel_pt_blk_items *items,
	const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
	u32 mask = items->mask[INTEL_PT_GP_REGS_POS];

	for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
		/* Get the PEBS gp_regs array index */
		int n = pebs_gp_regs[i] - 1;

		/*
		 * Add only registers that were requested (i.e. 'regs_mask') and
		 * that were provided (i.e. 'mask'), and update the resulting
		 * mask (i.e. 'intr_regs->mask') accordingly.
		 */
		if (mask & 1 << n && regs_mask & bit) {
			intr_regs->mask |= bit;
			*pos++ = gp_regs[n];

#ifndef PERF_REG_X86_XMM0
#define PERF_REG_X86_XMM0 32

static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
			     const struct intel_pt_blk_items *items,
	u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
	const u64 *xmm = items->xmm;

	/*
	 * If there are any XMM registers, then there should be all of them.
	 * Nevertheless, follow the logic to add only registers that were
	 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
	 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
	 */
	intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;

	for (; mask; mask >>= 1, xmm++) {

#define LBR_INFO_MISPRED	(1ULL << 63)
#define LBR_INFO_IN_TX		(1ULL << 62)
#define LBR_INFO_ABORT		(1ULL << 61)
#define LBR_INFO_CYCLES		0xffff

/* Refer kernel's intel_pmu_store_pebs_lbrs() */
static u64 intel_pt_lbr_flags(u64 info)
	struct branch_flags flags;

	u.flags.mispred = !!(info & LBR_INFO_MISPRED);
	u.flags.predicted = !(info & LBR_INFO_MISPRED);
	u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
	u.flags.abort = !!(info & LBR_INFO_ABORT);
	u.flags.cycles = info & LBR_INFO_CYCLES;

static void intel_pt_add_lbrs(struct branch_stack *br_stack,
			      const struct intel_pt_blk_items *items)
	to = &br_stack->entries[0].from;

	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
		u32 mask = items->mask[i];
		const u64 *from = items->val[i];

		for (; mask; mask >>= 3, from += 3) {
			if ((mask & 7) == 7) {
				*to++ = intel_pt_lbr_flags(from[2]);

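/*
 * Convert a PEBS-via-PT block item into a synthesized sample for the PEBS
 * event, filling in registers, LBRs, weight and transaction information
 * where present.
 */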
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
	const struct intel_pt_blk_items *items = &ptq->state->items;
	struct perf_sample sample = { .ip = 0, };
	union perf_event *event = ptq->event_buf;
	struct intel_pt *pt = ptq->pt;
	struct evsel *evsel = pt->pebs_evsel;
	u64 sample_type = evsel->core.attr.sample_type;
	u64 id = evsel->core.id[0];
	u64 regs[8 * sizeof(sample.intr_regs.mask)];

	if (intel_pt_skip_event(pt))

	intel_pt_prep_a_sample(ptq, event, &sample);

	sample.stream_id = id;

	if (!evsel->core.attr.freq)
		sample.period = evsel->core.attr.sample_period;

	/* No support for non-zero CS base */
		sample.ip = items->ip;
	else if (items->has_rip)
		sample.ip = items->rip;
		sample.ip = ptq->state->from_ip;

	/* No support for guest mode at this time */
	cpumode = sample.ip < ptq->pt->kernel_start ?
		  PERF_RECORD_MISC_USER :
		  PERF_RECORD_MISC_KERNEL;

	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;

	sample.cpumode = cpumode;

	if (sample_type & PERF_SAMPLE_TIME) {
		if (items->has_timestamp)
			timestamp = items->timestamp;
		else if (!pt->timeless_decoding)
			timestamp = ptq->timestamp;
			sample.time = tsc_to_perf_time(timestamp, &pt->tc);

	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
	    pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip,
		sample.callchain = ptq->chain;

	if (sample_type & PERF_SAMPLE_REGS_INTR &&
	    (items->mask[INTEL_PT_GP_REGS_POS] ||
	     items->mask[INTEL_PT_XMM_POS])) {
		u64 regs_mask = evsel->core.attr.sample_regs_intr;

		sample.intr_regs.abi = items->is_32_bit ?
				       PERF_SAMPLE_REGS_ABI_32 :
				       PERF_SAMPLE_REGS_ABI_64;
		sample.intr_regs.regs = regs;

		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);

		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);

	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
		if (items->mask[INTEL_PT_LBR_0_POS] ||
		    items->mask[INTEL_PT_LBR_1_POS] ||
		    items->mask[INTEL_PT_LBR_2_POS]) {
			intel_pt_add_lbrs(ptq->last_branch, items);
		} else if (pt->synth_opts.last_branch) {
			thread_stack__br_sample(ptq->thread, ptq->cpu,
			ptq->last_branch->nr = 0;
		sample.branch_stack = ptq->last_branch;

	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
		sample.addr = items->mem_access_address;

	if (sample_type & PERF_SAMPLE_WEIGHT) {
		/*
		 * Refer kernel's setup_pebs_adaptive_sample_data() and
		 * intel_hsw_weight().
		 */
		if (items->has_mem_access_latency)
			sample.weight = items->mem_access_latency;
		if (!sample.weight && items->has_tsx_aux_info) {
			/* Cycles last block */
			sample.weight = (u32)items->tsx_aux_info;

	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
		u64 ax = items->has_rax ? items->rax : 0;
		/* Refer kernel's intel_hsw_transaction() */
		u64 txn = (u8)(items->tsx_aux_info >> 32);

		/* For RTM XABORTs also log the abort code from AX */
		if (txn & PERF_TXN_TRANSACTION && ax & 1)
			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
		sample.transaction = txn;

	return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);

static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
				pid_t pid, pid_t tid, u64 ip, u64 timestamp)
	union perf_event event;
	char msg[MAX_AUXTRACE_ERROR_MSG];

	if (pt->synth_opts.error_minus_flags) {
		if (code == INTEL_PT_ERR_OVR &&
		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
		if (code == INTEL_PT_ERR_LOST &&
		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)

	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     code, cpu, pid, tid, ip, msg, timestamp);

	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",

static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
				 const struct intel_pt_state *state)
	struct intel_pt *pt = ptq->pt;
	u64 tm = ptq->timestamp;

	tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);

	return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
				    ptq->tid, state->from_ip, tm);

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
	struct auxtrace_queue *queue;
	pid_t tid = ptq->next_tid;

	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

	queue = &pt->queues.queue_array[ptq->queue_nr];
	intel_pt_set_pid_tid_cpu(pt, queue);

static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
	struct intel_pt *pt = ptq->pt;

	return ip == pt->switch_ip &&
	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));

#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)

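/* Synthesize all requested sample types for the current decoder state, then update the per-queue context switch state machine */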
static int intel_pt_sample(struct intel_pt_queue *ptq)
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;

	if (!ptq->have_sample)

	ptq->have_sample = false;

	if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
		/*
		 * Cycle count and instruction count only go together to create
		 * a valid IPC ratio when the cycle count changes.
		 */
		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;

	/*
	 * Do PEBS first to allow for the possibility that the PEBS timestamp
	 * precedes the current timestamp.
	 */
	if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
		err = intel_pt_synth_pebs_sample(ptq);

	if (pt->sample_pwr_events) {
		if (ptq->state->cbr != ptq->cbr_seen) {
			err = intel_pt_synth_cbr_sample(ptq);
		if (state->type & INTEL_PT_PWR_EVT) {
			if (state->type & INTEL_PT_MWAIT_OP) {
				err = intel_pt_synth_mwait_sample(ptq);
			if (state->type & INTEL_PT_PWR_ENTRY) {
				err = intel_pt_synth_pwre_sample(ptq);
			if (state->type & INTEL_PT_EX_STOP) {
				err = intel_pt_synth_exstop_sample(ptq);
			if (state->type & INTEL_PT_PWR_EXIT) {
				err = intel_pt_synth_pwrx_sample(ptq);

	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
		err = intel_pt_synth_instruction_sample(ptq);

	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
		err = intel_pt_synth_transaction_sample(ptq);

	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
		err = intel_pt_synth_ptwrite_sample(ptq);

	if (!(state->type & INTEL_PT_BRANCH))

	if (pt->use_thread_stack) {
		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
				    state->from_ip, state->to_ip, ptq->insn_len,
				    state->trace_nr, pt->callstack,
				    pt->br_stack_sz_plus,
		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);

	if (pt->sample_branches) {
		err = intel_pt_synth_branch_sample(ptq);

	if (!ptq->sync_switch)

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
		switch (ptq->switch_state) {
		case INTEL_PT_SS_NOT_TRACING:
		case INTEL_PT_SS_UNKNOWN:
		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
			err = intel_pt_next_tid(pt, ptq);
			ptq->switch_state = INTEL_PT_SS_TRACING;
			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
	} else if (!state->to_ip) {
		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   state->to_ip == pt->ptss_ip &&
		   (ptq->flags & PERF_IP_FLAG_CALL)) {
		ptq->switch_state = INTEL_PT_SS_TRACING;

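/* Locate the kernel __switch_to symbol (and the sched_switch related symbol) used to recognize context switches in the trace */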
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
	struct machine *machine = pt->machine;
	struct symbol *sym, *start;
	u64 ip, switch_ip = 0;

	map = machine__kernel_map(machine);

	start = dso__first_symbol(map->dso);

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding == STB_GLOBAL &&
		    !strcmp(sym->name, "__switch_to")) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {

	if (!switch_ip || !ptss_ip)

	if (pt->have_sched_switch == 1)
		ptss = "perf_trace_sched_switch";
		ptss = "__perf_event_task_sched_out";

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (!strcmp(sym->name, ptss)) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {

static void intel_pt_enable_sync_switch(struct intel_pt *pt)
	pt->sync_switch = true;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

			ptq->sync_switch = true;

/*
 * To filter against time ranges, it is only necessary to look at the next start
 */
static bool intel_pt_next_time(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;

	if (ptq->sel_start) {
		/* Next time is an end time */
		ptq->sel_start = false;
		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
		/* Next time is a start time */
		ptq->sel_start = true;
		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;

static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
	if (ptq->sel_start) {
		if (ptq->timestamp >= ptq->sel_timestamp) {
			/* After start time, so consider next time */
			intel_pt_next_time(ptq);
			if (!ptq->sel_timestamp) {
			/* Check against end time */
		/* Before start time, so fast forward */
		ptq->have_sample = false;
		if (ptq->sel_timestamp > *ff_timestamp) {
			if (ptq->sync_switch) {
				intel_pt_next_tid(ptq->pt, ptq);
				ptq->switch_state = INTEL_PT_SS_UNKNOWN;
			*ff_timestamp = ptq->sel_timestamp;
			err = intel_pt_fast_forward(ptq->decoder,
						    ptq->sel_timestamp);
	} else if (ptq->timestamp > ptq->sel_timestamp) {
		/* After end time, so consider next time */
		if (!intel_pt_next_time(ptq)) {
			/* No next time range, so stop decoding */
			ptq->have_sample = false;
			ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
		/* Check against next start time */
		/* Before end time */

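/* Decode one queue until the given timestamp is reached, synthesizing samples and errors along the way */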
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	u64 ff_timestamp = 0;

	if (!pt->kernel_start) {
		pt->kernel_start = machine__kernel_start(pt->machine);
		if (pt->per_cpu_mmaps &&
		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
		    !pt->sampling_mode) {
			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
				intel_pt_enable_sync_switch(pt);

	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

		err = intel_pt_sample(ptq);

		state = intel_pt_decode(ptq->decoder);
			if (state->err == INTEL_PT_ERR_NODATA)
			if (ptq->sync_switch &&
			    state->from_ip >= pt->kernel_start) {
				ptq->sync_switch = false;
				intel_pt_next_tid(pt, ptq);
			if (pt->synth_opts.errors) {
				err = intel_ptq_synth_error(ptq, state);

		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);

		/* Use estimated TSC upon return to user space */
		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
		    state->to_ip && state->to_ip < pt->kernel_start) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
		} else if (ptq->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		} else if (state->timestamp > ptq->timestamp) {
			ptq->timestamp = state->timestamp;

		if (ptq->sel_timestamp) {
			err = intel_pt_time_filter(ptq, &ff_timestamp);

		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
			*timestamp = ptq->timestamp;

static inline int intel_pt_update_queues(struct intel_pt *pt)
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);

static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
	unsigned int queue_nr;

		struct auxtrace_queue *queue;
		struct intel_pt_queue *ptq;

		if (!pt->heap.heap_cnt)

		if (pt->heap.heap_array[0].ordinal >= timestamp)

		queue_nr = pt->heap.heap_array[0].queue_nr;
		queue = &pt->queues.queue_array[queue_nr];

		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
			     queue_nr, pt->heap.heap_array[0].ordinal,

		auxtrace_heap__pop(&pt->heap);

		if (pt->heap.heap_cnt) {
			ts = pt->heap.heap_array[0].ordinal + 1;

		intel_pt_set_pid_tid_cpu(pt, queue);

		ret = intel_pt_run_decoder(ptq, &ts);
			auxtrace_heap__add(&pt->heap, queue_nr, ts);
			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
			ptq->on_heap = false;

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
	struct auxtrace_queues *queues = &pt->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);

static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
					    struct auxtrace_queue *queue,
					    struct perf_sample *sample)
	struct machine *m = ptq->pt->machine;

	ptq->pid = sample->pid;
	ptq->tid = sample->tid;
	ptq->cpu = queue->cpu;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->pid == -1) {
		ptq->thread = machine__find_thread(m, -1, ptq->tid);
			ptq->pid = ptq->thread->pid_;

	ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);

static int intel_pt_process_timeless_sample(struct intel_pt *pt,
					    struct perf_sample *sample)
	struct auxtrace_queue *queue;
	struct intel_pt_queue *ptq;

	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);

	ptq->time = sample->time;
	intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
	intel_pt_run_decoder(ptq, &ts);

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0, sample->time);

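/* Map a CPU number to its queue, checking the queue at the same index first and then searching the remainder */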
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
	if (cpu < 0 || !pt->queues.nr_queues)

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;

static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
	struct intel_pt_queue *ptq;

	if (!pt->sync_switch)

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq || !ptq->sync_switch)

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
			ptq->on_heap = true;
		ptq->switch_state = INTEL_PT_SS_TRACING;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);

static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
	struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);

	if (evsel != pt->switch_evsel)

	tid = evsel__intval(evsel, sample, "next_pid");

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);

	return machine__set_current_tid(pt->machine, cpu, -1, tid);

static int intel_pt_context_switch_in(struct intel_pt *pt,
				      struct perf_sample *sample)
	pid_t pid = sample->pid;
	pid_t tid = sample->tid;
	int cpu = sample->cpu;

	if (pt->sync_switch) {
		struct intel_pt_queue *ptq;

		ptq = intel_pt_cpu_to_ptq(pt, cpu);
		if (ptq && ptq->sync_switch) {
			switch (ptq->switch_state) {
			case INTEL_PT_SS_NOT_TRACING:
			case INTEL_PT_SS_UNKNOWN:
			case INTEL_PT_SS_TRACING:
			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
				ptq->switch_state = INTEL_PT_SS_TRACING;

	/*
	 * If the current tid has not been updated yet, ensure it is now that
	 * a "switch in" event has occurred.
	 */
	if (machine__get_current_tid(pt->machine, cpu) == tid)

	return machine__set_current_tid(pt->machine, cpu, pid, tid);

static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	if (pt->have_sched_switch == 3) {
			return intel_pt_context_switch_in(pt, sample);
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;

		intel_pt_log("context_switch event has no tid\n");

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);

	return machine__set_current_tid(pt->machine, cpu, pid, tid);

static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if (!pt->per_cpu_mmaps)
		return 0;

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}
static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
			     struct addr_location *al)
{
	if (!al->map || addr < al->map->start || addr >= al->map->end) {
		if (!thread__find_map(thread, cpumode, addr, al))
			return -1;
	}

	return 0;
}
/* Invalidate all instruction cache entries that overlap the text poke */
static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
{
	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
	/* Assume text poke begins in a basic block no more than 4096 bytes */
	int cnt = 4096 + event->text_poke.new_len;
	struct thread *thread = pt->unknown_thread;
	struct addr_location al = { .map = NULL };
	struct machine *machine = pt->machine;
	struct intel_pt_cache_entry *e;
	u64 offset;

	if (!event->text_poke.new_len)
		return 0;

	for (; cnt; cnt--, addr--) {
		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
			if (addr < event->text_poke.addr)
				return 0;
			continue;
		}

		if (!al.map->dso || !al.map->dso->auxtrace_cache)
			continue;

		offset = al.map->map_ip(al.map, addr);

		e = intel_pt_cache_lookup(al.map->dso, machine, offset);
		if (!e)
			continue;

		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
			/*
			 * No overlap. Working backwards there cannot be another
			 * basic block that overlaps the text poke if there is a
			 * branch instruction before the text poke address.
			 */
			if (e->branch != INTEL_PT_BR_NO_BRANCH)
				return 0;
		} else {
			intel_pt_cache_invalidate(al.map->dso, machine, offset);
			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
				     al.map->dso->long_name, addr);
		}
	}

	return 0;
}
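/*
 * Main per-event callback: convert the event timestamp to TSC, run the
 * decoder for any queues that are ready, and then handle the side-band events
 * (sched_switch samples, context switches, itrace start, truncated AUX data,
 * text pokes) that keep decoding in step with the rest of the session.
 */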
static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;
	int err = 0;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);
		if (err)
			return err;
	}

	if (pt->timeless_decoding) {
		if (pt->sampling_mode) {
			if (sample->aux_sample.size)
				err = intel_pt_process_timeless_sample(pt,
								       sample);
		} else if (event->header.type == PERF_RECORD_EXIT) {
			err = intel_pt_process_timeless_queues(pt,
							       event->fork.tid,
							       sample->time);
		}
	} else if (timestamp) {
		err = intel_pt_process_queues(pt, timestamp);
	}
	if (err)
		return err;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (pt->synth_opts.add_callchain && !sample->callchain)
			intel_pt_add_callchain(pt, sample);
		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
			intel_pt_add_br_stack(pt, sample);
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    pt->synth_opts.errors) {
		err = intel_pt_lost(pt, sample);
		if (err)
			return err;
	}

	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
		err = intel_pt_process_switch(pt, sample);
	else if (event->header.type == PERF_RECORD_ITRACE_START)
		err = intel_pt_process_itrace_start(pt, event, sample);
	else if (event->header.type == PERF_RECORD_SWITCH ||
		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		err = intel_pt_context_switch(pt, event, sample);

	if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
		err = intel_pt_text_poke(pt, event);

	if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
		intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
			     event->header.type, sample->cpu, sample->time,
			     timestamp);
		intel_pt_log_event(event);
	}

	return err;
}
static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_pt_update_queues(pt);
	if (ret)
		return ret;

	if (pt->timeless_decoding)
		return intel_pt_process_timeless_queues(pt, -1,
							MAX_TIMESTAMP - 1);

	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}
static void intel_pt_free_events(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_pt_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	intel_pt_log_disable();
	auxtrace_queues__free(queues);
}
static void intel_pt_free(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	auxtrace_heap__free(&pt->heap);
	intel_pt_free_events(session);
	session->auxtrace = NULL;
	thread__put(pt->unknown_thread);
	addr_filters__exit(&pt->filts);
	zfree(&pt->chain);
	zfree(&pt->filter);
	zfree(&pt->time_ranges);
	free(pt);
}
static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
				       struct evsel *evsel)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	return evsel->core.attr.type == pt->pmu_type;
}
static int intel_pt_process_auxtrace_event(struct perf_session *session,
					   union perf_event *event,
					   struct perf_tool *tool __maybe_unused)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	if (!pt->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&pt->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_pt_dump_event(pt, buffer->data,
						    buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}
static int intel_pt_queue_data(struct perf_session *session,
			       struct perf_sample *sample,
			       union perf_event *event, u64 data_offset)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;

	if (event) {
		return auxtrace_queues__add_event(&pt->queues, session, event,
						  data_offset, NULL);
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	return auxtrace_queues__add_sample(&pt->queues, session, sample,
					   data_offset, timestamp);
}
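/*
 * Helpers for synthesizing the attribute events that describe the samples
 * Intel PT itself will synthesize.  A dummy perf_tool is used so that
 * perf_event__synthesize_attr() delivers the resulting event back into this
 * session.
 */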
struct intel_pt_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
static int intel_pt_event_synth(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample __maybe_unused,
				struct machine *machine __maybe_unused)
{
	struct intel_pt_synth *intel_pt_synth =
			container_of(tool, struct intel_pt_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
						 NULL);
}
static int intel_pt_synth_event(struct perf_session *session, const char *name,
				struct perf_event_attr *attr, u64 id)
{
	struct intel_pt_synth intel_pt_synth;
	int err;

	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
		 name, id, (u64)attr->sample_type);

	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
	intel_pt_synth.session = session;

	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
					  &id, intel_pt_event_synth);
	if (err)
		pr_err("%s: failed to synthesize '%s' event type\n",
		       __func__, name);

	return err;
}
static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}
static struct evsel *intel_pt_evsel(struct intel_pt *pt,
				    struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
			return evsel;
	}

	return NULL;
}
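/*
 * Create event attributes for the sample types selected by the itrace options
 * (branches, callchains, last branch, instructions, transactions, ptwrite and
 * power events), basing them on the Intel PT evsel's own attribute and
 * allocating a new id for each synthesized event type.
 */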
static int intel_pt_synth_events(struct intel_pt *pt,
				 struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel = intel_pt_evsel(pt, evlist);
	struct perf_event_attr attr;
	u64 id;
	int err;

	if (!evsel) {
		pr_debug("There are no selected events with Intel Processor Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (pt->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;
	if (!pt->per_cpu_mmaps)
		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	id = evsel->core.id[0] + 1000000000;
	if (!id)
		id = 1;

	if (pt->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = intel_pt_synth_event(session, "branches", &attr, id);
		if (err)
			return err;
		pt->sample_branches = true;
		pt->branches_sample_type = attr.sample_type;
		pt->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (pt->synth_opts.callchain)
		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
	if (pt->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (pt->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
			attr.sample_period =
				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
		else
			attr.sample_period = pt->synth_opts.period;
		err = intel_pt_synth_event(session, "instructions", &attr, id);
		if (err)
			return err;
		pt->sample_instructions = true;
		pt->instructions_sample_type = attr.sample_type;
		pt->instructions_id = id;
		id += 1;
	}

	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
	attr.sample_period = 1;

	if (pt->synth_opts.transactions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		err = intel_pt_synth_event(session, "transactions", &attr, id);
		if (err)
			return err;
		pt->sample_transactions = true;
		pt->transactions_sample_type = attr.sample_type;
		pt->transactions_id = id;
		intel_pt_set_event_name(evlist, id, "transactions");
		id += 1;
	}

	attr.type = PERF_TYPE_SYNTH;
	attr.sample_type |= PERF_SAMPLE_RAW;

	if (pt->synth_opts.ptwrites) {
		attr.config = PERF_SYNTH_INTEL_PTWRITE;
		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
		if (err)
			return err;
		pt->sample_ptwrites = true;
		pt->ptwrites_sample_type = attr.sample_type;
		pt->ptwrites_id = id;
		intel_pt_set_event_name(evlist, id, "ptwrite");
		id += 1;
	}

	if (pt->synth_opts.pwr_events) {
		pt->sample_pwr_events = true;
		pt->pwr_events_sample_type = attr.sample_type;

		attr.config = PERF_SYNTH_INTEL_CBR;
		err = intel_pt_synth_event(session, "cbr", &attr, id);
		if (err)
			return err;
		intel_pt_set_event_name(evlist, id, "cbr");
		id += 1;
	}

	if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
		attr.config = PERF_SYNTH_INTEL_MWAIT;
		err = intel_pt_synth_event(session, "mwait", &attr, id);
		if (err)
			return err;
		intel_pt_set_event_name(evlist, id, "mwait");
		id += 1;

		attr.config = PERF_SYNTH_INTEL_PWRE;
		err = intel_pt_synth_event(session, "pwre", &attr, id);
		if (err)
			return err;
		intel_pt_set_event_name(evlist, id, "pwre");
		id += 1;

		attr.config = PERF_SYNTH_INTEL_EXSTOP;
		err = intel_pt_synth_event(session, "exstop", &attr, id);
		if (err)
			return err;
		intel_pt_set_event_name(evlist, id, "exstop");
		id += 1;

		attr.config = PERF_SYNTH_INTEL_PWRX;
		err = intel_pt_synth_event(session, "pwrx", &attr, id);
		if (err)
			return err;
		intel_pt_set_event_name(evlist, id, "pwrx");
		id += 1;
	}

	return 0;
}
static void intel_pt_setup_pebs_events(struct intel_pt *pt)
{
	struct evsel *evsel;

	if (!pt->synth_opts.other_events)
		return;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (evsel->core.attr.aux_output && evsel->core.id) {
			pt->sample_pebs = true;
			pt->pebs_evsel = evsel;
			return;
		}
	}
}
static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel) {
		const char *name = evsel__name(evsel);

		if (!strcmp(name, "sched:sched_switch"))
			return evsel;
	}

	return NULL;
}
static bool intel_pt_find_switch(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.context_switch)
			return true;
	}

	return false;
}
static int intel_pt_perf_config(const char *var, const char *value, void *data)
{
	struct intel_pt *pt = data;

	if (!strcmp(var, "intel-pt.mispred-all"))
		pt->mispred_all = perf_config_bool(var, value);

	return 0;
}
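/*
 * The two helpers below deal with the fact that perf time <-> TSC conversion
 * is not exactly invertible: after converting a nanosecond boundary to TSC,
 * the TSC value is stepped until converting it back lands on the required
 * side of the boundary.
 */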
/* Find least TSC which converts to ns or later */
static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
{
	u64 tsc, tm;

	tsc = perf_time_to_tsc(ns, &pt->tc);

	while (1) {
		tm = tsc_to_perf_time(tsc, &pt->tc);
		if (tm < ns)
			break;
		tsc -= 1;
	}

	while (tm < ns)
		tm = tsc_to_perf_time(++tsc, &pt->tc);

	return tsc;
}
/* Find greatest TSC which converts to ns or earlier */
static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
{
	u64 tsc, tm;

	tsc = perf_time_to_tsc(ns, &pt->tc);

	while (1) {
		tm = tsc_to_perf_time(tsc, &pt->tc);
		if (tm > ns)
			break;
		tsc += 1;
	}

	while (tm > ns)
		tm = tsc_to_perf_time(--tsc, &pt->tc);

	return tsc;
}
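/*
 * Convert the perf-time ranges selected with --time into TSC ranges that the
 * decoder can compare directly against trace timestamps.
 */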
static int intel_pt_setup_time_ranges(struct intel_pt *pt,
				      struct itrace_synth_opts *opts)
{
	struct perf_time_interval *p = opts->ptime_range;
	int n = opts->range_num;
	int i;

	if (!n || !p || pt->timeless_decoding)
		return 0;

	pt->time_ranges = calloc(n, sizeof(struct range));
	if (!pt->time_ranges)
		return -ENOMEM;

	pt->range_cnt = n;

	intel_pt_log("%s: %u range(s)\n", __func__, n);

	for (i = 0; i < n; i++) {
		struct range *r = &pt->time_ranges[i];
		u64 ts = p[i].start;
		u64 te = p[i].end;

		/*
		 * Take care to ensure the TSC range matches the perf-time range
		 * when converted back to perf-time.
		 */
		r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
		r->end   = te ? intel_pt_tsc_end(te, pt) : 0;

		intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
			     i, ts, te);
		intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
			     i, r->start, r->end);
	}

	return 0;
}
static const char * const intel_pt_info_fmts[] = {
	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
};
static void intel_pt_print_info(__u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}
static void intel_pt_print_info_str(const char *name, const char *str)
{
	if (!dump_trace)
		return;

	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
}
static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos)
{
	return auxtrace_info->header.size >=
		sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1));
}
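/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: parse the private data written
 * by 'perf record', set up the decode queues and synthesis options, and hook
 * this auxtrace implementation into the session.
 */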
int intel_pt_process_auxtrace_info(union perf_event *event,
				   struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
	struct intel_pt *pt;
	void *info_end;
	__u64 *info;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	pt = zalloc(sizeof(struct intel_pt));
	if (!pt)
		return -ENOMEM;

	addr_filters__init(&pt->filts);

	err = perf_config(intel_pt_perf_config, pt);
	if (err)
		goto err_free;

	err = auxtrace_queues__init(&pt->queues);
	if (err)
		goto err_free;

	intel_pt_log_set_name(INTEL_PT_PMU_NAME);

	pt->session = session;
	pt->machine = &session->machines.host; /* No kvm support */
	pt->auxtrace_type = auxtrace_info->type;
	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
			    INTEL_PT_PER_CPU_MMAPS);

	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
				    INTEL_PT_CYC_BIT);
	}

	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
		pt->max_non_turbo_ratio =
			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
		intel_pt_print_info(&auxtrace_info->priv[0],
				    INTEL_PT_MAX_NONTURBO_RATIO,
				    INTEL_PT_MAX_NONTURBO_RATIO);
	}

	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
	info_end = (void *)info + auxtrace_info->header.size;

	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
		size_t len;

		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
		intel_pt_print_info(&auxtrace_info->priv[0],
				    INTEL_PT_FILTER_STR_LEN,
				    INTEL_PT_FILTER_STR_LEN);
		if (len) {
			const char *filter = (const char *)info;

			len = roundup(len + 1, 8);
			info += len >> 3;
			if ((void *)info > info_end) {
				pr_err("%s: bad filter string length\n", __func__);
				err = -EINVAL;
				goto err_free_queues;
			}
			pt->filter = memdup(filter, len);
			if (!pt->filter) {
				err = -ENOMEM;
				goto err_free_queues;
			}
			if (session->header.needs_swap)
				mem_bswap_64(pt->filter, len);
			if (pt->filter[len - 1]) {
				pr_err("%s: filter string not null terminated\n", __func__);
				err = -EINVAL;
				goto err_free_queues;
			}
			err = addr_filters__parse_bare_filter(&pt->filts,
							      filter);
			if (err)
				goto err_free_queues;
		}
		intel_pt_print_info_str("Filter string", pt->filter);
	}

	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
	if (pt->timeless_decoding && !pt->tc.time_mult)
		pt->tc.time_mult = 1;
	pt->have_tsc = intel_pt_have_tsc(pt);
	pt->sampling_mode = intel_pt_sampling_mode(pt);
	pt->est_tsc = !pt->timeless_decoding;

	pt->unknown_thread = thread__new(999999999, 999999999);
	if (!pt->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

	/*
	 * Since this thread will not be kept in any rbtree nor in a
	 * list, initialize its list node so that at thread__put() the
	 * current thread lifetime assumption is kept and we don't segfault
	 * at list_del_init().
	 */
	INIT_LIST_HEAD(&pt->unknown_thread->node);

	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;
	if (thread__init_maps(pt->unknown_thread, pt->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	pt->auxtrace.process_event = intel_pt_process_event;
	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
	pt->auxtrace.queue_data = intel_pt_queue_data;
	pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
	pt->auxtrace.flush_events = intel_pt_flush;
	pt->auxtrace.free_events = intel_pt_free_events;
	pt->auxtrace.free = intel_pt_free;
	pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
	session->auxtrace = &pt->auxtrace;

	if (dump_trace)
		return 0;

	if (pt->have_sched_switch == 1) {
		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
		if (!pt->switch_evsel) {
			pr_err("%s: missing sched_switch event\n", __func__);
			err = -EINVAL;
			goto err_delete_thread;
		}
	} else if (pt->have_sched_switch == 2 &&
		   !intel_pt_find_switch(session->evlist)) {
		pr_err("%s: missing context_switch attribute flag\n", __func__);
		err = -EINVAL;
		goto err_delete_thread;
	}

	if (session->itrace_synth_opts->set) {
		pt->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&pt->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		if (!session->itrace_synth_opts->default_no_sample &&
		    !session->itrace_synth_opts->inject) {
			pt->synth_opts.branches = false;
			pt->synth_opts.callchain = true;
			pt->synth_opts.add_callchain = true;
		}
		pt->synth_opts.thread_stack =
				session->itrace_synth_opts->thread_stack;
	}

	if (pt->synth_opts.log)
		intel_pt_log_enable();

	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
	if (pt->tc.time_mult) {
		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

		if (!pt->max_non_turbo_ratio)
			pt->max_non_turbo_ratio =
					(tsc_freq + 50000000) / 100000000;
		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
		intel_pt_log("Maximum non-turbo ratio %u\n",
			     pt->max_non_turbo_ratio);
		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
	}

	err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
	if (err)
		goto err_delete_thread;

	if (pt->synth_opts.calls)
		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
				       PERF_IP_FLAG_TRACE_END;
	if (pt->synth_opts.returns)
		pt->branches_filter |= PERF_IP_FLAG_RETURN |
				       PERF_IP_FLAG_TRACE_BEGIN;

	if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
	    !symbol_conf.use_callchain) {
		symbol_conf.use_callchain = true;
		if (callchain_register_param(&callchain_param) < 0) {
			symbol_conf.use_callchain = false;
			pt->synth_opts.callchain = false;
			pt->synth_opts.add_callchain = false;
		}
	}

	if (pt->synth_opts.add_callchain) {
		err = intel_pt_callchain_init(pt);
		if (err)
			goto err_delete_thread;
	}

	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
		pt->br_stack_sz_plus = pt->br_stack_sz;
	}

	if (pt->synth_opts.add_last_branch) {
		err = intel_pt_br_stack_init(pt);
		if (err)
			goto err_delete_thread;
		/*
		 * Additional branch stack size to cater for tracing from the
		 * actual sample ip to where the sample time is recorded.
		 * Measured at about 200 branches, but generously set to 1024.
		 * If kernel space is not being traced, then add just 1 for the
		 * branch to kernel space.
		 */
		if (intel_pt_tracing_kernel(pt))
			pt->br_stack_sz_plus += 1024;
		else
			pt->br_stack_sz_plus += 1;
	}

	pt->use_thread_stack = pt->synth_opts.callchain ||
			       pt->synth_opts.add_callchain ||
			       pt->synth_opts.thread_stack ||
			       pt->synth_opts.last_branch ||
			       pt->synth_opts.add_last_branch;

	pt->callstack = pt->synth_opts.callchain ||
			pt->synth_opts.add_callchain ||
			pt->synth_opts.thread_stack;

	err = intel_pt_synth_events(pt, session);
	if (err)
		goto err_delete_thread;

	intel_pt_setup_pebs_events(pt);

	if (pt->sampling_mode || list_empty(&session->auxtrace_index))
		err = auxtrace_queue_data(session, true, true);
	else
		err = auxtrace_queues__process_index(&pt->queues, session);
	if (err)
		goto err_delete_thread;

	if (pt->queues.populated)
		pt->data_queued = true;

	if (pt->timeless_decoding)
		pr_debug2("Intel PT decoding without timestamps\n");

	return 0;

err_delete_thread:
	zfree(&pt->chain);
	thread__zput(pt->unknown_thread);
err_free_queues:
	intel_pt_log_disable();
	auxtrace_queues__free(&pt->queues);
	session->auxtrace = NULL;
err_free:
	addr_filters__exit(&pt->filts);
	zfree(&pt->filter);
	zfree(&pt->time_ranges);
	free(pt);
	return err;
}