// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <linux/perf_event.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include "thread-stack.h"
#include "callchain.h"
#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"

#include "../arch/x86/include/uapi/asm/perf_regs.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

#define INTEL_PT_CFG_PASS_THRU	BIT_ULL(0)
#define INTEL_PT_CFG_PWR_EVT_EN	BIT_ULL(4)
#define INTEL_PT_CFG_BRANCH_EN	BIT_ULL(13)
#define INTEL_PT_CFG_EVT_EN	BIT_ULL(31)
#define INTEL_PT_CFG_TNT_DIS	BIT_ULL(55)
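
/*
 * Note: the INTEL_PT_CFG_* bits above correspond to bit positions in the
 * intel_pt PMU's perf_event_attr.config; the helpers further below fetch that
 * config via intel_pt_get_config() and test these bits (e.g. branch enable,
 * TNT disable).
 */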
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct perf_session *session;
	struct machine *machine;
	struct evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sync_switch_not_supported;
	bool use_thread_stack;
	bool have_guest_sideband;
	unsigned int br_stack_sz;
	unsigned int br_stack_sz_plus;
	int have_sched_switch;
	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;
	struct itrace_synth_opts synth_opts;
	bool sample_instructions;
	u64 instructions_sample_type;
	u64 cycles_sample_type;
	bool sample_branches;
	u64 branches_sample_type;
	bool sample_transactions;
	u64 transactions_sample_type;
	bool sample_ptwrites;
	u64 ptwrites_sample_type;
	bool sample_pwr_events;
	u64 pwr_events_sample_type;
	struct evsel *pebs_evsel;
	u64 iflag_chg_sample_type;
	unsigned max_non_turbo_ratio;
	unsigned long num_events;
	struct addr_filters filts;
	struct range *time_ranges;
	unsigned int range_cnt;
	struct ip_callchain *chain;
	struct branch_stack *br_stack;
	struct rb_root vmcs_info;
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,

/* applicable_counters is 64-bits */
#define INTEL_PT_MAX_PEBS 64

struct intel_pt_pebs_event {

struct intel_pt_queue {
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	struct auxtrace_buffer *old_buffer;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	union perf_event *event_buf;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	struct thread *thread;
	struct machine *guest_machine;
	struct thread *guest_thread;
	struct thread *unknown_guest_thread;
	pid_t guest_machine_pid;
	unsigned int sel_idx;
	u64 last_in_insn_cnt;
	u64 last_cy_insn_cnt;
	u64 last_br_insn_cnt;
	unsigned int cbr_seen;
	char insn[INTEL_PT_INSN_BUF_SZ];
	struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS];
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
	struct intel_pt_pkt packet;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;
	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",

	ret = intel_pt_get_packet(buf, len, &packet, &ctx);

	color_fprintf(stdout, color, " %08zx: ", pos);
	for (i = 0; i < pkt_len; i++)
		color_fprintf(stdout, color, " %02x", buf[i]);
	color_fprintf(stdout, color, " ");

	ret = intel_pt_pkt_desc(&packet, desc,
				INTEL_PT_PKT_DESC_MAX);
	color_fprintf(stdout, color, " %s\n", desc);
	color_fprintf(stdout, color, " Bad packet!\n");
static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
	intel_pt_dump(pt, buf, len);

static void intel_pt_log_event(union perf_event *event)
	FILE *f = intel_pt_log_fp();

	if (!intel_pt_enable_logging || !f)

	perf_event__fprintf(event, NULL, f);
static void intel_pt_dump_sample(struct perf_session *session,
				 struct perf_sample *sample)
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,

	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);

static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
	struct perf_time_interval *range = pt->synth_opts.ptime_range;
	int n = pt->synth_opts.range_num;

	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)

	/* perf_time__ranges_skip_sample does not work if time is zero */
	return !n || !perf_time__ranges_skip_sample(range, n, tm);
static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
	struct rb_node **p = &rb_root->rb_node;
	struct rb_node *parent = NULL;
	struct intel_pt_vmcs_info *v;

		v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);

	v = zalloc(sizeof(*v));
		v->tsc_offset = dflt_tsc_offset;
		v->reliable = dflt_tsc_offset;

		rb_link_node(&v->rb_node, parent, p);
		rb_insert_color(&v->rb_node, rb_root);

static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
	struct intel_pt_queue *ptq = data;
	struct intel_pt *pt = ptq->pt;

	if (!vmcs && !pt->dflt_tsc_offset)

	return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);

static void intel_pt_free_vmcs_info(struct intel_pt *pt)
	struct intel_pt_vmcs_info *v;

	n = rb_first(&pt->vmcs_info);
		v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
		rb_erase(&v->rb_node, &pt->vmcs_info);
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
	bool consecutive = false;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc, &consecutive,
				      pt->synth_opts.vm_time_correlation);
	/*
	 * In the case of vm_time_correlation, the overlap might contain TSC
	 * packets that will not be fixed, and that will then no longer work for
	 * overlap detection. Avoid that by zeroing out the overlap.
	 */
	if (pt->synth_opts.vm_time_correlation)
		memset(b->data, 0, start - b->data);
	b->use_size = b->data + b->size - start;

	if (b->use_size && consecutive)
		b->consecutive = true;
static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
			       struct auxtrace_buffer *buffer,
			       struct auxtrace_buffer *old_buffer,
			       struct intel_pt_buffer *b)
	int fd = perf_data__fd(ptq->pt->session->data);

	buffer->data = auxtrace_buffer__get_data(buffer, fd);

	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
	if (might_overlap && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
		b->len = buffer->size;
		b->buf = buffer->data;
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
		b->consecutive = true;
/* Do not drop buffers with references - refer intel_pt_get_trace() */
static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
					   struct auxtrace_buffer *buffer)
	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)

	auxtrace_buffer__drop_data(buffer);

/* Must be serialized with respect to intel_pt_get_trace() */
static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

		struct intel_pt_buffer b = { .len = 0 };

		buffer = auxtrace_buffer__next(queue, buffer);

		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
			intel_pt_lookahead_drop_buffer(ptq, buffer);

		err = cb(&b, cb_data);

	if (buffer != old_buffer)
		intel_pt_lookahead_drop_buffer(ptq, buffer);
	intel_pt_lookahead_drop_buffer(ptq, old_buffer);
/*
 * This function assumes data is processed sequentially only.
 * Must be serialized with respect to intel_pt_lookahead()
 */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
		auxtrace_buffer__drop_data(old_buffer);

	ptq->buffer = buffer;

	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);

	if (ptq->step_through_buffers)

		auxtrace_buffer__drop_data(old_buffer);
		ptq->old_buffer = buffer;
		auxtrace_buffer__drop_data(buffer);
		return intel_pt_get_trace(b, data);
struct intel_pt_cache_entry {
	struct auxtrace_cache_entry entry;
	enum intel_pt_insn_op op;
	enum intel_pt_insn_branch branch;
	bool emulated_ptwrite;
	char insn[INTEL_PT_INSN_BUF_SZ];

static int intel_pt_config_div(const char *var, const char *value, void *data)
	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)
static int intel_pt_cache_divisor(void)
	perf_config(intel_pt_config_div, &d);

static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();
	if (size > (1 << 21))
	return 32 - __builtin_clz(size);

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
	struct auxtrace_cache *c;

	if (dso__auxtrace_cache(dso))
		return dso__auxtrace_cache(dso);

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso__set_auxtrace_cache(dso, c);
static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;

	e = auxtrace_cache__alloc_entry(c);

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;
	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);

	err = auxtrace_cache__add(c, offset, &e->entry);
		auxtrace_cache__free_entry(c, e);

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	return auxtrace_cache__lookup(dso__auxtrace_cache(dso), offset);

static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	auxtrace_cache__remove(dso__auxtrace_cache(dso), offset);
static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
	/* Assumes 64-bit kernel */
	return ip & (1ULL << 63);
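
/*
 * Note: with a 64-bit guest kernel, kernel addresses live in the upper
 * canonical half of the address space, so testing bit 63 of the IP above is
 * enough to classify guest kernel vs. guest user.
 */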
static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
		return intel_pt_guest_kernel_ip(ip) ?
		       PERF_RECORD_MISC_GUEST_KERNEL :
		       PERF_RECORD_MISC_GUEST_USER;

	return ip >= ptq->pt->kernel_start ?
	       PERF_RECORD_MISC_KERNEL :
	       PERF_RECORD_MISC_USER;

static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
	/* No support for non-zero CS base */
		return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
	return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
static int intel_pt_get_guest(struct intel_pt_queue *ptq)
	struct machines *machines = &ptq->pt->session->machines;
	struct machine *machine;
	pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;

	if (ptq->guest_machine && pid == ptq->guest_machine->pid)

	ptq->guest_machine = NULL;
	thread__zput(ptq->unknown_guest_thread);

	if (symbol_conf.guest_code) {
		thread__zput(ptq->guest_thread);
		ptq->guest_thread = machines__findnew_guest_code(machines, pid);

	machine = machines__find_guest(machines, pid);

	ptq->unknown_guest_thread = machine__idle_thread(machine);
	if (!ptq->unknown_guest_thread)

	ptq->guest_machine = machine;

static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
	return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite "
#define PTWRITE_MAGIC_LEN 16

static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
	unsigned char buf[PTWRITE_MAGIC_LEN];

	len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
	if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
		intel_pt_log("Emulated ptwrite signature found\n");
	intel_pt_log("Emulated ptwrite signature not found\n");
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	u64 offset, start_offset, start_ip;

	addr_location__init(&al);
	intel_pt_insn->length = 0;
	intel_pt_insn->op = INTEL_PT_OP_OTHER;

	if (to_ip && *ip == to_ip)

	nr = ptq->state->to_nr;
	cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);

		if (ptq->pt->have_guest_sideband) {
			if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) {
				intel_pt_log("ERROR: guest sideband but no guest machine\n");
		} else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
			   intel_pt_get_guest(ptq)) {
			intel_pt_log("ERROR: no guest machine\n");

		machine = ptq->guest_machine;
		thread = ptq->guest_thread;
			if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) {
				intel_pt_log("ERROR: no guest thread\n");
			thread = ptq->unknown_guest_thread;
		thread = ptq->thread;
			if (cpumode != PERF_RECORD_MISC_KERNEL) {
				intel_pt_log("ERROR: no thread\n");
			thread = ptq->pt->unknown_thread;

		if (!thread__find_map(thread, cpumode, *ip, &al) || !map__dso(al.map)) {
				intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip);
				intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip);
			addr_location__exit(&al);

		dso = map__dso(al.map);

		if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
		    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) {

		offset = map__map_ip(al.map, *ip);

		if (!to_ip && one_map) {
			struct intel_pt_cache_entry *e;

			e = intel_pt_cache_lookup(dso, machine, offset);
			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
				*insn_cnt_ptr = e->insn_cnt;
				intel_pt_insn->op = e->op;
				intel_pt_insn->branch = e->branch;
				intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
				intel_pt_insn->length = e->length;
				intel_pt_insn->rel = e->rel;
				memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ);
				intel_pt_log_insn_no_data(intel_pt_insn, *ip);

		start_offset = offset;

		/* Load maps to ensure dso->is_64_bit has been updated */

		x86_64 = dso__is_64_bit(dso);

			len = dso__data_read_offset(dso, machine,
						    INTEL_PT_INSN_BUF_SZ);
				intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ",
				if (intel_pt_enable_logging)
					dso__fprintf(dso, intel_pt_log_fp());

			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) {

			intel_pt_log_insn(intel_pt_insn, *ip);

			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {

				if (!intel_pt_jmp_16(intel_pt_insn))
				/* Check for emulated ptwrite */
				offs = offset + intel_pt_insn->length;
				eptw = intel_pt_emulated_ptwrite(dso, machine, offs);
				intel_pt_insn->emulated_ptwrite = eptw;

			if (max_insn_cnt && insn_cnt >= max_insn_cnt)

			*ip += intel_pt_insn->length;

			if (to_ip && *ip == to_ip) {
				intel_pt_insn->length = 0;
				intel_pt_insn->op = INTEL_PT_OP_OTHER;

			if (*ip >= map__end(al.map))

			offset += intel_pt_insn->length;

	*insn_cnt_ptr = insn_cnt;

	/*
	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
	 */
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(map__dso(al.map), machine, start_offset);

	/* Ignore cache errors */
	intel_pt_cache_add(map__dso(al.map), machine, start_offset, insn_cnt,
			   *ip - start_ip, intel_pt_insn);

	addr_location__exit(&al);

	*insn_cnt_ptr = insn_cnt;
	addr_location__exit(&al);
static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
				  uint64_t offset, const char *filename)
	struct addr_filter *filt;
	bool have_filter = false;
	bool hit_tracestop = false;
	bool hit_filter = false;

	list_for_each_entry(filt, &pt->filts.head, list) {
		if ((filename && !filt->filename) ||
		    (!filename && filt->filename) ||
		    (filename && strcmp(filename, filt->filename)))

		if (!(offset >= filt->addr && offset < filt->addr + filt->size))

		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
			     ip, offset, filename ? filename : "[kernel]",
			     filt->start ? "filter" : "stop",
			     filt->addr, filt->size);

			hit_tracestop = true;

	if (!hit_tracestop && !hit_filter)
		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
			     ip, offset, filename ? filename : "[kernel]");

	return hit_tracestop || (have_filter && !hit_filter);
static int __intel_pt_pgd_ip(uint64_t ip, void *data)
	struct intel_pt_queue *ptq = data;
	struct thread *thread;
	struct addr_location al;

	if (ptq->state->to_nr) {
		if (intel_pt_guest_kernel_ip(ip))
			return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
		/* No support for decoding guest user space */
	} else if (ip >= ptq->pt->kernel_start) {
		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);

	cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;

	addr_location__init(&al);
	if (!thread__find_map(thread, cpumode, ip, &al) || !map__dso(al.map))

	offset = map__map_ip(al.map, ip);

	res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, dso__long_name(map__dso(al.map)));
	addr_location__exit(&al);

static bool intel_pt_pgd_ip(uint64_t ip, void *data)
	return __intel_pt_pgd_ip(ip, data) > 0;
static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
	if (attr->type == pt->pmu_type) {
			*config = attr->config;

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)
static bool intel_pt_return_compression(struct intel_pt *pt)
	struct evsel *evsel;

	if (!pt->noretcomp_bit)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & pt->noretcomp_bit))

static bool intel_pt_branch_enable(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & INTEL_PT_CFG_PASS_THRU) &&
		    !(config & INTEL_PT_CFG_BRANCH_EN))

static bool intel_pt_disabled_tnt(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    config & INTEL_PT_CFG_TNT_DIS)
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
	struct evsel *evsel;

	if (!pt->mtc_freq_bits)

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;

static bool intel_pt_timeless_decoding(struct intel_pt *pt)
	struct evsel *evsel;
	bool timeless_decoding = true;

	if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;

	return timeless_decoding;
static bool intel_pt_tracing_kernel(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)

static bool intel_pt_have_tsc(struct intel_pt *pt)
	struct evsel *evsel;
	bool have_tsc = false;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)

static bool intel_pt_have_mtc(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & pt->mtc_bit))

static bool intel_pt_sampling_mode(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
		    evsel->core.attr.aux_sample_size)
static u64 intel_pt_ctl(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))

static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
	quot = ns / pt->tc.time_mult;
	rem  = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
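
/*
 * Note: intel_pt_ns_to_ticks() applies the inverse of the tc.time_mult /
 * tc.time_shift scaling used by tsc_to_perf_time(); splitting ns into
 * quotient and remainder before shifting avoids overflowing 64 bits for
 * large ns values.
 */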
static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
	size_t sz = sizeof(struct ip_callchain);

	/* Add 1 to callchain_sz for callchain context */
	sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);

static int intel_pt_callchain_init(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;

	pt->chain = intel_pt_alloc_chain(pt);

static void intel_pt_add_callchain(struct intel_pt *pt,
				   struct perf_sample *sample)
	struct thread *thread = machine__findnew_thread(pt->machine,

	thread_stack__sample_late(thread, sample->cpu, pt->chain,
				  pt->synth_opts.callchain_sz + 1, sample->ip,

	sample->callchain = pt->chain;
static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
	size_t sz = sizeof(struct branch_stack);

	sz += entry_cnt * sizeof(struct branch_entry);

static int intel_pt_br_stack_init(struct intel_pt *pt)
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;

	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);

static void intel_pt_add_br_stack(struct intel_pt *pt,
				  struct perf_sample *sample)
	struct thread *thread = machine__findnew_thread(pt->machine,

	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
				     pt->br_stack_sz, sample->ip,

	sample->branch_stack = pt->br_stack;
	thread__put(thread);
/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
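
/*
 * LBRS_MAX is a generous upper bound on the branch-stack entries that the
 * three LBR block-item positions can yield: each position holds up to
 * INTEL_PT_BLK_ITEM_ID_CNT values, hence the "* 3U". It is used below when
 * sizing ptq->last_branch.
 */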
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
	struct intel_pt_params params = { .get_trace = 0, };
	struct perf_env *env = pt->machine->env;
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));

	if (pt->synth_opts.callchain) {
		ptq->chain = intel_pt_alloc_chain(pt);

	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);

		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
		if (!ptq->last_branch)

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)

	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.lookahead = intel_pt_lookahead;
	params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
	params.return_compression = intel_pt_return_compression(pt);
	params.branch_enable = intel_pt_branch_enable(pt);
	params.ctl = intel_pt_ctl(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
	params.quick = pt->synth_opts.quick;
	params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
	params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
	params.first_timestamp = pt->first_timestamp;
	params.max_loops = pt->max_loops;

	/* Cannot walk code without TNT, so force 'quick' mode */
	if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)

	if (pt->filts.cnt > 0)
		params.pgd_ip = intel_pt_pgd_ip;

	if (pt->synth_opts.instructions || pt->synth_opts.cycles) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
						INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;

	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
		params.flags |= INTEL_PT_FUP_WITH_NLIP;

	ptq->decoder = intel_pt_decoder_new(&params);

	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
static void intel_pt_free_queue(void *priv)
	struct intel_pt_queue *ptq = priv;

	thread__zput(ptq->thread);
	thread__zput(ptq->guest_thread);
	thread__zput(ptq->unknown_guest_thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);

static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
	pt->first_timestamp = timestamp;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && ptq->decoder)
			intel_pt_set_first_timestamp(ptq->decoder, timestamp);
static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq)
	struct machines *machines = &ptq->pt->session->machines;
	struct machine *machine;
	pid_t machine_pid = ptq->pid;

	if (machine_pid <= 0)
		return 0; /* Not a guest machine */

	machine = machines__find(machines, machine_pid);
		return 0; /* Not a guest machine */

	if (ptq->guest_machine != machine) {
		ptq->guest_machine = NULL;
		thread__zput(ptq->guest_thread);
		thread__zput(ptq->unknown_guest_thread);

		ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0);
		if (!ptq->unknown_guest_thread)
		ptq->guest_machine = machine;

	vcpu = ptq->thread ? thread__guest_cpu(ptq->thread) : -1;

	tid = machine__get_current_tid(machine, vcpu);

	if (ptq->guest_thread && thread__tid(ptq->guest_thread) != tid)
		thread__zput(ptq->guest_thread);

	if (!ptq->guest_thread) {
		ptq->guest_thread = machine__find_thread(machine, -1, tid);
		if (!ptq->guest_thread)

	ptq->guest_machine_pid = machine_pid;
	ptq->guest_pid = thread__pid(ptq->guest_thread);
	ptq->guest_tid = tid;
static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		thread__zput(ptq->thread);

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

		ptq->pid = thread__pid(ptq->thread);
		if (queue->cpu == -1)
			ptq->cpu = thread__cpu(ptq->thread);

	if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) {
		ptq->guest_machine_pid = 0;
		ptq->guest_pid = -1;
		ptq->guest_tid = -1;
*ptq
)
1509 struct intel_pt
*pt
= ptq
->pt
;
1512 if (ptq
->state
->flags
& INTEL_PT_ABORT_TX
) {
1513 ptq
->flags
= PERF_IP_FLAG_BRANCH
| PERF_IP_FLAG_TX_ABORT
;
1514 } else if (ptq
->state
->flags
& INTEL_PT_ASYNC
) {
1515 if (!ptq
->state
->to_ip
)
1516 ptq
->flags
= PERF_IP_FLAG_BRANCH
|
1517 PERF_IP_FLAG_ASYNC
|
1518 PERF_IP_FLAG_TRACE_END
;
1519 else if (ptq
->state
->from_nr
&& !ptq
->state
->to_nr
)
1520 ptq
->flags
= PERF_IP_FLAG_BRANCH
| PERF_IP_FLAG_CALL
|
1521 PERF_IP_FLAG_ASYNC
|
1522 PERF_IP_FLAG_VMEXIT
;
1524 ptq
->flags
= PERF_IP_FLAG_BRANCH
| PERF_IP_FLAG_CALL
|
1525 PERF_IP_FLAG_ASYNC
|
1526 PERF_IP_FLAG_INTERRUPT
;
1528 if (ptq
->state
->from_ip
)
1529 ptq
->flags
= intel_pt_insn_type(ptq
->state
->insn_op
);
1531 ptq
->flags
= PERF_IP_FLAG_BRANCH
|
1532 PERF_IP_FLAG_TRACE_BEGIN
;
1533 if (ptq
->state
->flags
& INTEL_PT_IN_TX
)
1534 ptq
->flags
|= PERF_IP_FLAG_IN_TX
;
1535 ptq
->insn_len
= ptq
->state
->insn_len
;
1536 memcpy(ptq
->insn
, ptq
->state
->insn
, INTEL_PT_INSN_BUF_SZ
);
1539 if (ptq
->state
->type
& INTEL_PT_TRACE_BEGIN
)
1540 ptq
->flags
|= PERF_IP_FLAG_TRACE_BEGIN
;
1541 if (ptq
->state
->type
& INTEL_PT_TRACE_END
)
1542 ptq
->flags
|= PERF_IP_FLAG_TRACE_END
;
1544 if (pt
->cap_event_trace
) {
1545 if (ptq
->state
->type
& INTEL_PT_IFLAG_CHG
) {
1546 if (!ptq
->state
->from_iflag
)
1547 ptq
->flags
|= PERF_IP_FLAG_INTR_DISABLE
;
1548 if (ptq
->state
->from_iflag
!= ptq
->state
->to_iflag
)
1549 ptq
->flags
|= PERF_IP_FLAG_INTR_TOGGLE
;
1550 } else if (!ptq
->state
->to_iflag
) {
1551 ptq
->flags
|= PERF_IP_FLAG_INTR_DISABLE
;
static void intel_pt_setup_time_range(struct intel_pt *pt,
				      struct intel_pt_queue *ptq)
	ptq->sel_timestamp = pt->time_ranges[0].start;

	if (ptq->sel_timestamp) {
		ptq->sel_start = true;
		ptq->sel_timestamp = pt->time_ranges[0].end;
		ptq->sel_start = false;
static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))

		ptq = intel_pt_alloc_queue(pt, queue_nr);

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		ptq->cbr_seen = UINT_MAX;

		if (pt->sampling_mode && !pt->snapshot_mode &&
		    pt->timeless_decoding)
			ptq->step_through_buffers = true;

		ptq->sync_switch = pt->sync_switch;

		intel_pt_setup_time_range(pt, ptq);

	if (!ptq->on_heap &&
	    (!ptq->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;

		if (pt->timeless_decoding)

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);

		if (ptq->sel_start && ptq->sel_timestamp) {
			ret = intel_pt_fast_forward(ptq->decoder,
						    ptq->sel_timestamp);

			state = intel_pt_decode(ptq->decoder);
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
			if (state->timestamp)

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->have_sample = true;
		if (ptq->sel_start && ptq->sel_timestamp &&
		    ptq->timestamp < ptq->sel_timestamp)
			ptq->have_sample = false;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		ptq->on_heap = true;
static int intel_pt_setup_queues(struct intel_pt *pt)
	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);

static inline bool intel_pt_skip_event(struct intel_pt *pt)
	return pt->synth_opts.initial_skip &&
	       pt->num_events++ < pt->synth_opts.initial_skip;
/*
 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
 * from this decoder state.
 */
static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
	return pt->synth_opts.initial_skip &&
	       pt->num_events + 4 < pt->synth_opts.initial_skip;
static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample->pid = ptq->pid;
	sample->tid = ptq->tid;

	if (ptq->pt->have_guest_sideband) {
		if ((ptq->state->from_ip && ptq->state->from_nr) ||
		    (ptq->state->to_ip && ptq->state->to_nr)) {
			sample->pid = ptq->guest_pid;
			sample->tid = ptq->guest_tid;
			sample->machine_pid = ptq->guest_machine_pid;
			sample->vcpu = ptq->vcpu;

	sample->cpu = ptq->cpu;
	sample->insn_len = ptq->insn_len;
	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
static void intel_pt_prep_b_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	intel_pt_prep_a_sample(ptq, event, sample);

	if (!pt->timeless_decoding)
		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample->ip = ptq->state->from_ip;
	sample->addr = ptq->state->to_ip;
	sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);

	sample->flags = ptq->flags;

	event->sample.header.misc = sample->cpumode;

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type)
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
static inline int intel_pt_opt_inject(struct intel_pt *pt,
				      union perf_event *event,
				      struct perf_sample *sample, u64 type)
	if (!pt->synth_opts.inject)

	return intel_pt_inject_event(event, sample, type);

static int intel_pt_deliver_synth_event(struct intel_pt *pt,
					union perf_event *event,
					struct perf_sample *sample, u64 type)
	ret = intel_pt_opt_inject(pt, event, sample, type);

	ret = perf_session__deliver_synth_event(pt->session, event, sample);
		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		struct branch_entry entries;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))

	if (intel_pt_skip_event(pt))

	intel_pt_prep_b_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
	 */
	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
		dummy_bs = (struct dummy_branch_stack){
		sample.branch_stack = (struct branch_stack *)&dummy_bs;

	if (ptq->sample_ipc)
		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->branches_sample_type);
static void intel_pt_prep_sample(struct intel_pt *pt,
				 struct intel_pt_queue *ptq,
				 union perf_event *event,
				 struct perf_sample *sample)
	intel_pt_prep_b_sample(pt, ptq, event, sample);

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz + 1,
				     sample->ip, pt->kernel_start);
		sample->callchain = ptq->chain;

	if (pt->synth_opts.last_branch) {
		thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
		sample->branch_stack = ptq->last_branch;
static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	if (pt->synth_opts.quick)
		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;

	if (ptq->sample_ipc)
		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->instructions_sample_type);
static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (ptq->sample_ipc)
		period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;

	if (!period || intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->cycles_id;
	sample.stream_id = ptq->pt->cycles_id;
	sample.period = period;

	sample.cyc_cnt = period;
	sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt;
	ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
	ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->transactions_sample_type);

static void intel_pt_prep_p_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
	intel_pt_prep_sample(pt, ptq, event, sample);

	/*
	 * Zero IP is used to mean "trace start" but that is not the case for
	 * power or PTWRITE events with no IP, so clear the flags.
	 */
*ptq
)
1928 struct intel_pt
*pt
= ptq
->pt
;
1929 union perf_event
*event
= ptq
->event_buf
;
1930 struct perf_sample sample
= { .ip
= 0, };
1931 struct perf_synth_intel_ptwrite raw
;
1933 if (intel_pt_skip_event(pt
))
1936 intel_pt_prep_p_sample(pt
, ptq
, event
, &sample
);
1938 sample
.id
= ptq
->pt
->ptwrites_id
;
1939 sample
.stream_id
= ptq
->pt
->ptwrites_id
;
1942 raw
.ip
= !!(ptq
->state
->flags
& INTEL_PT_FUP_IP
);
1943 raw
.payload
= cpu_to_le64(ptq
->state
->ptw_payload
);
1945 sample
.raw_size
= perf_synth__raw_size(raw
);
1946 sample
.raw_data
= perf_synth__raw_data(&raw
);
1948 return intel_pt_deliver_synth_event(pt
, event
, &sample
,
1949 pt
->ptwrites_sample_type
);
static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_cbr raw;

	if (intel_pt_skip_cbr_event(pt))

	ptq->cbr_seen = ptq->state->cbr;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->cbr_id;
	sample.stream_id = ptq->pt->cbr_id;

	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
	raw.flags = cpu_to_le32(flags);
	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_psb raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->psb_id;
	sample.stream_id = ptq->pt->psb_id;

	raw.offset = ptq->state->psb_offset;

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_mwait raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->mwait_id;
	sample.stream_id = ptq->pt->mwait_id;

	raw.payload = cpu_to_le64(ptq->state->mwait_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwre raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwre_id;
	sample.stream_id = ptq->pt->pwre_id;

	raw.payload = cpu_to_le64(ptq->state->pwre_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_exstop raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->exstop_id;
	sample.stream_id = ptq->pt->exstop_id;

	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwrx raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwrx_id;
	sample.stream_id = ptq->pt->pwrx_id;

	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
/*
 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
 * intel_pt_add_gp_regs().
 */
static const int pebs_gp_regs[] = {
	[PERF_REG_X86_FLAGS]	= 1,
	[PERF_REG_X86_IP]	= 2,
	[PERF_REG_X86_AX]	= 3,
	[PERF_REG_X86_CX]	= 4,
	[PERF_REG_X86_DX]	= 5,
	[PERF_REG_X86_BX]	= 6,
	[PERF_REG_X86_SP]	= 7,
	[PERF_REG_X86_BP]	= 8,
	[PERF_REG_X86_SI]	= 9,
	[PERF_REG_X86_DI]	= 10,
	[PERF_REG_X86_R8]	= 11,
	[PERF_REG_X86_R9]	= 12,
	[PERF_REG_X86_R10]	= 13,
	[PERF_REG_X86_R11]	= 14,
	[PERF_REG_X86_R12]	= 15,
	[PERF_REG_X86_R13]	= 16,
	[PERF_REG_X86_R14]	= 17,
	[PERF_REG_X86_R15]	= 18,
*intel_pt_add_gp_regs(struct regs_dump
*intr_regs
, u64
*pos
,
2134 const struct intel_pt_blk_items
*items
,
2137 const u64
*gp_regs
= items
->val
[INTEL_PT_GP_REGS_POS
];
2138 u32 mask
= items
->mask
[INTEL_PT_GP_REGS_POS
];
2142 for (i
= 0, bit
= 1; i
< PERF_REG_X86_64_MAX
; i
++, bit
<<= 1) {
2143 /* Get the PEBS gp_regs array index */
2144 int n
= pebs_gp_regs
[i
] - 1;
2149 * Add only registers that were requested (i.e. 'regs_mask') and
2150 * that were provided (i.e. 'mask'), and update the resulting
2151 * mask (i.e. 'intr_regs->mask') accordingly.
2153 if (mask
& 1 << n
&& regs_mask
& bit
) {
2154 intr_regs
->mask
|= bit
;
2155 *pos
++ = gp_regs
[n
];
2162 #ifndef PERF_REG_X86_XMM0
2163 #define PERF_REG_X86_XMM0 32
2166 static void intel_pt_add_xmm(struct regs_dump
*intr_regs
, u64
*pos
,
2167 const struct intel_pt_blk_items
*items
,
2170 u32 mask
= items
->has_xmm
& (regs_mask
>> PERF_REG_X86_XMM0
);
2171 const u64
*xmm
= items
->xmm
;
2174 * If there are any XMM registers, then there should be all of them.
2175 * Nevertheless, follow the logic to add only registers that were
2176 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
2177 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
2179 intr_regs
->mask
|= (u64
)mask
<< PERF_REG_X86_XMM0
;
2181 for (; mask
; mask
>>= 1, xmm
++) {
#define LBR_INFO_MISPRED	(1ULL << 63)
#define LBR_INFO_IN_TX		(1ULL << 62)
#define LBR_INFO_ABORT		(1ULL << 61)
#define LBR_INFO_CYCLES		0xffff

/* Refer kernel's intel_pmu_store_pebs_lbrs() */
static u64 intel_pt_lbr_flags(u64 info)
	struct branch_flags flags;

	u.flags.mispred = !!(info & LBR_INFO_MISPRED);
	u.flags.predicted = !(info & LBR_INFO_MISPRED);
	u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
	u.flags.abort = !!(info & LBR_INFO_ABORT);
	u.flags.cycles = info & LBR_INFO_CYCLES;
static void intel_pt_add_lbrs(struct branch_stack *br_stack,
			      const struct intel_pt_blk_items *items)
	to = &br_stack->entries[0].from;

	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
		u32 mask = items->mask[i];
		const u64 *from = items->val[i];

		for (; mask; mask >>= 3, from += 3) {
			if ((mask & 7) == 7) {
				*to++ = intel_pt_lbr_flags(from[2]);
static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
	const struct intel_pt_blk_items *items = &ptq->state->items;
	struct perf_sample sample = { .ip = 0, };
	union perf_event *event = ptq->event_buf;
	struct intel_pt *pt = ptq->pt;
	u64 sample_type = evsel->core.attr.sample_type;
	u64 regs[8 * sizeof(sample.intr_regs.mask)];

	if (intel_pt_skip_event(pt))

	intel_pt_prep_a_sample(ptq, event, &sample);

	sample.stream_id = id;

	if (!evsel->core.attr.freq)
		sample.period = evsel->core.attr.sample_period;

	/* No support for non-zero CS base */
		sample.ip = items->ip;
	else if (items->has_rip)
		sample.ip = items->rip;
		sample.ip = ptq->state->from_ip;

	cpumode = intel_pt_cpumode(ptq, sample.ip, 0);

	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;

	sample.cpumode = cpumode;

	if (sample_type & PERF_SAMPLE_TIME) {
		if (items->has_timestamp)
			timestamp = items->timestamp;
		else if (!pt->timeless_decoding)
			timestamp = ptq->timestamp;
			sample.time = tsc_to_perf_time(timestamp, &pt->tc);

	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
	    pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip,
		sample.callchain = ptq->chain;

	if (sample_type & PERF_SAMPLE_REGS_INTR &&
	    (items->mask[INTEL_PT_GP_REGS_POS] ||
	     items->mask[INTEL_PT_XMM_POS])) {
		u64 regs_mask = evsel->core.attr.sample_regs_intr;

		sample.intr_regs.abi = items->is_32_bit ?
				       PERF_SAMPLE_REGS_ABI_32 :
				       PERF_SAMPLE_REGS_ABI_64;
		sample.intr_regs.regs = regs;

		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);

		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);

	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
		if (items->mask[INTEL_PT_LBR_0_POS] ||
		    items->mask[INTEL_PT_LBR_1_POS] ||
		    items->mask[INTEL_PT_LBR_2_POS]) {
			intel_pt_add_lbrs(ptq->last_branch, items);
		} else if (pt->synth_opts.last_branch) {
			thread_stack__br_sample(ptq->thread, ptq->cpu,
			ptq->last_branch->nr = 0;
		sample.branch_stack = ptq->last_branch;

	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
		sample.addr = items->mem_access_address;

	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
		/*
		 * Refer kernel's setup_pebs_adaptive_sample_data() and
		 * intel_hsw_weight().
		 */
		if (items->has_mem_access_latency) {
			u64 weight = items->mem_access_latency >> 32;

			/*
			 * Starts from SPR, the mem access latency field
			 * contains both cache latency [47:32] and instruction
			 * latency [15:0]. The cache latency is the same as the
			 * mem access latency on previous platforms.
			 *
			 * In practice, no memory access could last longer than
			 * 4G cycles. Use latency >> 32 to distinguish the
			 * different format of the mem access latency field.
			 */
				sample.weight = weight & 0xffff;
				sample.ins_lat = items->mem_access_latency & 0xffff;
				sample.weight = items->mem_access_latency;

		if (!sample.weight && items->has_tsx_aux_info) {
			/* Cycles last block */
			sample.weight = (u32)items->tsx_aux_info;

	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
		u64 ax = items->has_rax ? items->rax : 0;
		/* Refer kernel's intel_hsw_transaction() */
		u64 txn = (u8)(items->tsx_aux_info >> 32);

		/* For RTM XABORTs also log the abort code from AX */
		if (txn & PERF_TXN_TRANSACTION && ax & 1)
			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
		sample.transaction = txn;

	return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	struct evsel *evsel = pt->pebs_evsel;
	u64 id = evsel->core.id[0];

	return intel_pt_do_synth_pebs_sample(ptq, evsel, id);

static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
	const struct intel_pt_blk_items *items = &ptq->state->items;
	struct intel_pt_pebs_event *pe;
	struct intel_pt *pt = ptq->pt;

	if (!items->has_applicable_counters || !items->applicable_counters) {
		if (!pt->single_pebs)
			pr_err("PEBS-via-PT record with no applicable_counters\n");
		return intel_pt_synth_single_pebs_sample(ptq);

	for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) {
		pe = &ptq->pebs[hw_id];
			if (!pt->single_pebs)
				pr_err("PEBS-via-PT record with no matching event, hw_id %d\n",
			return intel_pt_synth_single_pebs_sample(ptq);

		err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_evt cfe;
	struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->evt_id;
	sample.stream_id = ptq->pt->evt_id;

	raw.cfe.type = ptq->state->cfe_type;
	raw.cfe.reserved = 0;
	raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
	raw.cfe.vector = ptq->state->cfe_vector;
	raw.cfe.evd_cnt = ptq->state->evd_cnt;

	for (i = 0; i < ptq->state->evd_cnt; i++) {
		raw.evd[i].evd_type = ptq->state->evd[i].type;
		raw.evd[i].payload = ptq->state->evd[i].payload;

	sample.raw_size = perf_synth__raw_size(raw) +
			  ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->evt_sample_type);
static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_iflag_chg raw;

	if (intel_pt_skip_event(pt))

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->iflag_chg_id;
	sample.stream_id = ptq->pt->iflag_chg_id;

	raw.iflag = ptq->state->to_iflag;

	if (ptq->state->type & INTEL_PT_BRANCH) {
		raw.branch_ip = ptq->state->to_ip;

	sample.flags = ptq->flags;

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->iflag_chg_sample_type);
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
				pid_t pid, pid_t tid, u64 ip, u64 timestamp,
				pid_t machine_pid, int vcpu)
	bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
	bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT;
	union perf_event event;
	char msg[MAX_AUXTRACE_ERROR_MSG];

	if (pt->synth_opts.error_minus_flags) {
		if (code == INTEL_PT_ERR_OVR &&
		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
		if (code == INTEL_PT_ERR_LOST &&
		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)

	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

	auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
				   code, cpu, pid, tid, ip, msg, timestamp,

	if (intel_pt_enable_logging && !log_on_stdout) {
		FILE *fp = intel_pt_log_fp();

			perf_event__fprintf_auxtrace_error(&event, fp);

	if (code != INTEL_PT_ERR_LOST && dump_log_on_error)
		intel_pt_log_dump_buf();

	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
				 const struct intel_pt_state *state)
{
	struct intel_pt *pt = ptq->pt;
	u64 tm = ptq->timestamp;
	pid_t machine_pid = 0;
	pid_t pid = ptq->pid;
	pid_t tid = ptq->tid;
	int vcpu = -1;

	tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);

	if (pt->have_guest_sideband && state->from_nr) {
		machine_pid = ptq->guest_machine_pid;
		vcpu = ptq->vcpu;
		pid = ptq->guest_pid;
		tid = ptq->guest_tid;
	}

	return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid,
				    state->from_ip, tm, machine_pid, vcpu);
}
static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
	struct auxtrace_queue *queue;
	pid_t tid = ptq->next_tid;
	int err;

	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

	queue = &pt->queues.queue_array[ptq->queue_nr];
	intel_pt_set_pid_tid_cpu(pt, queue);

	ptq->next_tid = -1;

	return err;
}
static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
	struct intel_pt *pt = ptq->pt;

	return ip == pt->switch_ip &&
	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}

#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
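
/*
 * Synthesize all samples implied by the current decoder state, in a fixed
 * order: PEBS records first (their timestamp may precede the current one),
 * then interrupt/event and power events, then instructions, cycles,
 * transactions and ptwrites, and finally branch-related processing and
 * sync_switch state tracking.
 */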
static int intel_pt_sample(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!ptq->have_sample)
		return 0;

	ptq->have_sample = false;

	if (pt->synth_opts.approx_ipc) {
		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
		ptq->ipc_cyc_cnt = ptq->state->cycles;
		ptq->sample_ipc = true;
	} else {
		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
		ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
	}

	/* Ensure guest code maps are set up */
	if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
		intel_pt_get_guest(ptq);

	/*
	 * Do PEBS first to allow for the possibility that the PEBS timestamp
	 * precedes the current timestamp.
	 */
	if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
		err = intel_pt_synth_pebs_sample(ptq);
		if (err)
			return err;
	}

	if (pt->synth_opts.intr_events) {
		if (state->type & INTEL_PT_EVT) {
			err = intel_pt_synth_events_sample(ptq);
			if (err)
				return err;
		}
		if (state->type & INTEL_PT_IFLAG_CHG) {
			err = intel_pt_synth_iflag_chg_sample(ptq);
			if (err)
				return err;
		}
	}

	if (pt->sample_pwr_events) {
		if (state->type & INTEL_PT_PSB_EVT) {
			err = intel_pt_synth_psb_sample(ptq);
			if (err)
				return err;
		}
		if (ptq->state->cbr != ptq->cbr_seen) {
			err = intel_pt_synth_cbr_sample(ptq);
			if (err)
				return err;
		}
		if (state->type & INTEL_PT_PWR_EVT) {
			if (state->type & INTEL_PT_MWAIT_OP) {
				err = intel_pt_synth_mwait_sample(ptq);
				if (err)
					return err;
			}
			if (state->type & INTEL_PT_PWR_ENTRY) {
				err = intel_pt_synth_pwre_sample(ptq);
				if (err)
					return err;
			}
			if (state->type & INTEL_PT_EX_STOP) {
				err = intel_pt_synth_exstop_sample(ptq);
				if (err)
					return err;
			}
			if (state->type & INTEL_PT_PWR_EXIT) {
				err = intel_pt_synth_pwrx_sample(ptq);
				if (err)
					return err;
			}
		}
	}

	if (state->type & INTEL_PT_INSTRUCTION) {
		if (pt->sample_instructions) {
			err = intel_pt_synth_instruction_sample(ptq);
			if (err)
				return err;
		}
		if (pt->sample_cycles) {
			err = intel_pt_synth_cycle_sample(ptq);
			if (err)
				return err;
		}
	}

	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
		err = intel_pt_synth_transaction_sample(ptq);
		if (err)
			return err;
	}

	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
		err = intel_pt_synth_ptwrite_sample(ptq);
		if (err)
			return err;
	}

	if (!(state->type & INTEL_PT_BRANCH))
		return 0;

	if (pt->use_thread_stack) {
		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
				    state->from_ip, state->to_ip, ptq->insn_len,
				    state->trace_nr, pt->callstack,
				    pt->br_stack_sz_plus,
				    pt->mispred_all);
	} else {
		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
	}

	if (pt->sample_branches) {
		if (state->from_nr != state->to_nr &&
		    state->from_ip && state->to_ip) {
			struct intel_pt_state *st = (struct intel_pt_state *)state;
			u64 to_ip = st->to_ip;
			u64 from_ip = st->from_ip;

			/*
			 * perf cannot handle having different machines for ip
			 * and addr, so create 2 branches.
			 */
			st->to_ip = 0;
			err = intel_pt_synth_branch_sample(ptq);
			if (err)
				return err;
			st->from_ip = 0;
			st->to_ip = to_ip;
			err = intel_pt_synth_branch_sample(ptq);
			st->from_ip = from_ip;
		} else {
			err = intel_pt_synth_branch_sample(ptq);
		}
		if (err)
			return err;
	}

	if (!ptq->sync_switch)
		return 0;

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
		switch (ptq->switch_state) {
		case INTEL_PT_SS_NOT_TRACING:
		case INTEL_PT_SS_UNKNOWN:
		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
			err = intel_pt_next_tid(pt, ptq);
			if (err)
				return err;
			ptq->switch_state = INTEL_PT_SS_TRACING;
			break;
		default:
			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
			break;
		}
	} else if (!state->to_ip) {
		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   state->to_ip == pt->ptss_ip &&
		   (ptq->flags & PERF_IP_FLAG_CALL)) {
		ptq->switch_state = INTEL_PT_SS_TRACING;
	}

	return 0;
}
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
	struct machine *machine = pt->machine;
	struct map *map;
	struct symbol *sym, *start;
	u64 ip, switch_ip = 0;
	const char *ptss;

	if (ptss_ip)
		*ptss_ip = 0;

	map = machine__kernel_map(machine);
	if (!map)
		return 0;

	start = dso__first_symbol(map__dso(map));

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding == STB_GLOBAL &&
		    !strcmp(sym->name, "__switch_to")) {
			ip = map__unmap_ip(map, sym->start);
			if (ip >= map__start(map) && ip < map__end(map)) {
				switch_ip = ip;
				break;
			}
		}
	}

	if (!switch_ip || !ptss_ip)
		return 0;

	if (pt->have_sched_switch == 1)
		ptss = "perf_trace_sched_switch";
	else
		ptss = "__perf_event_task_sched_out";

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (!strcmp(sym->name, ptss)) {
			ip = map__unmap_ip(map, sym->start);
			if (ip >= map__start(map) && ip < map__end(map)) {
				*ptss_ip = ip;
				break;
			}
		}
	}

	return switch_ip;
}
static void intel_pt_enable_sync_switch(struct intel_pt *pt)
{
	unsigned int i;

	if (pt->sync_switch_not_supported)
		return;

	pt->sync_switch = true;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq)
			ptq->sync_switch = true;
	}
}

static void intel_pt_disable_sync_switch(struct intel_pt *pt)
{
	unsigned int i;

	pt->sync_switch = false;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq) {
			ptq->sync_switch = false;
			intel_pt_next_tid(pt, ptq);
		}
	}
}
/*
 * To filter against time ranges, it is only necessary to look at the next start
 * or end time.
 */
static bool intel_pt_next_time(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;

	if (ptq->sel_start) {
		/* Next time is an end time */
		ptq->sel_start = false;
		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
		return true;
	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
		/* Next time is a start time */
		ptq->sel_start = true;
		ptq->sel_idx += 1;
		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
		return true;
	}

	/* No next time */
	return false;
}
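
/*
 * Apply the selected time ranges: fast forward the decoder to the next start
 * time, and stop decoding after the last end time.
 */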
static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
{
	int err;

	while (1) {
		if (ptq->sel_start) {
			if (ptq->timestamp >= ptq->sel_timestamp) {
				/* After start time, so consider next time */
				intel_pt_next_time(ptq);
				if (!ptq->sel_timestamp) {
					/* No end time */
					return 0;
				}
				/* Check against end time */
				continue;
			}
			/* Before start time, so fast forward */
			ptq->have_sample = false;
			if (ptq->sel_timestamp > *ff_timestamp) {
				if (ptq->sync_switch) {
					intel_pt_next_tid(ptq->pt, ptq);
					ptq->switch_state = INTEL_PT_SS_UNKNOWN;
				}
				*ff_timestamp = ptq->sel_timestamp;
				err = intel_pt_fast_forward(ptq->decoder,
							    ptq->sel_timestamp);
				if (err)
					return err;
			}
			return 0;
		} else if (ptq->timestamp > ptq->sel_timestamp) {
			/* After end time, so consider next time */
			if (!intel_pt_next_time(ptq)) {
				/* No next time range, so stop decoding */
				ptq->have_sample = false;
				ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
				return 1;
			}
			/* Check against next start time */
			continue;
		} else {
			/* Before end time */
			return 0;
		}
	}
}
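
/*
 * Decode one queue, synthesizing samples as it goes, until its timestamp
 * reaches *timestamp (non-timeless decoding).  Also lazily determines the
 * kernel switch IP the first time it runs.
 */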
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	u64 ff_timestamp = 0;
	int err;

	if (!pt->kernel_start) {
		pt->kernel_start = machine__kernel_start(pt->machine);
		if (pt->per_cpu_mmaps &&
		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
		    !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) {
			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
				intel_pt_enable_sync_switch(pt);
			}
		}
	}

	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
	while (1) {
		err = intel_pt_sample(ptq);
		if (err)
			return err;

		state = intel_pt_decode(ptq->decoder);
		if (state->err) {
			if (state->err == INTEL_PT_ERR_NODATA)
				return 1;
			if (ptq->sync_switch &&
			    state->from_ip >= pt->kernel_start) {
				ptq->sync_switch = false;
				intel_pt_next_tid(pt, ptq);
			}
			ptq->timestamp = state->est_timestamp;
			if (pt->synth_opts.errors) {
				err = intel_ptq_synth_error(ptq, state);
				if (err)
					return err;
			}
			continue;
		}

		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);

		/* Use estimated TSC upon return to user space */
		if (pt->est_tsc &&
		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
		    state->to_ip && state->to_ip < pt->kernel_start) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
		} else if (ptq->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		} else if (state->timestamp > ptq->timestamp) {
			ptq->timestamp = state->timestamp;
		}

		if (ptq->sel_timestamp) {
			err = intel_pt_time_filter(ptq, &ff_timestamp);
			if (err)
				return err;
		}

		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
			*timestamp = ptq->timestamp;
			return 0;
		}
	}
}
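
/*
 * Queues are processed in timestamp order using the auxtrace heap: the queue
 * with the smallest ordinal is decoded up to the next queue's timestamp and
 * then pushed back onto the heap.
 */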
static inline int intel_pt_update_queues(struct intel_pt *pt)
{
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);
	}

	return 0;
}

static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct intel_pt_queue *ptq;

		if (!pt->heap.heap_cnt)
			return 0;

		if (pt->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = pt->heap.heap_array[0].queue_nr;
		queue = &pt->queues.queue_array[queue_nr];
		ptq = queue->priv;

		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
			     queue_nr, pt->heap.heap_array[0].ordinal,
			     timestamp);

		auxtrace_heap__pop(&pt->heap);

		if (pt->heap.heap_cnt) {
			ts = pt->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		intel_pt_set_pid_tid_cpu(pt, queue);

		ret = intel_pt_run_decoder(ptq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&pt->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			ptq->on_heap = false;
		}
	}
}
static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			ptq->time = time_;
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);
		}
	}
	return 0;
}
static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
					    struct auxtrace_queue *queue,
					    struct perf_sample *sample)
{
	struct machine *m = ptq->pt->machine;

	ptq->pid = sample->pid;
	ptq->tid = sample->tid;
	ptq->cpu = queue->cpu;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->tid == -1)
		return;

	if (ptq->pid == -1) {
		ptq->thread = machine__find_thread(m, -1, ptq->tid);
		if (ptq->thread)
			ptq->pid = thread__pid(ptq->thread);
		return;
	}

	ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);
}
static int intel_pt_process_timeless_sample(struct intel_pt *pt,
					    struct perf_sample *sample)
{
	struct auxtrace_queue *queue;
	struct intel_pt_queue *ptq;
	u64 ts = 0;

	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
	if (!queue)
		return -EINVAL;

	ptq = queue->priv;
	if (!ptq)
		return 0;

	ptq->time = sample->time;
	intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
	intel_pt_run_decoder(ptq, &ts);
	return 0;
}
static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0, sample->time,
				    sample->machine_pid, sample->vcpu);
}
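
/*
 * Find the queue for a given CPU.  Queue numbers usually correspond to CPU
 * numbers, so start at the expected index and then search outwards in both
 * directions.
 */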
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
{
	unsigned i, j;

	if (cpu < 0 || !pt->queues.nr_queues)
		return NULL;

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;
	else
		i = cpu;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;
	}

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;
	}

	return NULL;
}
static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
				u64 timestamp)
{
	struct intel_pt_queue *ptq;
	int err;

	if (!pt->sync_switch)
		return 1;

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq || !ptq->sync_switch)
		return 1;

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
		break;
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
		return 0;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
							  &pt->tc);
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
						 ptq->timestamp);
			if (err)
				return err;
			ptq->on_heap = true;
		}
		ptq->switch_state = INTEL_PT_SS_TRACING;
		break;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
		break;
	default:
		break;
	}

	ptq->next_tid = -1;

	return 1;
}
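
/*
 * Handle a sched:sched_switch tracepoint sample (have_sched_switch == 1):
 * extract the next thread's tid and feed it to the sync_switch state machine.
 */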
#ifdef HAVE_LIBTRACEEVENT
static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	pid_t tid;
	int cpu, ret;
	struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);

	if (evsel != pt->switch_evsel)
		return 0;

	tid = evsel__intval(evsel, sample, "next_pid");
	cpu = sample->cpu;

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
							       &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, -1, tid);
}
#endif /* HAVE_LIBTRACEEVENT */
static int intel_pt_context_switch_in(struct intel_pt *pt,
				      struct perf_sample *sample)
{
	pid_t pid = sample->pid;
	pid_t tid = sample->tid;
	int cpu = sample->cpu;

	if (pt->sync_switch) {
		struct intel_pt_queue *ptq;

		ptq = intel_pt_cpu_to_ptq(pt, cpu);
		if (ptq && ptq->sync_switch) {
			switch (ptq->switch_state) {
			case INTEL_PT_SS_NOT_TRACING:
			case INTEL_PT_SS_UNKNOWN:
			case INTEL_PT_SS_TRACING:
				break;
			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
				ptq->switch_state = INTEL_PT_SS_TRACING;
				break;
			default:
				break;
			}
		}
	}

	/*
	 * If the current tid has not been updated yet, ensure it is now that
	 * a "switch in" event has occurred.
	 */
	if (machine__get_current_tid(pt->machine, cpu) == tid)
		return 0;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}
static int intel_pt_guest_context_switch(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct machines *machines = &pt->session->machines;
	struct machine *machine = machines__find(machines, sample->machine_pid);

	pt->have_guest_sideband = true;

	/*
	 * sync_switch cannot handle guest machines at present, so just disable
	 * it.
	 */
	pt->sync_switch_not_supported = true;
	if (pt->sync_switch)
		intel_pt_disable_sync_switch(pt);

	if (out)
		return 0;

	if (!machine)
		return -EINVAL;

	return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid);
}
static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	pid_t pid, tid;
	int cpu, ret;

	if (perf_event__is_guest(event))
		return intel_pt_guest_context_switch(pt, event, sample);

	cpu = sample->cpu;

	if (pt->have_sched_switch == 3) {
		if (!out)
			return intel_pt_context_switch_in(pt, sample);
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
			return -EINVAL;
		}
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;
	} else {
		if (out)
			return 0;
		pid = sample->pid;
		tid = sample->tid;
	}

	if (tid == -1)
		intel_pt_log("context_switch event has no tid\n");

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}
static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if (!pt->per_cpu_mmaps)
		return 0;

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}
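
/*
 * PERF_RECORD_AUX_OUTPUT_HW_ID ties a hardware counter index to the evsel
 * whose PEBS records are output into the PT trace, so that PEBS-via-PT
 * samples can be attributed to the right event.
 */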
static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
					     union perf_event *event,
					     struct perf_sample *sample)
{
	u64 hw_id = event->aux_output_hw_id.hw_id;
	struct auxtrace_queue *queue;
	struct intel_pt_queue *ptq;
	struct evsel *evsel;

	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
	evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id);
	if (!queue || !queue->priv || !evsel || hw_id >= INTEL_PT_MAX_PEBS) {
		pr_err("Bad AUX output hardware ID\n");
		return -EINVAL;
	}

	ptq = queue->priv;

	ptq->pebs[hw_id].evsel = evsel;
	ptq->pebs[hw_id].id = sample->id;

	return 0;
}
static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
			     struct addr_location *al)
{
	if (!al->map || addr < map__start(al->map) || addr >= map__end(al->map)) {
		if (!thread__find_map(thread, cpumode, addr, al))
			return -1;
	}

	return 0;
}
/* Invalidate all instruction cache entries that overlap the text poke */
static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
{
	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
	/* Assume text poke begins in a basic block no more than 4096 bytes */
	int cnt = 4096 + event->text_poke.new_len;
	struct thread *thread = pt->unknown_thread;
	struct addr_location al;
	struct machine *machine = pt->machine;
	struct intel_pt_cache_entry *e;
	u64 offset;

	addr_location__init(&al);
	if (!event->text_poke.new_len)
		goto out;

	for (; cnt; cnt--, addr--) {
		struct dso *dso;

		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
			if (addr < event->text_poke.addr)
				break;
			continue;
		}

		dso = map__dso(al.map);
		if (!dso || !dso__auxtrace_cache(dso))
			continue;

		offset = map__map_ip(al.map, addr);

		e = intel_pt_cache_lookup(dso, machine, offset);
		if (!e)
			continue;

		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
			/*
			 * No overlap. Working backwards there cannot be another
			 * basic block that overlaps the text poke if there is a
			 * branch instruction before the text poke address.
			 */
			if (e->branch != INTEL_PT_BR_NO_BRANCH)
				break;
		} else {
			intel_pt_cache_invalidate(dso, machine, offset);
			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
				     dso__long_name(dso), addr);
		}
	}
out:
	addr_location__exit(&al);
	return 0;
}
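
/*
 * Main event callback: convert the sample time to TSC, run the decoder(s) up
 * to that point, then handle sideband events (context switches, itrace start,
 * AUX output hardware IDs, text pokes) that affect decoding.
 */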
static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  const struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;
	int err = 0;

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);
		if (err)
			return err;
	}

	if (pt->timeless_decoding) {
		if (pt->sampling_mode) {
			if (sample->aux_sample.size)
				err = intel_pt_process_timeless_sample(pt,
								       sample);
		} else if (event->header.type == PERF_RECORD_EXIT) {
			err = intel_pt_process_timeless_queues(pt,
							       event->fork.tid,
							       sample->time);
		}
	} else if (timestamp) {
		if (!pt->first_timestamp)
			intel_pt_first_timestamp(pt, timestamp);
		err = intel_pt_process_queues(pt, timestamp);
	}
	if (err)
		return err;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (pt->synth_opts.add_callchain && !sample->callchain)
			intel_pt_add_callchain(pt, sample);
		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
			intel_pt_add_br_stack(pt, sample);
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    pt->synth_opts.errors) {
		err = intel_pt_lost(pt, sample);
		if (err)
			return err;
	}

#ifdef HAVE_LIBTRACEEVENT
	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
		err = intel_pt_process_switch(pt, sample);
	else
#endif
	if (event->header.type == PERF_RECORD_ITRACE_START)
		err = intel_pt_process_itrace_start(pt, event, sample);
	else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
		err = intel_pt_process_aux_output_hw_id(pt, event, sample);
	else if (event->header.type == PERF_RECORD_SWITCH ||
		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		err = intel_pt_context_switch(pt, event, sample);

	if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
		err = intel_pt_text_poke(pt, event);

	if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
		intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
			     event->header.type, sample->cpu, sample->time, timestamp);
		intel_pt_log_event(event);
	}

	return err;
}
static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	int ret;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_pt_update_queues(pt);
	if (ret)
		return ret;

	if (pt->timeless_decoding)
		return intel_pt_process_timeless_queues(pt, -1,
							MAX_TIMESTAMP - 1);

	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}
static void intel_pt_free_events(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_pt_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	intel_pt_log_disable();
	auxtrace_queues__free(queues);
}

static void intel_pt_free(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	auxtrace_heap__free(&pt->heap);
	intel_pt_free_events(session);
	session->auxtrace = NULL;
	intel_pt_free_vmcs_info(pt);
	thread__put(pt->unknown_thread);
	addr_filters__exit(&pt->filts);
	zfree(&pt->time_ranges);
	zfree(&pt->br_stack);
	free(pt);
}

static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
				       struct evsel *evsel)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	return evsel->core.attr.type == pt->pmu_type;
}
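
/*
 * Queue AUX trace data as it arrives.  For a pipe there is no file offset to
 * remember, so the data is copied; for a file the current offset is recorded
 * so the buffer can be read back later.
 */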
static int intel_pt_process_auxtrace_event(struct perf_session *session,
					   union perf_event *event,
					   const struct perf_tool *tool __maybe_unused)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	if (!pt->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&pt->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_pt_dump_event(pt, buffer->data,
						    buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}
static int intel_pt_queue_data(struct perf_session *session,
			       struct perf_sample *sample,
			       union perf_event *event, u64 data_offset)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;

	if (event) {
		return auxtrace_queues__add_event(&pt->queues, session, event,
						  data_offset, NULL);
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	return auxtrace_queues__add_sample(&pt->queues, session, sample,
					   data_offset, timestamp);
}
static int intel_pt_synth_event(struct perf_session *session, const char *name,
				struct perf_event_attr *attr, u64 id)
{
	int err;

	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
		 name, id, (u64)attr->sample_type);

	err = perf_session__deliver_synth_attr_event(session, attr, id);
	if (err)
		pr_err("%s: failed to synthesize '%s' event type\n",
		       __func__, name);

	return err;
}

static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

static struct evsel *intel_pt_evsel(struct intel_pt *pt,
				    struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
			return evsel;
	}

	return NULL;
}
3706 static int intel_pt_synth_events(struct intel_pt
*pt
,
3707 struct perf_session
*session
)
3709 struct evlist
*evlist
= session
->evlist
;
3710 struct evsel
*evsel
= intel_pt_evsel(pt
, evlist
);
3711 struct perf_event_attr attr
;
3716 pr_debug("There are no selected events with Intel Processor Trace data\n");
3720 memset(&attr
, 0, sizeof(struct perf_event_attr
));
3721 attr
.size
= sizeof(struct perf_event_attr
);
3722 attr
.type
= PERF_TYPE_HARDWARE
;
3723 attr
.sample_type
= evsel
->core
.attr
.sample_type
& PERF_SAMPLE_MASK
;
3724 attr
.sample_type
|= PERF_SAMPLE_IP
| PERF_SAMPLE_TID
|
3726 if (pt
->timeless_decoding
)
3727 attr
.sample_type
&= ~(u64
)PERF_SAMPLE_TIME
;
3729 attr
.sample_type
|= PERF_SAMPLE_TIME
;
3730 if (!pt
->per_cpu_mmaps
)
3731 attr
.sample_type
&= ~(u64
)PERF_SAMPLE_CPU
;
3732 attr
.exclude_user
= evsel
->core
.attr
.exclude_user
;
3733 attr
.exclude_kernel
= evsel
->core
.attr
.exclude_kernel
;
3734 attr
.exclude_hv
= evsel
->core
.attr
.exclude_hv
;
3735 attr
.exclude_host
= evsel
->core
.attr
.exclude_host
;
3736 attr
.exclude_guest
= evsel
->core
.attr
.exclude_guest
;
3737 attr
.sample_id_all
= evsel
->core
.attr
.sample_id_all
;
3738 attr
.read_format
= evsel
->core
.attr
.read_format
;
3740 id
= evsel
->core
.id
[0] + 1000000000;
3744 if (pt
->synth_opts
.branches
) {
3745 attr
.config
= PERF_COUNT_HW_BRANCH_INSTRUCTIONS
;
3746 attr
.sample_period
= 1;
3747 attr
.sample_type
|= PERF_SAMPLE_ADDR
;
3748 err
= intel_pt_synth_event(session
, "branches", &attr
, id
);
3751 pt
->sample_branches
= true;
3752 pt
->branches_sample_type
= attr
.sample_type
;
3753 pt
->branches_id
= id
;
3755 attr
.sample_type
&= ~(u64
)PERF_SAMPLE_ADDR
;
3758 if (pt
->synth_opts
.callchain
)
3759 attr
.sample_type
|= PERF_SAMPLE_CALLCHAIN
;
3760 if (pt
->synth_opts
.last_branch
) {
3761 attr
.sample_type
|= PERF_SAMPLE_BRANCH_STACK
;
3763 * We don't use the hardware index, but the sample generation
3764 * code uses the new format branch_stack with this field,
3765 * so the event attributes must indicate that it's present.
3767 attr
.branch_sample_type
|= PERF_SAMPLE_BRANCH_HW_INDEX
;
3770 if (pt
->synth_opts
.instructions
) {
3771 attr
.config
= PERF_COUNT_HW_INSTRUCTIONS
;
3772 if (pt
->synth_opts
.period_type
== PERF_ITRACE_PERIOD_NANOSECS
)
3773 attr
.sample_period
=
3774 intel_pt_ns_to_ticks(pt
, pt
->synth_opts
.period
);
3776 attr
.sample_period
= pt
->synth_opts
.period
;
3777 err
= intel_pt_synth_event(session
, "instructions", &attr
, id
);
3780 pt
->sample_instructions
= true;
3781 pt
->instructions_sample_type
= attr
.sample_type
;
3782 pt
->instructions_id
= id
;
3786 if (pt
->synth_opts
.cycles
) {
3787 attr
.config
= PERF_COUNT_HW_CPU_CYCLES
;
3788 if (pt
->synth_opts
.period_type
== PERF_ITRACE_PERIOD_NANOSECS
)
3789 attr
.sample_period
=
3790 intel_pt_ns_to_ticks(pt
, pt
->synth_opts
.period
);
3792 attr
.sample_period
= pt
->synth_opts
.period
;
3793 err
= intel_pt_synth_event(session
, "cycles", &attr
, id
);
3796 pt
->sample_cycles
= true;
3797 pt
->cycles_sample_type
= attr
.sample_type
;
3802 attr
.sample_type
&= ~(u64
)PERF_SAMPLE_PERIOD
;
3803 attr
.sample_period
= 1;
3805 if (pt
->synth_opts
.transactions
) {
3806 attr
.config
= PERF_COUNT_HW_INSTRUCTIONS
;
3807 err
= intel_pt_synth_event(session
, "transactions", &attr
, id
);
3810 pt
->sample_transactions
= true;
3811 pt
->transactions_sample_type
= attr
.sample_type
;
3812 pt
->transactions_id
= id
;
3813 intel_pt_set_event_name(evlist
, id
, "transactions");
3817 attr
.type
= PERF_TYPE_SYNTH
;
3818 attr
.sample_type
|= PERF_SAMPLE_RAW
;
3820 if (pt
->synth_opts
.ptwrites
) {
3821 attr
.config
= PERF_SYNTH_INTEL_PTWRITE
;
3822 err
= intel_pt_synth_event(session
, "ptwrite", &attr
, id
);
3825 pt
->sample_ptwrites
= true;
3826 pt
->ptwrites_sample_type
= attr
.sample_type
;
3827 pt
->ptwrites_id
= id
;
3828 intel_pt_set_event_name(evlist
, id
, "ptwrite");
3832 if (pt
->synth_opts
.pwr_events
) {
3833 pt
->sample_pwr_events
= true;
3834 pt
->pwr_events_sample_type
= attr
.sample_type
;
3836 attr
.config
= PERF_SYNTH_INTEL_CBR
;
3837 err
= intel_pt_synth_event(session
, "cbr", &attr
, id
);
3841 intel_pt_set_event_name(evlist
, id
, "cbr");
3844 attr
.config
= PERF_SYNTH_INTEL_PSB
;
3845 err
= intel_pt_synth_event(session
, "psb", &attr
, id
);
3849 intel_pt_set_event_name(evlist
, id
, "psb");
3853 if (pt
->synth_opts
.pwr_events
&& (evsel
->core
.attr
.config
& INTEL_PT_CFG_PWR_EVT_EN
)) {
3854 attr
.config
= PERF_SYNTH_INTEL_MWAIT
;
3855 err
= intel_pt_synth_event(session
, "mwait", &attr
, id
);
3859 intel_pt_set_event_name(evlist
, id
, "mwait");
3862 attr
.config
= PERF_SYNTH_INTEL_PWRE
;
3863 err
= intel_pt_synth_event(session
, "pwre", &attr
, id
);
3867 intel_pt_set_event_name(evlist
, id
, "pwre");
3870 attr
.config
= PERF_SYNTH_INTEL_EXSTOP
;
3871 err
= intel_pt_synth_event(session
, "exstop", &attr
, id
);
3875 intel_pt_set_event_name(evlist
, id
, "exstop");
3878 attr
.config
= PERF_SYNTH_INTEL_PWRX
;
3879 err
= intel_pt_synth_event(session
, "pwrx", &attr
, id
);
3883 intel_pt_set_event_name(evlist
, id
, "pwrx");
3887 if (pt
->synth_opts
.intr_events
&& (evsel
->core
.attr
.config
& INTEL_PT_CFG_EVT_EN
)) {
3888 attr
.config
= PERF_SYNTH_INTEL_EVT
;
3889 err
= intel_pt_synth_event(session
, "evt", &attr
, id
);
3892 pt
->evt_sample_type
= attr
.sample_type
;
3894 intel_pt_set_event_name(evlist
, id
, "evt");
3898 if (pt
->synth_opts
.intr_events
&& pt
->cap_event_trace
) {
3899 attr
.config
= PERF_SYNTH_INTEL_IFLAG_CHG
;
3900 err
= intel_pt_synth_event(session
, "iflag", &attr
, id
);
3903 pt
->iflag_chg_sample_type
= attr
.sample_type
;
3904 pt
->iflag_chg_id
= id
;
3905 intel_pt_set_event_name(evlist
, id
, "iflag");
3912 static void intel_pt_setup_pebs_events(struct intel_pt
*pt
)
3914 struct evsel
*evsel
;
3916 if (!pt
->synth_opts
.other_events
)
3919 evlist__for_each_entry(pt
->session
->evlist
, evsel
) {
3920 if (evsel
->core
.attr
.aux_output
&& evsel
->core
.id
) {
3921 if (pt
->single_pebs
) {
3922 pt
->single_pebs
= false;
3925 pt
->single_pebs
= true;
3926 pt
->sample_pebs
= true;
3927 pt
->pebs_evsel
= evsel
;
3932 static struct evsel
*intel_pt_find_sched_switch(struct evlist
*evlist
)
3934 struct evsel
*evsel
;
3936 evlist__for_each_entry_reverse(evlist
, evsel
) {
3937 const char *name
= evsel__name(evsel
);
3939 if (!strcmp(name
, "sched:sched_switch"))
3946 static bool intel_pt_find_switch(struct evlist
*evlist
)
3948 struct evsel
*evsel
;
3950 evlist__for_each_entry(evlist
, evsel
) {
3951 if (evsel
->core
.attr
.context_switch
)
3958 static int intel_pt_perf_config(const char *var
, const char *value
, void *data
)
3960 struct intel_pt
*pt
= data
;
3962 if (!strcmp(var
, "intel-pt.mispred-all"))
3963 pt
->mispred_all
= perf_config_bool(var
, value
);
3965 if (!strcmp(var
, "intel-pt.max-loops"))
3966 perf_config_int(&pt
->max_loops
, var
, value
);
3971 /* Find least TSC which converts to ns or later */
3972 static u64
intel_pt_tsc_start(u64 ns
, struct intel_pt
*pt
)
3976 tsc
= perf_time_to_tsc(ns
, &pt
->tc
);
3979 tm
= tsc_to_perf_time(tsc
, &pt
->tc
);
3986 tm
= tsc_to_perf_time(++tsc
, &pt
->tc
);
3991 /* Find greatest TSC which converts to ns or earlier */
3992 static u64
intel_pt_tsc_end(u64 ns
, struct intel_pt
*pt
)
3996 tsc
= perf_time_to_tsc(ns
, &pt
->tc
);
3999 tm
= tsc_to_perf_time(tsc
, &pt
->tc
);
4006 tm
= tsc_to_perf_time(--tsc
, &pt
->tc
);
4011 static int intel_pt_setup_time_ranges(struct intel_pt
*pt
,
4012 struct itrace_synth_opts
*opts
)
4014 struct perf_time_interval
*p
= opts
->ptime_range
;
4015 int n
= opts
->range_num
;
4018 if (!n
|| !p
|| pt
->timeless_decoding
)
4021 pt
->time_ranges
= calloc(n
, sizeof(struct range
));
4022 if (!pt
->time_ranges
)
4027 intel_pt_log("%s: %u range(s)\n", __func__
, n
);
4029 for (i
= 0; i
< n
; i
++) {
4030 struct range
*r
= &pt
->time_ranges
[i
];
4031 u64 ts
= p
[i
].start
;
4035 * Take care to ensure the TSC range matches the perf-time range
4036 * when converted back to perf-time.
4038 r
->start
= ts
? intel_pt_tsc_start(ts
, pt
) : 0;
4039 r
->end
= te
? intel_pt_tsc_end(te
, pt
) : 0;
4041 intel_pt_log("range %d: perf time interval: %"PRIu64
" to %"PRIu64
"\n",
4043 intel_pt_log("range %d: TSC time interval: %#"PRIx64
" to %#"PRIx64
"\n",
4044 i
, r
->start
, r
->end
);
4050 static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt
*pt
, char **args
)
4052 struct intel_pt_vmcs_info
*vmcs_info
;
4053 u64 tsc_offset
, vmcs
;
4062 tsc_offset
= strtoull(p
, &p
, 0);
4067 pt
->dflt_tsc_offset
= tsc_offset
;
4073 vmcs
= strtoull(p
, &p
, 0);
4078 vmcs_info
= intel_pt_findnew_vmcs(&pt
->vmcs_info
, vmcs
, tsc_offset
);
4090 static int intel_pt_parse_vm_tm_corr_args(struct intel_pt
*pt
)
4092 char *args
= pt
->synth_opts
.vm_tm_corr_args
;
4099 ret
= intel_pt_parse_vm_tm_corr_arg(pt
, &args
);
4103 pr_err("Failed to parse VM Time Correlation options\n");
4110 static const char * const intel_pt_info_fmts
[] = {
4111 [INTEL_PT_PMU_TYPE
] = " PMU Type %"PRId64
"\n",
4112 [INTEL_PT_TIME_SHIFT
] = " Time Shift %"PRIu64
"\n",
4113 [INTEL_PT_TIME_MULT
] = " Time Multiplier %"PRIu64
"\n",
4114 [INTEL_PT_TIME_ZERO
] = " Time Zero %"PRIu64
"\n",
4115 [INTEL_PT_CAP_USER_TIME_ZERO
] = " Cap Time Zero %"PRId64
"\n",
4116 [INTEL_PT_TSC_BIT
] = " TSC bit %#"PRIx64
"\n",
4117 [INTEL_PT_NORETCOMP_BIT
] = " NoRETComp bit %#"PRIx64
"\n",
4118 [INTEL_PT_HAVE_SCHED_SWITCH
] = " Have sched_switch %"PRId64
"\n",
4119 [INTEL_PT_SNAPSHOT_MODE
] = " Snapshot mode %"PRId64
"\n",
4120 [INTEL_PT_PER_CPU_MMAPS
] = " Per-cpu maps %"PRId64
"\n",
4121 [INTEL_PT_MTC_BIT
] = " MTC bit %#"PRIx64
"\n",
4122 [INTEL_PT_MTC_FREQ_BITS
] = " MTC freq bits %#"PRIx64
"\n",
4123 [INTEL_PT_TSC_CTC_N
] = " TSC:CTC numerator %"PRIu64
"\n",
4124 [INTEL_PT_TSC_CTC_D
] = " TSC:CTC denominator %"PRIu64
"\n",
4125 [INTEL_PT_CYC_BIT
] = " CYC bit %#"PRIx64
"\n",
4126 [INTEL_PT_MAX_NONTURBO_RATIO
] = " Max non-turbo ratio %"PRIu64
"\n",
4127 [INTEL_PT_FILTER_STR_LEN
] = " Filter string len. %"PRIu64
"\n",
4130 static void intel_pt_print_info(__u64
*arr
, int start
, int finish
)
4137 for (i
= start
; i
<= finish
; i
++) {
4138 const char *fmt
= intel_pt_info_fmts
[i
];
4141 fprintf(stdout
, fmt
, arr
[i
]);
4145 static void intel_pt_print_info_str(const char *name
, const char *str
)
4150 fprintf(stdout
, " %-20s%s\n", name
, str
? str
: "");
4153 static bool intel_pt_has(struct perf_record_auxtrace_info
*auxtrace_info
, int pos
)
4155 return auxtrace_info
->header
.size
>=
4156 sizeof(struct perf_record_auxtrace_info
) + (sizeof(u64
) * (pos
+ 1));
4159 int intel_pt_process_auxtrace_info(union perf_event
*event
,
4160 struct perf_session
*session
)
4162 struct perf_record_auxtrace_info
*auxtrace_info
= &event
->auxtrace_info
;
4163 size_t min_sz
= sizeof(u64
) * INTEL_PT_PER_CPU_MMAPS
;
4164 struct intel_pt
*pt
;
4169 if (auxtrace_info
->header
.size
< sizeof(struct perf_record_auxtrace_info
) +
4173 pt
= zalloc(sizeof(struct intel_pt
));
4177 pt
->vmcs_info
= RB_ROOT
;
4179 addr_filters__init(&pt
->filts
);
4181 err
= perf_config(intel_pt_perf_config
, pt
);
4185 err
= auxtrace_queues__init(&pt
->queues
);
4189 if (session
->itrace_synth_opts
->set
) {
4190 pt
->synth_opts
= *session
->itrace_synth_opts
;
4192 struct itrace_synth_opts
*opts
= session
->itrace_synth_opts
;
4194 itrace_synth_opts__set_default(&pt
->synth_opts
, opts
->default_no_sample
);
4195 if (!opts
->default_no_sample
&& !opts
->inject
) {
4196 pt
->synth_opts
.branches
= false;
4197 pt
->synth_opts
.callchain
= true;
4198 pt
->synth_opts
.add_callchain
= true;
4200 pt
->synth_opts
.thread_stack
= opts
->thread_stack
;
4203 if (!(pt
->synth_opts
.log_plus_flags
& AUXTRACE_LOG_FLG_USE_STDOUT
))
4204 intel_pt_log_set_name(INTEL_PT_PMU_NAME
);
4206 pt
->session
= session
;
4207 pt
->machine
= &session
->machines
.host
; /* No kvm support */
4208 pt
->auxtrace_type
= auxtrace_info
->type
;
4209 pt
->pmu_type
= auxtrace_info
->priv
[INTEL_PT_PMU_TYPE
];
4210 pt
->tc
.time_shift
= auxtrace_info
->priv
[INTEL_PT_TIME_SHIFT
];
4211 pt
->tc
.time_mult
= auxtrace_info
->priv
[INTEL_PT_TIME_MULT
];
4212 pt
->tc
.time_zero
= auxtrace_info
->priv
[INTEL_PT_TIME_ZERO
];
4213 pt
->cap_user_time_zero
= auxtrace_info
->priv
[INTEL_PT_CAP_USER_TIME_ZERO
];
4214 pt
->tsc_bit
= auxtrace_info
->priv
[INTEL_PT_TSC_BIT
];
4215 pt
->noretcomp_bit
= auxtrace_info
->priv
[INTEL_PT_NORETCOMP_BIT
];
4216 pt
->have_sched_switch
= auxtrace_info
->priv
[INTEL_PT_HAVE_SCHED_SWITCH
];
4217 pt
->snapshot_mode
= auxtrace_info
->priv
[INTEL_PT_SNAPSHOT_MODE
];
4218 pt
->per_cpu_mmaps
= auxtrace_info
->priv
[INTEL_PT_PER_CPU_MMAPS
];
4219 intel_pt_print_info(&auxtrace_info
->priv
[0], INTEL_PT_PMU_TYPE
,
4220 INTEL_PT_PER_CPU_MMAPS
);
4222 if (intel_pt_has(auxtrace_info
, INTEL_PT_CYC_BIT
)) {
4223 pt
->mtc_bit
= auxtrace_info
->priv
[INTEL_PT_MTC_BIT
];
4224 pt
->mtc_freq_bits
= auxtrace_info
->priv
[INTEL_PT_MTC_FREQ_BITS
];
4225 pt
->tsc_ctc_ratio_n
= auxtrace_info
->priv
[INTEL_PT_TSC_CTC_N
];
4226 pt
->tsc_ctc_ratio_d
= auxtrace_info
->priv
[INTEL_PT_TSC_CTC_D
];
4227 pt
->cyc_bit
= auxtrace_info
->priv
[INTEL_PT_CYC_BIT
];
4228 intel_pt_print_info(&auxtrace_info
->priv
[0], INTEL_PT_MTC_BIT
,
4232 if (intel_pt_has(auxtrace_info
, INTEL_PT_MAX_NONTURBO_RATIO
)) {
4233 pt
->max_non_turbo_ratio
=
4234 auxtrace_info
->priv
[INTEL_PT_MAX_NONTURBO_RATIO
];
4235 intel_pt_print_info(&auxtrace_info
->priv
[0],
4236 INTEL_PT_MAX_NONTURBO_RATIO
,
4237 INTEL_PT_MAX_NONTURBO_RATIO
);
4240 info
= &auxtrace_info
->priv
[INTEL_PT_FILTER_STR_LEN
] + 1;
4241 info_end
= (void *)auxtrace_info
+ auxtrace_info
->header
.size
;
4243 if (intel_pt_has(auxtrace_info
, INTEL_PT_FILTER_STR_LEN
)) {
4246 len
= auxtrace_info
->priv
[INTEL_PT_FILTER_STR_LEN
];
4247 intel_pt_print_info(&auxtrace_info
->priv
[0],
4248 INTEL_PT_FILTER_STR_LEN
,
4249 INTEL_PT_FILTER_STR_LEN
);
4251 const char *filter
= (const char *)info
;
4253 len
= roundup(len
+ 1, 8);
4255 if ((void *)info
> info_end
) {
4256 pr_err("%s: bad filter string length\n", __func__
);
4258 goto err_free_queues
;
4260 pt
->filter
= memdup(filter
, len
);
4263 goto err_free_queues
;
4265 if (session
->header
.needs_swap
)
4266 mem_bswap_64(pt
->filter
, len
);
4267 if (pt
->filter
[len
- 1]) {
4268 pr_err("%s: filter string not null terminated\n", __func__
);
4270 goto err_free_queues
;
4272 err
= addr_filters__parse_bare_filter(&pt
->filts
,
4275 goto err_free_queues
;
4277 intel_pt_print_info_str("Filter string", pt
->filter
);
4280 if ((void *)info
< info_end
) {
4281 pt
->cap_event_trace
= *info
++;
4283 fprintf(stdout
, " Cap Event Trace %d\n",
4284 pt
->cap_event_trace
);
4287 pt
->timeless_decoding
= intel_pt_timeless_decoding(pt
);
4288 if (pt
->timeless_decoding
&& !pt
->tc
.time_mult
)
4289 pt
->tc
.time_mult
= 1;
4290 pt
->have_tsc
= intel_pt_have_tsc(pt
);
4291 pt
->sampling_mode
= intel_pt_sampling_mode(pt
);
4292 pt
->est_tsc
= !pt
->timeless_decoding
;
4294 if (pt
->synth_opts
.vm_time_correlation
) {
4295 if (pt
->timeless_decoding
) {
4296 pr_err("Intel PT has no time information for VM Time Correlation\n");
4298 goto err_free_queues
;
4300 if (session
->itrace_synth_opts
->ptime_range
) {
4301 pr_err("Time ranges cannot be specified with VM Time Correlation\n");
4303 goto err_free_queues
;
4305 /* Currently TSC Offset is calculated using MTC packets */
4306 if (!intel_pt_have_mtc(pt
)) {
4307 pr_err("MTC packets must have been enabled for VM Time Correlation\n");
4309 goto err_free_queues
;
4311 err
= intel_pt_parse_vm_tm_corr_args(pt
);
4313 goto err_free_queues
;
4316 pt
->unknown_thread
= thread__new(999999999, 999999999);
4317 if (!pt
->unknown_thread
) {
4319 goto err_free_queues
;
4322 err
= thread__set_comm(pt
->unknown_thread
, "unknown", 0);
4324 goto err_delete_thread
;
4325 if (thread__init_maps(pt
->unknown_thread
, pt
->machine
)) {
4327 goto err_delete_thread
;
4330 pt
->auxtrace
.process_event
= intel_pt_process_event
;
4331 pt
->auxtrace
.process_auxtrace_event
= intel_pt_process_auxtrace_event
;
4332 pt
->auxtrace
.queue_data
= intel_pt_queue_data
;
4333 pt
->auxtrace
.dump_auxtrace_sample
= intel_pt_dump_sample
;
4334 pt
->auxtrace
.flush_events
= intel_pt_flush
;
4335 pt
->auxtrace
.free_events
= intel_pt_free_events
;
4336 pt
->auxtrace
.free
= intel_pt_free
;
4337 pt
->auxtrace
.evsel_is_auxtrace
= intel_pt_evsel_is_auxtrace
;
4338 session
->auxtrace
= &pt
->auxtrace
;
4343 if (pt
->have_sched_switch
== 1) {
4344 pt
->switch_evsel
= intel_pt_find_sched_switch(session
->evlist
);
4345 if (!pt
->switch_evsel
) {
4346 pr_err("%s: missing sched_switch event\n", __func__
);
4348 goto err_delete_thread
;
4350 } else if (pt
->have_sched_switch
== 2 &&
4351 !intel_pt_find_switch(session
->evlist
)) {
4352 pr_err("%s: missing context_switch attribute flag\n", __func__
);
4354 goto err_delete_thread
;
4357 if (pt
->synth_opts
.log
) {
4358 bool log_on_error
= pt
->synth_opts
.log_plus_flags
& AUXTRACE_LOG_FLG_ON_ERROR
;
4359 unsigned int log_on_error_size
= pt
->synth_opts
.log_on_error_size
;
4361 intel_pt_log_enable(log_on_error
, log_on_error_size
);
4364 /* Maximum non-turbo ratio is TSC freq / 100 MHz */
4365 if (pt
->tc
.time_mult
) {
4366 u64 tsc_freq
= intel_pt_ns_to_ticks(pt
, 1000000000);
4368 if (!pt
->max_non_turbo_ratio
)
4369 pt
->max_non_turbo_ratio
=
4370 (tsc_freq
+ 50000000) / 100000000;
4371 intel_pt_log("TSC frequency %"PRIu64
"\n", tsc_freq
);
4372 intel_pt_log("Maximum non-turbo ratio %u\n",
4373 pt
->max_non_turbo_ratio
);
4374 pt
->cbr2khz
= tsc_freq
/ pt
->max_non_turbo_ratio
/ 1000;
4377 err
= intel_pt_setup_time_ranges(pt
, session
->itrace_synth_opts
);
4379 goto err_delete_thread
;
4381 if (pt
->synth_opts
.calls
)
4382 pt
->branches_filter
|= PERF_IP_FLAG_CALL
| PERF_IP_FLAG_ASYNC
|
4383 PERF_IP_FLAG_TRACE_END
;
4384 if (pt
->synth_opts
.returns
)
4385 pt
->branches_filter
|= PERF_IP_FLAG_RETURN
|
4386 PERF_IP_FLAG_TRACE_BEGIN
;
4388 if ((pt
->synth_opts
.callchain
|| pt
->synth_opts
.add_callchain
) &&
4389 !symbol_conf
.use_callchain
) {
4390 symbol_conf
.use_callchain
= true;
4391 if (callchain_register_param(&callchain_param
) < 0) {
4392 symbol_conf
.use_callchain
= false;
4393 pt
->synth_opts
.callchain
= false;
4394 pt
->synth_opts
.add_callchain
= false;
4398 if (pt
->synth_opts
.add_callchain
) {
4399 err
= intel_pt_callchain_init(pt
);
4401 goto err_delete_thread
;
4404 if (pt
->synth_opts
.last_branch
|| pt
->synth_opts
.add_last_branch
) {
4405 pt
->br_stack_sz
= pt
->synth_opts
.last_branch_sz
;
4406 pt
->br_stack_sz_plus
= pt
->br_stack_sz
;
4409 if (pt
->synth_opts
.add_last_branch
) {
4410 err
= intel_pt_br_stack_init(pt
);
4412 goto err_delete_thread
;
4414 * Additional branch stack size to cater for tracing from the
4415 * actual sample ip to where the sample time is recorded.
4416 * Measured at about 200 branches, but generously set to 1024.
4417 * If kernel space is not being traced, then add just 1 for the
4418 * branch to kernel space.
4420 if (intel_pt_tracing_kernel(pt
))
4421 pt
->br_stack_sz_plus
+= 1024;
4423 pt
->br_stack_sz_plus
+= 1;
4426 pt
->use_thread_stack
= pt
->synth_opts
.callchain
||
4427 pt
->synth_opts
.add_callchain
||
4428 pt
->synth_opts
.thread_stack
||
4429 pt
->synth_opts
.last_branch
||
4430 pt
->synth_opts
.add_last_branch
;
4432 pt
->callstack
= pt
->synth_opts
.callchain
||
4433 pt
->synth_opts
.add_callchain
||
4434 pt
->synth_opts
.thread_stack
;
4436 err
= intel_pt_synth_events(pt
, session
);
4438 goto err_delete_thread
;
4440 intel_pt_setup_pebs_events(pt
);
4442 if (perf_data__is_pipe(session
->data
)) {
4443 pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n"
4444 " The output cannot relied upon. In particular,\n"
4445 " timestamps and the order of events may be incorrect.\n");
4448 if (pt
->sampling_mode
|| list_empty(&session
->auxtrace_index
))
4449 err
= auxtrace_queue_data(session
, true, true);
4451 err
= auxtrace_queues__process_index(&pt
->queues
, session
);
4453 goto err_delete_thread
;
4455 if (pt
->queues
.populated
)
4456 pt
->data_queued
= true;
4458 if (pt
->timeless_decoding
)
4459 pr_debug2("Intel PT decoding without timestamps\n");
4465 thread__zput(pt
->unknown_thread
);
4467 intel_pt_log_disable();
4468 auxtrace_queues__free(&pt
->queues
);
4469 session
->auxtrace
= NULL
;
4471 addr_filters__exit(&pt
->filts
);
4473 zfree(&pt
->time_ranges
);