// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "nested.h"
#include "pmu.h"

/*
 * Perf's "BASE" is wildly misleading: architectural PMUs use bits 31:16 of ECX
 * to encode the "type" of counter to read, i.e. this is not a "base".  And to
 * further confuse things, non-architectural PMUs use bit 31 as a flag for
 * "fast" reads, whereas the "type" is an explicit value.
 */
#define INTEL_RDPMC_GP		0
#define INTEL_RDPMC_FIXED	INTEL_PMC_FIXED_RDPMC_BASE

#define INTEL_RDPMC_TYPE_MASK	GENMASK(31, 16)
#define INTEL_RDPMC_INDEX_MASK	GENMASK(15, 0)

#define MSR_PMC_FULL_WIDTH_BIT	(MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
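
/*
 * Illustrative decode examples (not part of the original source): with the
 * masks above, RDPMC with ECX = 0x40000001 yields type INTEL_RDPMC_FIXED and
 * index 1 (fixed counter 1), while ECX = 0x00000002 yields type INTEL_RDPMC_GP
 * and index 2 (GP counter 2).  With the current MSR numbering,
 * MSR_PMC_FULL_WIDTH_BIT works out to 0x400, i.e. the offset between the
 * full-width MSR_IA32_PMCx aliases and the legacy MSR_IA32_PERFCTRx range.
 */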

static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
	struct kvm_pmc *pmc;
	u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
	int i;

	pmu->fixed_ctr_ctrl = data;
	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 new_ctrl = fixed_ctrl_field(data, i);
		u8 old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);

		if (old_ctrl == new_ctrl)
			continue;

		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);

		__set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use);
		kvm_pmu_request_counter_reprogram(pmc);
	}
}
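
/*
 * For reference: IA32_FIXED_CTR_CTRL dedicates a 4-bit control field to each
 * fixed counter i, at bits [4*i+3 : 4*i] (enable-OS, enable-USR, AnyThread,
 * enable-PMI), which is what fixed_ctrl_field() extracts above; only counters
 * whose 4-bit field actually changed are queued for reprogramming.
 */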

static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
					      unsigned int idx, u64 *mask)
{
	unsigned int type = idx & INTEL_RDPMC_TYPE_MASK;
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *counters;
	unsigned int num_counters;
	u64 bitmask;

	/*
	 * The encoding of ECX for RDPMC is different for architectural versus
	 * non-architectural PMUs (PMUs with version '0').  For architectural
	 * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC
	 * index.  For non-architectural PMUs, bit 31 is a "fast" flag, and
	 * bits 30:0 specify the PMC index.
	 *
	 * Yell and reject attempts to read PMCs for a non-architectural PMU,
	 * as KVM doesn't support such PMUs.
	 */
	if (WARN_ON_ONCE(!pmu->version))
		return NULL;

	/*
	 * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs
	 * are supported on all architectural PMUs, i.e. on all virtual PMUs
	 * supported by KVM.  Note, KVM only emulates fixed PMCs for PMU v2+,
	 * but the type itself is still valid, i.e. let RDPMC fail due to
	 * accessing a non-existent counter.  Reject attempts to read all other
	 * types, which are unknown/unsupported.
	 */
	switch (type) {
	case INTEL_RDPMC_FIXED:
		counters = pmu->fixed_counters;
		num_counters = pmu->nr_arch_fixed_counters;
		bitmask = pmu->counter_bitmask[KVM_PMC_FIXED];
		break;
	case INTEL_RDPMC_GP:
		counters = pmu->gp_counters;
		num_counters = pmu->nr_arch_gp_counters;
		bitmask = pmu->counter_bitmask[KVM_PMC_GP];
		break;
	default:
		return NULL;
	}

	idx &= INTEL_RDPMC_INDEX_MASK;
	if (idx >= num_counters)
		return NULL;

	*mask &= bitmask;
	return &counters[array_index_nospec(idx, num_counters)];
}

static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
	if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
		return 0;

	return vcpu->arch.perf_capabilities;
}

static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
	return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
}

static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
{
	if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
		return NULL;

	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
	bool ret = false;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return ret;

	ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
		(index >= records->from && index < records->from + records->nr) ||
		(index >= records->to && index < records->to + records->nr);

	if (!ret && records->info)
		ret = (index >= records->info && index < records->info + records->nr);

	return ret;
}

static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u64 perf_capabilities;
	int ret;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		return kvm_pmu_has_perf_global_ctrl(pmu);
	case MSR_IA32_PEBS_ENABLE:
		ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
		break;
	case MSR_IA32_DS_AREA:
		ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
		break;
	case MSR_PEBS_DATA_CFG:
		perf_capabilities = vcpu_get_perf_capabilities(vcpu);
		ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) &&
			((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3);
		break;
	default:
		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
			get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
			intel_pmu_is_valid_lbr_msr(vcpu, msr);
		break;
	}

	return ret;
}

static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	pmc = get_fixed_pmc(pmu, msr);
	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

	return pmc;
}

static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->event) {
		perf_event_release_kernel(lbr_desc->event);
		lbr_desc->event = NULL;
		vcpu_to_pmu(vcpu)->event_count--;
	}
}

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct perf_event *event;

	/*
	 * The perf_event_attr is constructed in the minimum efficient way:
	 * - set 'pinned = true' to make it task pinned so that if another
	 *   cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
	 * - set '.exclude_host = true' to record guest branch behavior;
	 *
	 * - set '.config = INTEL_FIXED_VLBR_EVENT' to indicate to host perf
	 *   that it should schedule the event without a real HW counter but
	 *   with a fake one; check is_guest_lbr_event() and
	 *   __intel_get_event_constraints();
	 *
	 * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
	 *   'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
	 *   PERF_SAMPLE_BRANCH_USER' to configure it as an LBR callstack
	 *   event, which helps KVM save/restore guest LBR records during
	 *   host context switches and avoids quite a lot of overhead;
	 *   check branch_user_callstack() and intel_pmu_lbr_sched_task();
	 */
	struct perf_event_attr attr = {
		.type = PERF_TYPE_RAW,
		.size = sizeof(attr),
		.config = INTEL_FIXED_VLBR_EVENT,
		.sample_type = PERF_SAMPLE_BRANCH_STACK,
		.pinned = true,
		.exclude_host = true,
		.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
					PERF_SAMPLE_BRANCH_USER,
	};

	if (unlikely(lbr_desc->event)) {
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		return 0;
	}

	event = perf_event_create_kernel_counter(&attr, -1,
						current, NULL, NULL);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("%s: failed %ld\n",
					__func__, PTR_ERR(event));
		return PTR_ERR(event);
	}
	lbr_desc->event = event;
	pmu->event_count++;
	__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
	return 0;
}

/*
 * It's safe to access LBR MSRs from the guest when they have not
 * been passed through, since the host will restore or reset the
 * LBR MSR records when the guest LBR event is scheduled in.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
				     struct msr_data *msr_info, bool read)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	u32 index = msr_info->index;

	if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
		return false;

	if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
		goto dummy;

	/*
	 * Disable irqs to ensure the LBR feature doesn't get reclaimed by the
	 * host while the value is read from the MSR, which avoids leaking the
	 * host LBR value to the guest.  If LBR has been reclaimed, return 0
	 * on guest reads.
	 */
	local_irq_disable();
	if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
		if (read)
			rdmsrl(index, msr_info->data);
		else
			wrmsrl(index, msr_info->data);
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
		local_irq_enable();
		return true;
	}
	clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
	local_irq_enable();

dummy:
	if (read)
		msr_info->data = 0;
	return true;
}

static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		msr_info->data = pmu->fixed_ctr_ctrl;
		break;
	case MSR_IA32_PEBS_ENABLE:
		msr_info->data = pmu->pebs_enable;
		break;
	case MSR_IA32_DS_AREA:
		msr_info->data = pmu->ds_area;
		break;
	case MSR_PEBS_DATA_CFG:
		msr_info->data = pmu->pebs_data_cfg;
		break;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
			u64 val = pmc_read_counter(pmc);
			msr_info->data =
				val & pmu->counter_bitmask[KVM_PMC_GP];
			break;
		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
			u64 val = pmc_read_counter(pmc);
			msr_info->data =
				val & pmu->counter_bitmask[KVM_PMC_FIXED];
			break;
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			msr_info->data = pmc->eventsel;
			break;
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
			break;
		}
		return 1;
	}

	return 0;
}

static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;
	u64 reserved_bits, diff;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		if (data & pmu->fixed_ctr_ctrl_rsvd)
			return 1;

		if (pmu->fixed_ctr_ctrl != data)
			reprogram_fixed_counters(pmu, data);
		break;
	case MSR_IA32_PEBS_ENABLE:
		if (data & pmu->pebs_enable_rsvd)
			return 1;

		if (pmu->pebs_enable != data) {
			diff = pmu->pebs_enable ^ data;
			pmu->pebs_enable = data;
			reprogram_counters(pmu, diff);
		}
		break;
	case MSR_IA32_DS_AREA:
		if (is_noncanonical_address(data, vcpu))
			return 1;

		pmu->ds_area = data;
		break;
	case MSR_PEBS_DATA_CFG:
		if (data & pmu->pebs_data_cfg_rsvd)
			return 1;

		pmu->pebs_data_cfg = data;
		break;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
			if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
			    (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
				return 1;

			if (!msr_info->host_initiated &&
			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
				data = (s64)(s32)data;
			pmc_write_counter(pmc, data);
			break;
		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
			pmc_write_counter(pmc, data);
			break;
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			reserved_bits = pmu->reserved_bits;
			if ((pmc->idx == 2) &&
			    (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
			if (data & reserved_bits)
				return 1;

			if (data != pmc->eventsel) {
				pmc->eventsel = data;
				kvm_pmu_request_counter_reprogram(pmc);
			}
			break;
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
			break;
		}
		/* Not a known PMU MSR. */
		return 1;
	}

	return 0;
}
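
/*
 * Note on the GP counter write path above: writes through the legacy
 * MSR_IA32_PERFCTRx interface are architecturally limited to 32 bits of input
 * and are sign-extended into the full counter width, whereas the full-width
 * MSR_IA32_PMCx aliases (advertised via the PERF_CAPABILITIES FW_WRITE bit)
 * accept a value spanning the entire counter width, hence the (s64)(s32)
 * cast only on the legacy path.
 */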

/*
 * Map fixed counter events to architectural general purpose event encodings.
 * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
 * and so KVM instead programs the architectural event to effectively request
 * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
 * instead program the encoding into a general purpose counter, e.g. if a
 * different perf_event is already utilizing the requested counter, but the
 * end result is the same (ignoring the fact that using a general purpose
 * counter will likely exacerbate counter contention).
 *
 * Forcibly inlined to allow asserting on @index at build time, and there
 * should never be more than one user.
 */
static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
{
	const enum perf_hw_id fixed_pmc_perf_ids[] = {
		[0] = PERF_COUNT_HW_INSTRUCTIONS,
		[1] = PERF_COUNT_HW_CPU_CYCLES,
		[2] = PERF_COUNT_HW_REF_CPU_CYCLES,
	};
	u64 eventsel;

	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUTNERS);
	BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUTNERS);

	/*
	 * Yell if perf reports support for a fixed counter but perf doesn't
	 * have a known encoding for the associated general purpose event.
	 */
	eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
	WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
	return eventsel;
}
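
/*
 * For example, fixed counter 0 counts retired instructions, so
 * perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS) is expected to return
 * the architectural INST_RETIRED.ANY encoding (event 0xC0, umask 0x00);
 * programming that eventsel on a GP counter counts the same thing the fixed
 * counter would.
 */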

static void intel_pmu_enable_fixed_counter_bits(struct kvm_pmu *pmu, u64 bits)
{
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
		pmu->fixed_ctr_ctrl_rsvd &= ~intel_fixed_bits_by_idx(i, bits);
}

static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	struct kvm_cpuid_entry2 *entry;
	union cpuid10_eax eax;
	union cpuid10_edx edx;
	u64 perf_capabilities;
	u64 counter_rsvd;

	memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));

	/*
	 * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
	 * and PMU refresh is disallowed after the vCPU has run, i.e. this code
	 * should never be reached while KVM is passing through MSRs.
	 */
	if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
		return;

	entry = kvm_find_cpuid_entry(vcpu, 0xa);
	if (!entry)
		return;

	eax.full = entry->eax;
	edx.full = entry->edx;

	pmu->version = eax.split.version_id;
	if (!pmu->version)
		return;

	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
					 kvm_pmu_cap.num_counters_gp);
	eax.split.bit_width = min_t(int, eax.split.bit_width,
				    kvm_pmu_cap.bit_width_gp);
	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
	eax.split.mask_length = min_t(int, eax.split.mask_length,
				      kvm_pmu_cap.events_mask_len);
	pmu->available_event_types = ~entry->ebx &
					((1ull << eax.split.mask_length) - 1);

	if (pmu->version == 1) {
		pmu->nr_arch_fixed_counters = 0;
	} else {
		pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
						    kvm_pmu_cap.num_counters_fixed);
		edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
						  kvm_pmu_cap.bit_width_fixed);
		pmu->counter_bitmask[KVM_PMC_FIXED] =
			((u64)1 << edx.split.bit_width_fixed) - 1;
	}

	intel_pmu_enable_fixed_counter_bits(pmu, INTEL_FIXED_0_KERNEL |
						 INTEL_FIXED_0_USER |
						 INTEL_FIXED_0_ENABLE_PMI);

	counter_rsvd = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
		(((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
	pmu->global_ctrl_rsvd = counter_rsvd;

	/*
	 * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET)
	 * share reserved bit definitions.  The kernel just happens to use
	 * OVF_CTRL for the names.
	 */
	pmu->global_status_rsvd = pmu->global_ctrl_rsvd
			& ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
			    MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
	if (vmx_pt_mode_is_host_guest())
		pmu->global_status_rsvd &=
				~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

	entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
	if (entry &&
	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
		pmu->reserved_bits ^= HSW_IN_TX;
		pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
	}

	bitmap_set(pmu->all_valid_pmc_idx,
		0, pmu->nr_arch_gp_counters);
	bitmap_set(pmu->all_valid_pmc_idx,
		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
	if (cpuid_model_is_consistent(vcpu) &&
	    (perf_capabilities & PMU_CAP_LBR_FMT))
		memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
	else
		lbr_desc->records.nr = 0;

	if (lbr_desc->records.nr)
		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);

	if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
		if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
			pmu->pebs_enable_rsvd = counter_rsvd;
			pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
			pmu->pebs_data_cfg_rsvd = ~0xff00000full;
			intel_pmu_enable_fixed_counter_bits(pmu, ICL_FIXED_0_ADAPTIVE);
		} else {
			pmu->pebs_enable_rsvd =
				~((1ull << pmu->nr_arch_gp_counters) - 1);
		}
	}
}
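
/*
 * Worked example for the reserved-bit math above, assuming 8 GP and 3 fixed
 * counters with KVM_FIXED_PMC_BASE_IDX == 32: the valid GLOBAL_CTRL bits are
 * 0-7 and 32-34, so counter_rsvd = ~(0xff | (0x7ull << 32)), and a guest
 * write to GLOBAL_CTRL that sets any other bit is expected to be rejected as
 * reserved by the common PMU code.
 */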

static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	for (i = 0; i < KVM_MAX_NR_INTEL_GP_COUNTERS; i++) {
		pmu->gp_counters[i].type = KVM_PMC_GP;
		pmu->gp_counters[i].vcpu = vcpu;
		pmu->gp_counters[i].idx = i;
		pmu->gp_counters[i].current_config = 0;
	}

	for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUTNERS; i++) {
		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
		pmu->fixed_counters[i].vcpu = vcpu;
		pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
		pmu->fixed_counters[i].current_config = 0;
		pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
	}

	lbr_desc->records.nr = 0;
	lbr_desc->event = NULL;
	lbr_desc->msr_passthrough = false;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
	intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI, and KVM emulates this
 * by clearing the LBR bit (bit 0) in IA32_DEBUGCTL.
 *
 * The guest needs to re-enable LBR to resume branch recording.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
		data &= ~DEBUGCTLMSR_LBR;
		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
	}
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	u8 version = vcpu_to_pmu(vcpu)->version;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return;

	if (version > 1 && version < 4)
		intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}

static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
	int i;

	for (i = 0; i < lbr->nr; i++) {
		vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
		vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
		if (lbr->info)
			vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
	}

	vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
	vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}
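
/*
 * The from/to/info fields in x86_pmu_lbr are the base MSR addresses of the
 * respective LBR stacks, and each stack has lbr->nr entries, so the loop above
 * toggles interception for lbr->nr contiguous MSRs in each range, plus the
 * LBR_SELECT and LBR_TOS control MSRs.
 */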

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, true);
	lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, false);
	lbr_desc->msr_passthrough = true;
}

/*
 * Higher priority host perf events (e.g. cpu pinned) could reclaim the
 * pmu resources (e.g. LBR) that were assigned to the guest. This is
 * usually done via ipi calls (more details in perf_install_in_context).
 *
 * Before entering non-root mode (with irqs disabled here), double-confirm
 * that the pmu features enabled for the guest have not been reclaimed by
 * higher priority host events. Otherwise, disallow the vcpu's access to
 * the reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->event) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
			goto warn;
		if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
			goto warn;
		return;
	}

	if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		__clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		goto warn;
	} else
		vmx_enable_lbr_msrs_passthrough(vcpu);

	return;

warn:
	pr_warn_ratelimited("vcpu-%d: fail to passthrough LBR.\n", vcpu->vcpu_id);
}

static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
		intel_pmu_release_guest_lbr_event(vcpu);
}

void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
{
	struct kvm_pmc *pmc = NULL;
	int bit, hw_idx;

	kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
		if (!pmc_speculative_in_use(pmc) ||
		    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
			continue;

		/*
		 * A negative index indicates the event isn't mapped to a
		 * physical counter in the host, e.g. due to contention.
		 */
		hw_idx = pmc->perf_event->hw.idx;
		if (hw_idx != pmc->idx && hw_idx > -1)
			pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
	}
}

struct kvm_pmu_ops intel_pmu_ops __initdata = {
	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
	.is_valid_msr = intel_is_valid_msr,
	.get_msr = intel_pmu_get_msr,
	.set_msr = intel_pmu_set_msr,
	.refresh = intel_pmu_refresh,
	.init = intel_pmu_init,
	.reset = intel_pmu_reset,
	.deliver_pmi = intel_pmu_deliver_pmi,
	.cleanup = intel_pmu_cleanup,
	.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
	.MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
	.MIN_NR_GP_COUNTERS = 1,
};