// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"
#define XENPMU_IRQ_PROCESSING	1

struct xenpmu {
	/* Shared page between hypervisor and domain */
	struct xen_pmu_data *xenpmu_data;

	uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()	(this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()	(this_cpu_ptr(&xenpmu_shared)->flags)
/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
					    (uintptr_t)ctxt->field))
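/*
 * The PMU context shared with Xen does not embed the MSR banks directly:
 * each "field" member holds the bank's byte offset from the start of the
 * context structure, and field_offset() turns that offset into a pointer.
 */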
/* AMD PMU */
#define F15H_NUM_COUNTERS	6
#define F10H_NUM_COUNTERS	4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER	0
#define MSR_TYPE_CTRL		1
#define MSR_TYPE_GLOBAL		2
#define MSR_TYPE_ARCH_COUNTER	3
#define MSR_TYPE_ARCH_CTRL	4
/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT	8
#define PMU_GENERAL_NR_BITS	8
#define PMU_GENERAL_NR_MASK	(((1 << PMU_GENERAL_NR_BITS) - 1) \
				 << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT	0
#define PMU_FIXED_NR_BITS	5
#define PMU_FIXED_NR_MASK	(((1 << PMU_FIXED_NR_BITS) - 1) \
				 << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK	(~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))
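/*
 * MSR_IA32_PMC0 (0x4c1) aliases MSR_IA32_PERFCTR0 (0xc1) and accepts
 * full-width writes; ANDing an MSR index with MSR_PMC_ALIAS_MASK folds the
 * alias range back onto the legacy counter addresses so that both forms are
 * recognized by the classification code below.
 */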
#define INTEL_PMC_TYPE_SHIFT	30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
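/*
 * Probe the PMU layout once at init time: fixed MSR bases and counter counts
 * for AMD/Hygon, and the general/fixed counter counts from CPUID leaf 0xa on
 * Intel-compatible CPUs.
 */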
static void xen_pmu_arch_init(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		switch (boot_cpu_data.x86) {
		case 0x15:
			amd_num_counters = F15H_NUM_COUNTERS;
			amd_counters_base = MSR_F15H_PERF_CTR;
			amd_ctrls_base = MSR_F15H_PERF_CTL;
			/* Fam15h control/counter MSRs are interleaved pairs */
			amd_msr_step = 2;
			k7_counters_mirrored = 1;
			break;
		default:
			amd_num_counters = F10H_NUM_COUNTERS;
			amd_counters_base = MSR_K7_PERFCTR0;
			amd_ctrls_base = MSR_K7_EVNTSEL0;
			amd_msr_step = 1;
			k7_counters_mirrored = 0;
			break;
		}
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		amd_num_counters = F10H_NUM_COUNTERS;
		amd_counters_base = MSR_K7_PERFCTR0;
		amd_ctrls_base = MSR_K7_EVNTSEL0;
		amd_msr_step = 1;
		k7_counters_mirrored = 0;
	} else {
		uint32_t eax, ebx, ecx, edx;

		cpuid(0xa, &eax, &ebx, &ecx, &edx);

		intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
			PMU_GENERAL_NR_SHIFT;
		intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
			PMU_FIXED_NR_SHIFT;
	}
}
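/*
 * On Fam15h the legacy K7 counter/control MSRs are mirrored onto the new
 * MSR_F15H_PERF_* range; translate a K7 address to its Fam15h equivalent.
 */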
static inline uint32_t get_fam15h_addr(u32 addr)
{
	switch (addr) {
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
	default:
		break;
	}

	return addr;
}
static inline bool is_amd_pmu_msr(unsigned int msr)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return false;

	if ((msr >= MSR_F15H_PERF_CTL &&
	     msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
	    (msr >= MSR_K7_EVNTSEL0 &&
	     msr < MSR_K7_PERFCTR0 + amd_num_counters))
		return true;

	return false;
}
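/*
 * Classify an Intel-style PMU MSR: report its emulation type and, for
 * per-counter registers, the counter index within the shared context.
 */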
static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
	u32 msr_index_pmc;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return false;

	switch (msr_index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_IA32_DS_AREA:
	case MSR_IA32_PEBS_ENABLE:
		*type = MSR_TYPE_CTRL;
		return true;

	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*type = MSR_TYPE_GLOBAL;
		return true;

	default:
		if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
		    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
				 intel_num_fixed_counters)) {
			*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
			*type = MSR_TYPE_COUNTER;
			return true;
		}

		if ((msr_index >= MSR_P6_EVNTSEL0) &&
		    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
			*index = msr_index - MSR_P6_EVNTSEL0;
			*type = MSR_TYPE_ARCH_CTRL;
			return true;
		}

		msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
		if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
		    (msr_index_pmc < MSR_IA32_PERFCTR0 +
				     intel_num_arch_counters)) {
			*type = MSR_TYPE_ARCH_COUNTER;
			*index = msr_index_pmc - MSR_IA32_PERFCTR0;
			return true;
		}
		return false;
	}
}
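/*
 * While a PMU interrupt is being processed the guest must not touch the
 * hardware MSRs; reads and writes are satisfied from the PMU context that
 * Xen shares with the domain instead.
 */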
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
				  int index, bool is_read)
{
	uint64_t *reg = NULL;
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fix_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	ctxt = &xenpmu_data->pmu.c.intel;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		reg = &ctxt->global_ovf_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		reg = &ctxt->global_status;
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		reg = &ctxt->global_ctrl;
		break;
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		reg = &ctxt->fixed_ctrl;
		break;
	default:
		switch (type) {
		case MSR_TYPE_COUNTER:
			fix_counters = field_offset(ctxt, fixed_counters);
			reg = &fix_counters[index];
			break;
		case MSR_TYPE_ARCH_COUNTER:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].counter;
			break;
		case MSR_TYPE_ARCH_CTRL:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].control;
			break;
		default:
			return false;
		}
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else {
			*reg = *val;

			/*
			 * Writing the overflow control MSR also clears the
			 * corresponding bits in the cached global status.
			 */
			if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
				ctxt->global_status &= (~(*val));
		}
		return true;
	}

	return false;
}
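/*
 * AMD counterpart of the above: walk the counter/control banks in the shared
 * context and emulate the access if the MSR belongs to one of them.
 */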
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
	uint64_t *reg = NULL;
	int i, off = 0;
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs, *ctrl_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	if (k7_counters_mirrored &&
	    ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
		msr = get_fam15h_addr(msr);

	ctxt = &xenpmu_data->pmu.c.amd;
	for (i = 0; i < amd_num_counters; i++) {
		if (msr == amd_ctrls_base + off) {
			ctrl_regs = field_offset(ctxt, ctrls);
			reg = &ctrl_regs[i];
			break;
		} else if (msr == amd_counters_base + off) {
			counter_regs = field_offset(ctxt, counters);
			reg = &counter_regs[i];
			break;
		}
		off += amd_msr_step;
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else
			*reg = *val;

		return true;
	}
	return false;
}
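/*
 * Decide whether an MSR access is covered by the VPMU emulation above.
 * Returns true if the MSR is a PMU MSR at all; *emul reports whether the
 * access was actually satisfied from the shared context.
 */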
static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read,
				 bool *emul)
{
	int type, index = 0;

	if (is_amd_pmu_msr(msr))
		*emul = xen_amd_pmu_emulate(msr, val, is_read);
	else if (is_intel_pmu_msr(msr, &type, &index))
		*emul = xen_intel_pmu_emulate(msr, val, type, index, is_read);
	else
		return false;

	return true;
}
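/*
 * MSR read/write entry points for the PMU range: fall back to the native
 * accessors (the safe variants when an error pointer is supplied) whenever
 * the MSR is not emulated.
 */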
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
	bool emulated;

	if (!pmu_msr_chk_emulated(msr, val, true, &emulated))
		return false;

	if (!emulated) {
		*val = err ? native_read_msr_safe(msr, err)
			   : native_read_msr(msr);
	}

	return true;
}
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
	uint64_t val = ((uint64_t)high << 32) | low;
	bool emulated;

	if (!pmu_msr_chk_emulated(msr, &val, false, &emulated))
		return false;

	if (!emulated) {
		if (err)
			*err = native_write_msr_safe(msr, low, high);
		else
			native_write_msr(msr, low, high);
	}

	return true;
}
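/*
 * rdpmc emulation: outside of PMU interrupt processing read the hardware
 * counter directly; during interrupt processing return the value cached in
 * the shared context.
 */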
static unsigned long long xen_amd_read_pmc(int counter)
{
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		msr = amd_counters_base + (counter * amd_msr_step);
		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.amd;
	counter_regs = field_offset(ctxt, counters);
	return counter_regs[counter];
}
static unsigned long long xen_intel_read_pmc(int counter)
{
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fixed_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		/* Bit 30 of the rdpmc index selects the fixed counters */
		if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
			msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
		else
			msr = MSR_IA32_PERFCTR0 + counter;

		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.intel;
	if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
		fixed_counters = field_offset(ctxt, fixed_counters);
		return fixed_counters[counter & 0xffff];
	}

	arch_cntr_pair = field_offset(ctxt, arch_counters);
	return arch_cntr_pair[counter].counter;
}
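/*
 * Dispatch rdpmc emulation by CPU vendor: Intel goes through the Intel path,
 * everything else through the AMD/K7 path.
 */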
unsigned long long xen_read_pmc(int counter)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return xen_amd_read_pmc(counter);

	return xen_intel_read_pmc(counter);
}
int pmu_apic_update(uint32_t val)
{
	int ret;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return -EINVAL;
	}

	xenpmu_data->pmu.l.lapic_lvtpc = val;

	if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
		return 0;

	ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

	return ret;
}
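/*
 * perf guest-info callbacks: let perf attribute samples that Xen took while
 * another domain was running to guest context, using the state recorded in
 * the shared page.
 */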
static unsigned int xen_guest_state(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	unsigned int state = 0;

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return state;
	}

	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
		return state;

	state |= PERF_GUEST_ACTIVE;

	if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) {
		if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER)
			state |= PERF_GUEST_USER;
	} else if (xenpmu_data->pmu.r.regs.cpl & 3) {
		state |= PERF_GUEST_USER;
	}

	return state;
}
static unsigned long xen_get_guest_ip(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	return xenpmu_data->pmu.r.regs.ip;
}
static struct perf_guest_info_callbacks xen_guest_cbs = {
	.state		= xen_guest_state,
	.get_ip		= xen_get_guest_ip,
};
/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
			     struct pt_regs *regs, uint64_t pmu_flags)
{
	regs->ip = xen_regs->ip;
	regs->cs = xen_regs->cs;
	regs->sp = xen_regs->sp;

	if (pmu_flags & PMU_SAMPLE_PV) {
		if (pmu_flags & PMU_SAMPLE_USER)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	} else {
		if (xen_regs->cpl)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	}
}
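/*
 * PMU interrupt handler: mark this CPU as inside PMU interrupt processing
 * (which redirects MSR/PMC accesses to the shared context), hand the sample
 * to the core x86 PMU code, then flush the cached context back to the
 * hardware via XENPMU_flush.
 */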
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
	int err, ret = IRQ_NONE;
	struct pt_regs regs = {0};
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return ret;
	}

	this_cpu_ptr(&xenpmu_shared)->flags =
		xenpmu_flags | XENPMU_IRQ_PROCESSING;
	xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
			 xenpmu_data->pmu.pmu_flags);
	if (x86_pmu.handle_irq(&regs))
		ret = IRQ_HANDLED;

	/* Write out cached context to HW */
	err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
	this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
	if (err)
		pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);

	return ret;
}
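/*
 * Per-CPU VPMU setup: allocate the page shared with Xen, register it with
 * the XENPMU_init hypercall, and hook up the perf guest callbacks once.
 */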
/* True once the VPMU has been successfully initialized on the boot CPU. */
bool is_xen_pmu;

void xen_pmu_init(int cpu)
{
	int err;
	struct xen_pmu_params xp;
	unsigned long pfn;
	struct xen_pmu_data *xenpmu_data;

	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

	if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu))
		return;

	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
	if (!xenpmu_data) {
		pr_err("VPMU init: No memory\n");
		return;
	}
	pfn = virt_to_pfn(xenpmu_data);

	xp.val = pfn_to_mfn(pfn);
	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;
	err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
	if (err)
		goto fail;

	per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
	per_cpu(xenpmu_shared, cpu).flags = 0;

	if (!is_xen_pmu) {
		is_xen_pmu = true;
		perf_register_guest_info_callbacks(&xen_guest_cbs);
		xen_pmu_arch_init();
	}

	return;

fail:
	if (err == -EOPNOTSUPP || err == -ENOSYS)
		pr_info_once("VPMU disabled by hypervisor.\n");
	else
		pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
			     cpu, err);
	free_pages((unsigned long)xenpmu_data, 0);
}
void xen_pmu_finish(int cpu)
{
	struct xen_pmu_params xp;

	if (xen_hvm_domain())
		return;

	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;

	(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

	free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
	per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}