// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING    1
struct xenpmu {
	/* Shared page between hypervisor and domain */
	struct xen_pmu_data *xenpmu_data;

	uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
					    (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER       0
#define MSR_TYPE_CTRL          1
#define MSR_TYPE_GLOBAL        2
#define MSR_TYPE_ARCH_COUNTER  3
#define MSR_TYPE_ARCH_CTRL     4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT   8
#define PMU_GENERAL_NR_BITS    8
#define PMU_GENERAL_NR_MASK    (((1 << PMU_GENERAL_NR_BITS) - 1) \
				<< PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT     0
#define PMU_FIXED_NR_BITS      5
#define PMU_FIXED_NR_MASK      (((1 << PMU_FIXED_NR_BITS) - 1) \
				<< PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK     (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT   30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
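
/*
 * Probe the host PMU at init time: select the AMD/Hygon MSR bank layout and
 * counter count, or read the general/fixed counter counts from CPUID leaf
 * 0xa on Intel.
 */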
static void xen_pmu_arch_init(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

		switch (boot_cpu_data.x86) {
		case 0x15:
			amd_num_counters = F15H_NUM_COUNTERS;
			amd_counters_base = MSR_F15H_PERF_CTR;
			amd_ctrls_base = MSR_F15H_PERF_CTL;
			amd_msr_step = 2;
			k7_counters_mirrored = 1;
			break;
		default:
			amd_num_counters = F10H_NUM_COUNTERS;
			amd_counters_base = MSR_K7_PERFCTR0;
			amd_ctrls_base = MSR_K7_EVNTSEL0;
			amd_msr_step = 1;
			k7_counters_mirrored = 0;
			break;
		}
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		amd_num_counters = F10H_NUM_COUNTERS;
		amd_counters_base = MSR_K7_PERFCTR0;
		amd_ctrls_base = MSR_K7_EVNTSEL0;
		amd_msr_step = 1;
		k7_counters_mirrored = 0;
	} else {
		uint32_t eax, ebx, ecx, edx;

		cpuid(0xa, &eax, &ebx, &ecx, &edx);

		intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
			PMU_GENERAL_NR_SHIFT;
		intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
			PMU_FIXED_NR_SHIFT;
	}
}
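
/*
 * Family 15h mirrors the legacy K7 counter/event-select MSRs; translate a K7
 * MSR address to its F15H equivalent.
 */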
static inline uint32_t get_fam15h_addr(u32 addr)
{
	switch (addr) {
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
	default:
		break;
	}

	return addr;
}

static inline bool is_amd_pmu_msr(unsigned int msr)
{
	if ((msr >= MSR_F15H_PERF_CTL &&
	     msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
	    (msr >= MSR_K7_EVNTSEL0 &&
	     msr < MSR_K7_PERFCTR0 + amd_num_counters))
		return true;

	return false;
}
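
/*
 * Classify an Intel PMU MSR: returns true and sets *type (and *index for
 * per-counter registers) when the MSR belongs to the virtual PMU.
 */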
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
	u32 msr_index_pmc;

	switch (msr_index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_IA32_DS_AREA:
	case MSR_IA32_PEBS_ENABLE:
		*type = MSR_TYPE_CTRL;
		return true;

	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*type = MSR_TYPE_GLOBAL;
		return true;

	default:

		if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
		    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
				 intel_num_fixed_counters)) {
			*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
			*type = MSR_TYPE_COUNTER;
			return true;
		}

		if ((msr_index >= MSR_P6_EVNTSEL0) &&
		    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
			*index = msr_index - MSR_P6_EVNTSEL0;
			*type = MSR_TYPE_ARCH_CTRL;
			return true;
		}

		msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
		if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
		    (msr_index_pmc < MSR_IA32_PERFCTR0 +
				     intel_num_arch_counters)) {
			*type = MSR_TYPE_ARCH_COUNTER;
			*index = msr_index_pmc - MSR_IA32_PERFCTR0;
			return true;
		}
		return false;
	}
}
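
/*
 * Emulate an Intel PMU MSR access against the per-CPU context in the page
 * shared with Xen; only valid while a PMU interrupt is being processed.
 */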
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
				  int index, bool is_read)
{
	uint64_t *reg = NULL;
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fix_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	ctxt = &xenpmu_data->pmu.c.intel;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		reg = &ctxt->global_ovf_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		reg = &ctxt->global_status;
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		reg = &ctxt->global_ctrl;
		break;
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		reg = &ctxt->fixed_ctrl;
		break;
	default:
		switch (type) {
		case MSR_TYPE_COUNTER:
			fix_counters = field_offset(ctxt, fixed_counters);
			reg = &fix_counters[index];
			break;
		case MSR_TYPE_ARCH_COUNTER:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].counter;
			break;
		case MSR_TYPE_ARCH_CTRL:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].control;
			break;
		default:
			return false;
		}
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else {
			*reg = *val;

			/* Writing OVF_CTRL clears the corresponding status bits */
			if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
				ctxt->global_status &= (~(*val));
		}
		return true;
	}

	return false;
}

static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
	uint64_t *reg = NULL;
	int i, off = 0;
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs, *ctrl_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	if (k7_counters_mirrored &&
	    ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
		msr = get_fam15h_addr(msr);

	ctxt = &xenpmu_data->pmu.c.amd;
	for (i = 0; i < amd_num_counters; i++) {
		if (msr == amd_ctrls_base + off) {
			ctrl_regs = field_offset(ctxt, ctrls);
			reg = &ctrl_regs[i];
			break;
		} else if (msr == amd_counters_base + off) {
			counter_regs = field_offset(ctxt, counters);
			reg = &counter_regs[i];
			break;
		}
		off += amd_msr_step;
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else
			*reg = *val;

		return true;
	}
	return false;
}
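
/*
 * MSR read/write interception entry points: emulate PMU MSRs from the shared
 * context when a PMU interrupt is in flight, otherwise access the real MSR.
 */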
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		if (is_amd_pmu_msr(msr)) {
			if (!xen_amd_pmu_emulate(msr, val, 1))
				*val = native_read_msr_safe(msr, err);
			return true;
		}
	} else {
		int type, index;

		if (is_intel_pmu_msr(msr, &type, &index)) {
			if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
				*val = native_read_msr_safe(msr, err);
			return true;
		}
	}

	return false;
}

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
	uint64_t val = ((uint64_t)high << 32) | low;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		if (is_amd_pmu_msr(msr)) {
			if (!xen_amd_pmu_emulate(msr, &val, 0))
				*err = native_write_msr_safe(msr, low, high);
			return true;
		}
	} else {
		int type, index;

		if (is_intel_pmu_msr(msr, &type, &index)) {
			if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
				*err = native_write_msr_safe(msr, low, high);
			return true;
		}
	}

	return false;
}
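
/* rdpmc emulation: read counters from the shared context or the hardware. */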
static unsigned long long xen_amd_read_pmc(int counter)
{
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		msr = amd_counters_base + (counter * amd_msr_step);
		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.amd;
	counter_regs = field_offset(ctxt, counters);
	return counter_regs[counter];
}

static unsigned long long xen_intel_read_pmc(int counter)
{
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fixed_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
			msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
		else
			msr = MSR_IA32_PERFCTR0 + counter;

		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.intel;
	if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
		fixed_counters = field_offset(ctxt, fixed_counters);
		return fixed_counters[counter & 0xffff];
	}

	arch_cntr_pair = field_offset(ctxt, arch_counters);
	return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return xen_amd_read_pmc(counter);
	else
		return xen_intel_read_pmc(counter);
}

int pmu_apic_update(uint32_t val)
{
	int ret;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return -EINVAL;
	}

	xenpmu_data->pmu.l.lapic_lvtpc = val;

	if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
		return 0;

	ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

	return ret;
}

/* perf callbacks */
static int xen_is_in_guest(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
		return 0;

	return 1;
}

static int xen_is_user_mode(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
		return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
	else
		return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
	.is_in_guest  = xen_is_in_guest,
	.is_user_mode = xen_is_user_mode,
	.get_guest_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
			     struct pt_regs *regs, uint64_t pmu_flags)
{
	regs->ip = xen_regs->ip;
	regs->cs = xen_regs->cs;
	regs->sp = xen_regs->sp;

	if (pmu_flags & PMU_SAMPLE_PV) {
		if (pmu_flags & PMU_SAMPLE_USER)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	} else {
		if (xen_regs->cpl)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	}
}
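
/*
 * PMU interrupt handler: mark the shared area as being processed, hand the
 * sample to the core x86 PMU code, then flush the cached context back to Xen.
 */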
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
	int err, ret = IRQ_NONE;
	struct pt_regs regs = {0};
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return ret;
	}

	this_cpu_ptr(&xenpmu_shared)->flags =
		xenpmu_flags | XENPMU_IRQ_PROCESSING;
	xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
			 xenpmu_data->pmu.pmu_flags);
	if (x86_pmu.handle_irq(&regs))
		ret = IRQ_HANDLED;

	/* Write out cached context to HW */
	err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
	this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
	if (err) {
		pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
		return IRQ_NONE;
	}

	return ret;
}

bool is_xen_pmu(int cpu)
{
	return (get_xenpmu_data() != NULL);
}
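
/*
 * Allocate the per-CPU page shared with the hypervisor and register it via
 * XENPMU_init; on the boot CPU also register the perf guest callbacks and
 * probe the PMU layout.
 */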
void xen_pmu_init(int cpu)
{
	int err;
	struct xen_pmu_params xp;
	unsigned long pfn;
	struct xen_pmu_data *xenpmu_data;

	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

	if (xen_hvm_domain())
		return;

	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
	if (!xenpmu_data) {
		pr_err("VPMU init: No memory\n");
		return;
	}
	pfn = virt_to_pfn(xenpmu_data);

	xp.val = pfn_to_mfn(pfn);
	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;
	err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
	if (err)
		goto fail;

	per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
	per_cpu(xenpmu_shared, cpu).flags = 0;

	if (cpu == 0) {
		perf_register_guest_info_callbacks(&xen_guest_cbs);
		xen_pmu_arch_init();
	}

	return;

fail:
	if (err == -EOPNOTSUPP || err == -ENOSYS)
		pr_info_once("VPMU disabled by hypervisor.\n");
	else
		pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
			     cpu, err);
	free_pages((unsigned long)xenpmu_data, 0);
}

void xen_pmu_finish(int cpu)
{
	struct xen_pmu_params xp;

	if (xen_hvm_domain())
		return;

	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;

	(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

	free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
	per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}