// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
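/*
 * Note: arm_pmus is the list of host PMU instances registered through
 * kvm_host_pmu_init() below, protected by arm_pmus_lock. The
 * kvm_arm_pmu_available static key is flipped on once the first instance
 * is added, gating the PMU fast-path checks elsewhere in KVM.
 */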
static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}
static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}
static u32 __kvm_pmu_event_mask(unsigned int pmuver)
{
	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	u64 dfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
	u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);

	return __kvm_pmu_event_mask(pmuver);
}
u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
{
	u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
		   kvm_pmu_event_mask(kvm);

	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP))
		mask |= ARMV8_PMU_INCLUDE_EL2;

	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
		mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
			ARMV8_PMU_EXCLUDE_NS_EL1 |
			ARMV8_PMU_EXCLUDE_EL3;

	return mask;
}
/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5));
}
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 val = kvm_vcpu_read_pmcr(vcpu);

	if (kvm_pmu_counter_is_hyp(vcpu, pmc->idx))
		return __vcpu_sys_reg(vcpu, MDCR_EL2) & MDCR_EL2_HLP;

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}
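/*
 * Event counters chain in even/odd pairs: the CHAIN event on an odd
 * counter counts overflows of its even-numbered neighbour. Chaining is
 * only emulated for an even counter that still overflows at 32 bits,
 * i.e. one that isn't already behaving as a full 64bit counter.
 */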
static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}
static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}
static u64 kvm_pmc_read_evtreg(const struct kvm_pmc *pmc)
{
	return __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), counter_index_to_evtreg(pmc->idx));
}
static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}
/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}
static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 */
		val  = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32);
		val |= lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}
/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}
/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}
/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}
/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}
/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}
/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}
static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu)
{
	unsigned int hpmn, n;

	if (!vcpu_has_nv(vcpu))
		return 0;

	hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
	n = vcpu->kvm->arch.pmcr_n;

	/*
	 * Programming HPMN to a value greater than PMCR_EL0.N is
	 * CONSTRAINED UNPREDICTABLE. Make the implementation choice that an
	 * UNKNOWN number of counters (in our case, zero) are reserved for EL2.
	 */
	if (hpmn >= n)
		return 0;

	/*
	 * Programming HPMN=0 is CONSTRAINED UNPREDICTABLE if FEAT_HPMN0 isn't
	 * implemented. Since KVM's ability to emulate HPMN=0 does not directly
	 * depend on hardware (all PMU registers are trapped), make the
	 * implementation choice that all counters are included in the second
	 * range reserved for EL2/EL3.
	 */
	return GENMASK(n - 1, hpmn);
}
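/*
 * With nested virt, MDCR_EL2.HPMN partitions the event counters: indices
 * below HPMN belong to the EL0/EL1 range, while [HPMN, PMCR_EL0.N) is
 * reserved for EL2. The helpers below use the mask computed above to tell
 * the two ranges apart.
 */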
bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return kvm_pmu_hyp_counter_mask(vcpu) & BIT(idx);
}
u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 mask = kvm_pmu_implemented_counter_mask(vcpu);

	if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
		return mask;

	return mask & ~kvm_pmu_hyp_counter_mask(vcpu);
}
u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));

	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}
/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}
/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}
/*
 * Returns the PMU overflow state, which is true if there exists an event
 * counter where the values of the global enable control, PMOVSSET_EL0[n], and
 * PMINTENSET_EL1[n] are all 1.
 */
static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);

	reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);

	/*
	 * PMCR_EL0.E is the global enable control for event counters available
	 * to EL0 and EL1.
	 */
	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E))
		reg &= kvm_pmu_hyp_counter_mask(vcpu);

	/*
	 * Otherwise, MDCR_EL2.HPME is the global enable control for event
	 * counters reserved for EL2.
	 */
	if (!(vcpu_read_sys_reg(vcpu, MDCR_EL2) & MDCR_EL2_HPME))
		reg &= ~kvm_pmu_hyp_counter_mask(vcpu);

	return reg;
}
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}
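/*
 * With an in-kernel irqchip, the overflow level is driven straight into
 * the vGIC above. Without one, the level change is instead surfaced to
 * userspace through kvm_run->s.regs, as handled by
 * kvm_pmu_should_notify_user() and kvm_pmu_update_run() below.
 */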
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}
/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device irq bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
/*
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}
/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}
/* Compute the sample period for a given counter value */
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}
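/*
 * Note on the arithmetic above: perf fires the backing event after
 * 'sample_period' increments, so the period is the distance from the
 * current guest counter value to the point where it wraps, i.e.
 * (-counter) truncated to the counter's effective width (32 or 64 bits).
 */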
/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
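/*
 * The stop/start bracketing above follows the perf PMU callback contract:
 * stopping with PERF_EF_UPDATE folds the hardware count into the event
 * before the new sample period is written, and restarting with
 * PERF_EF_RELOAD reprograms the hardware from that updated period.
 */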
/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}
/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
		val &= ~ARMV8_PMU_PMCR_LP;

	/* The reset bits don't indicate any state, and shouldn't be saved. */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_accessible_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}

	kvm_vcpu_pmu_restore_guest(vcpu);
}
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	unsigned int mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2);

	if (!(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)))
		return false;

	if (kvm_pmu_counter_is_hyp(vcpu, pmc->idx))
		return mdcr & MDCR_EL2_HPME;

	return kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E;
}
static bool kvm_pmc_counts_at_el0(struct kvm_pmc *pmc)
{
	u64 evtreg = kvm_pmc_read_evtreg(pmc);
	bool nsu = evtreg & ARMV8_PMU_EXCLUDE_NS_EL0;
	bool u = evtreg & ARMV8_PMU_EXCLUDE_EL0;

	return u == nsu;
}
static bool kvm_pmc_counts_at_el1(struct kvm_pmc *pmc)
{
	u64 evtreg = kvm_pmc_read_evtreg(pmc);
	bool nsk = evtreg & ARMV8_PMU_EXCLUDE_NS_EL1;
	bool p = evtreg & ARMV8_PMU_EXCLUDE_EL1;

	return p == nsk;
}
static bool kvm_pmc_counts_at_el2(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2);

	if (!kvm_pmu_counter_is_hyp(vcpu, pmc->idx) && (mdcr & MDCR_EL2_HPMD))
		return false;

	return kvm_pmc_read_evtreg(pmc) & ARMV8_PMU_INCLUDE_EL2;
}
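/*
 * The three helpers above decode the PMEVTYPER exclude bits (U/NSU, P/NSK
 * and the include-EL2 bit) for a non-secure guest. kvm_pmu_create_perf_event()
 * below maps them onto the perf_event_attr exclude_user/exclude_kernel fields
 * so that the host perf event filters at the same exception levels the guest
 * asked for.
 */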
/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, evtreg;

	evtreg = kvm_pmc_read_evtreg(pmc);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = evtreg & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and that the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = !kvm_pmc_counts_at_el0(pmc);
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * Filter events at EL1 (i.e. vEL2) when in a hyp context based on the
	 * guest's EL2 filter.
	 */
	if (unlikely(is_hyp_ctxt(vcpu)))
		attr.exclude_kernel = !kvm_pmc_counts_at_el2(pmc);
	else
		attr.exclude_kernel = !kvm_pmc_counts_at_el1(pmc);

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}
/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	reg = counter_index_to_evtreg(pmc->idx);
	__vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);

	kvm_pmu_create_perf_event(pmc);
}
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	/*
	 * Check the sanitised PMU version for the system, as KVM does not
	 * support implementations where PMUv3 exists on a subset of CPUs.
	 */
	if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct arm_pmu *tmp, *pmu = NULL;
	struct arm_pmu_entry *entry;
	int cpu;

	mutex_lock(&arm_pmus_lock);

	/*
	 * It is safe to use a stale cpu to iterate the list of PMUs so long as
	 * the same value is used for the entirety of the loop. Given this, and
	 * the fact that no percpu data is used for the lookup there is no need
	 * to disable preemption.
	 *
	 * It is still necessary to get a valid cpu, though, to probe for the
	 * default PMU instance as userspace is not required to specify a PMU
	 * type. In order to uphold the preexisting behavior KVM selects the
	 * PMU instance for the core during vcpu init. A dependent use
	 * case would be a user with disdain of all things big.LITTLE that
	 * affines the VMM to a particular cluster of cores.
	 *
	 * In any case, userspace should just do the sane thing and use the UAPI
	 * to select a PMU type directly. But, be wary of the baggage being
	 * carried here.
	 */
	cpu = raw_smp_processor_id();
	list_for_each_entry(entry, &arm_pmus, entry) {
		tmp = entry->arm_pmu;

		if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
			pmu = tmp;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);

	return pmu;
}
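/*
 * PMCEID0_EL0 advertises common events 0-31 in its low half and (from
 * PMUv3p1 on) events 0x4000-0x401F in its high half; PMCEID1_EL0 does the
 * same for events 32-63 and 0x4020-0x403F. The loop below walks the
 * userspace event filter in 8-bit chunks and clears the advertisement
 * bits for events the filter does not allow.
 */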
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;

		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}
void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
{
	u64 mask = kvm_pmu_implemented_counter_mask(vcpu);

	kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu));

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
	__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
	__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
}
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}
/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}
/**
 * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters.
 * @kvm: The kvm pointer
 */
u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
{
	struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;

	/*
	 * The arm_pmu->cntr_mask considers the fixed counter(s) as well.
	 * Ignore those and return only the general-purpose counters.
	 */
	return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS);
}
static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
{
	lockdep_assert_held(&kvm->arch.config_lock);

	kvm->arch.arm_pmu = arm_pmu;
	kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);
}
/**
 * kvm_arm_set_default_pmu - No PMU set, get the default one.
 * @kvm: The kvm pointer
 *
 * The observant among you will notice that the supported_cpus
 * mask does not get updated for the default PMU even though it
 * is quite possible the selected instance supports only a
 * subset of cores in the system. This is intentional, and
 * upholds the preexisting behavior on heterogeneous systems
 * where vCPUs can be scheduled on any core but the guest
 * counters could stop working.
 */
int kvm_arm_set_default_pmu(struct kvm *kvm)
{
	struct arm_pmu *arm_pmu = kvm_pmu_probe_armpmu();

	if (!arm_pmu)
		return -ENODEV;

	kvm_arm_set_pmu(kvm, arm_pmu);
	return 0;
}
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	lockdep_assert_held(&kvm->arch.config_lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (kvm_vm_has_ran_once(kvm) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm_arm_set_pmu(kvm, arm_pmu);
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	return ret;
}
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		/*
		 * Allow userspace to specify an event filter for the entire
		 * event range supported by PMUVer of the hardware, rather
		 * than the guest's PMUVer for KVM backward compatibility.
		 */
		nr_events = __kvm_pmu_event_mask(pmuver) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		if (kvm_vm_has_ran_once(kvm))
			return -EBUSY;

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter)
				return -ENOMEM;

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}
u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}
/**
 * kvm_vcpu_read_pmcr - Read PMCR_EL0 register for the vCPU
 * @vcpu: The vcpu pointer
 */
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
	u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);

	return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
}
void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu)
{
	bool reprogrammed = false;
	unsigned long mask;
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	mask = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
	for_each_set_bit(i, &mask, 32) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		/*
		 * We only need to reconfigure events where the filter is
		 * different at EL1 vs. EL2, as we're multiplexing the true EL1
		 * event filter bit for nested.
		 */
		if (kvm_pmc_counts_at_el1(pmc) == kvm_pmc_counts_at_el2(pmc))
			continue;

		kvm_pmu_create_perf_event(pmc);
		reprogrammed = true;
	}

	if (reprogrammed)
		kvm_vcpu_pmu_restore_guest(vcpu);
}