WIP FPC-III support
[linux/fpc-iii.git] / arch / arm64 / kvm / pmu-emul.c
blob4ad66a532e38b37007fd75610e15a426b7123bd8
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2015 Linaro Ltd.
4 * Author: Shannon Zhao <shannon.zhao@linaro.org>
5 */
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/perf_event.h>
11 #include <linux/perf/arm_pmu.h>
12 #include <linux/uaccess.h>
13 #include <asm/kvm_emulate.h>
14 #include <kvm/arm_pmu.h>
15 #include <kvm/arm_vgic.h>
17 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
19 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
21 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
23 static u32 kvm_pmu_event_mask(struct kvm *kvm)
25 switch (kvm->arch.pmuver) {
26 case 1: /* ARMv8.0 */
27 return GENMASK(9, 0);
28 case 4: /* ARMv8.1 */
29 case 5: /* ARMv8.4 */
30 case 6: /* ARMv8.5 */
31 return GENMASK(15, 0);
32 default: /* Shouldn't be here, just for sanity */
33 WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
34 return 0;
38 /**
39 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
40 * @vcpu: The vcpu pointer
41 * @select_idx: The counter index
43 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
45 return (select_idx == ARMV8_PMU_CYCLE_IDX &&
46 __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
49 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
51 struct kvm_pmu *pmu;
52 struct kvm_vcpu_arch *vcpu_arch;
54 pmc -= pmc->idx;
55 pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
56 vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
57 return container_of(vcpu_arch, struct kvm_vcpu, arch);
60 /**
61 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
62 * @pmc: The PMU counter pointer
64 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
66 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
68 return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
71 /**
72 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
73 * @select_idx: The counter index
75 static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
77 return select_idx & 0x1;
80 /**
81 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
82 * @pmc: The PMU counter pointer
84 * When a pair of PMCs are chained together we use the low counter (canonical)
85 * to hold the underlying perf event.
87 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
89 if (kvm_pmu_pmc_is_chained(pmc) &&
90 kvm_pmu_idx_is_high_counter(pmc->idx))
91 return pmc - 1;
93 return pmc;
95 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
97 if (kvm_pmu_idx_is_high_counter(pmc->idx))
98 return pmc - 1;
99 else
100 return pmc + 1;
104 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
105 * @vcpu: The vcpu pointer
106 * @select_idx: The counter index
108 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
110 u64 eventsel, reg;
112 select_idx |= 0x1;
114 if (select_idx == ARMV8_PMU_CYCLE_IDX)
115 return false;
117 reg = PMEVTYPER0_EL0 + select_idx;
118 eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
120 return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
124 * kvm_pmu_get_pair_counter_value - get PMU counter value
125 * @vcpu: The vcpu pointer
126 * @pmc: The PMU counter pointer
128 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
129 struct kvm_pmc *pmc)
131 u64 counter, counter_high, reg, enabled, running;
133 if (kvm_pmu_pmc_is_chained(pmc)) {
134 pmc = kvm_pmu_get_canonical_pmc(pmc);
135 reg = PMEVCNTR0_EL0 + pmc->idx;
137 counter = __vcpu_sys_reg(vcpu, reg);
138 counter_high = __vcpu_sys_reg(vcpu, reg + 1);
140 counter = lower_32_bits(counter) | (counter_high << 32);
141 } else {
142 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
143 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
144 counter = __vcpu_sys_reg(vcpu, reg);
148 * The real counter value is equal to the value of counter register plus
149 * the value perf event counts.
151 if (pmc->perf_event)
152 counter += perf_event_read_value(pmc->perf_event, &enabled,
153 &running);
155 return counter;
159 * kvm_pmu_get_counter_value - get PMU counter value
160 * @vcpu: The vcpu pointer
161 * @select_idx: The counter index
163 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
165 u64 counter;
166 struct kvm_pmu *pmu = &vcpu->arch.pmu;
167 struct kvm_pmc *pmc = &pmu->pmc[select_idx];
169 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
171 if (kvm_pmu_pmc_is_chained(pmc) &&
172 kvm_pmu_idx_is_high_counter(select_idx))
173 counter = upper_32_bits(counter);
174 else if (select_idx != ARMV8_PMU_CYCLE_IDX)
175 counter = lower_32_bits(counter);
177 return counter;
181 * kvm_pmu_set_counter_value - set PMU counter value
182 * @vcpu: The vcpu pointer
183 * @select_idx: The counter index
184 * @val: The counter value
186 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
188 u64 reg;
190 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
191 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
192 __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
194 /* Recreate the perf event to reflect the updated sample_period */
195 kvm_pmu_create_perf_event(vcpu, select_idx);
199 * kvm_pmu_release_perf_event - remove the perf event
200 * @pmc: The PMU counter pointer
202 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
204 pmc = kvm_pmu_get_canonical_pmc(pmc);
205 if (pmc->perf_event) {
206 perf_event_disable(pmc->perf_event);
207 perf_event_release_kernel(pmc->perf_event);
208 pmc->perf_event = NULL;
213 * kvm_pmu_stop_counter - stop PMU counter
214 * @pmc: The PMU counter pointer
216 * If this counter has been configured to monitor some event, release it here.
218 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
220 u64 counter, reg, val;
222 pmc = kvm_pmu_get_canonical_pmc(pmc);
223 if (!pmc->perf_event)
224 return;
226 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
228 if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
229 reg = PMCCNTR_EL0;
230 val = counter;
231 } else {
232 reg = PMEVCNTR0_EL0 + pmc->idx;
233 val = lower_32_bits(counter);
236 __vcpu_sys_reg(vcpu, reg) = val;
238 if (kvm_pmu_pmc_is_chained(pmc))
239 __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
241 kvm_pmu_release_perf_event(pmc);
245 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
246 * @vcpu: The vcpu pointer
249 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
251 int i;
252 struct kvm_pmu *pmu = &vcpu->arch.pmu;
254 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
255 pmu->pmc[i].idx = i;
259 * kvm_pmu_vcpu_reset - reset pmu state for cpu
260 * @vcpu: The vcpu pointer
263 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
265 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
266 struct kvm_pmu *pmu = &vcpu->arch.pmu;
267 int i;
269 for_each_set_bit(i, &mask, 32)
270 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
272 bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
276 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
277 * @vcpu: The vcpu pointer
280 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
282 int i;
283 struct kvm_pmu *pmu = &vcpu->arch.pmu;
285 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
286 kvm_pmu_release_perf_event(&pmu->pmc[i]);
287 irq_work_sync(&vcpu->arch.pmu.overflow_work);
290 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
292 u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
294 val &= ARMV8_PMU_PMCR_N_MASK;
295 if (val == 0)
296 return BIT(ARMV8_PMU_CYCLE_IDX);
297 else
298 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
302 * kvm_pmu_enable_counter_mask - enable selected PMU counters
303 * @vcpu: The vcpu pointer
304 * @val: the value guest writes to PMCNTENSET register
306 * Call perf_event_enable to start counting the perf event
308 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
310 int i;
311 struct kvm_pmu *pmu = &vcpu->arch.pmu;
312 struct kvm_pmc *pmc;
314 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
315 return;
317 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
318 if (!(val & BIT(i)))
319 continue;
321 pmc = &pmu->pmc[i];
323 /* A change in the enable state may affect the chain state */
324 kvm_pmu_update_pmc_chained(vcpu, i);
325 kvm_pmu_create_perf_event(vcpu, i);
327 /* At this point, pmc must be the canonical */
328 if (pmc->perf_event) {
329 perf_event_enable(pmc->perf_event);
330 if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
331 kvm_debug("fail to enable perf event\n");
337 * kvm_pmu_disable_counter_mask - disable selected PMU counters
338 * @vcpu: The vcpu pointer
339 * @val: the value guest writes to PMCNTENCLR register
341 * Call perf_event_disable to stop counting the perf event
343 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
345 int i;
346 struct kvm_pmu *pmu = &vcpu->arch.pmu;
347 struct kvm_pmc *pmc;
349 if (!val)
350 return;
352 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
353 if (!(val & BIT(i)))
354 continue;
356 pmc = &pmu->pmc[i];
358 /* A change in the enable state may affect the chain state */
359 kvm_pmu_update_pmc_chained(vcpu, i);
360 kvm_pmu_create_perf_event(vcpu, i);
362 /* At this point, pmc must be the canonical */
363 if (pmc->perf_event)
364 perf_event_disable(pmc->perf_event);
368 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
370 u64 reg = 0;
372 if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
373 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
374 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
375 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
376 reg &= kvm_pmu_valid_counter_mask(vcpu);
379 return reg;
382 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
384 struct kvm_pmu *pmu = &vcpu->arch.pmu;
385 bool overflow;
387 if (!kvm_vcpu_has_pmu(vcpu))
388 return;
390 overflow = !!kvm_pmu_overflow_status(vcpu);
391 if (pmu->irq_level == overflow)
392 return;
394 pmu->irq_level = overflow;
396 if (likely(irqchip_in_kernel(vcpu->kvm))) {
397 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
398 pmu->irq_num, overflow, pmu);
399 WARN_ON(ret);
403 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
405 struct kvm_pmu *pmu = &vcpu->arch.pmu;
406 struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
407 bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
409 if (likely(irqchip_in_kernel(vcpu->kvm)))
410 return false;
412 return pmu->irq_level != run_level;
416 * Reflect the PMU overflow interrupt output level into the kvm_run structure
418 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
420 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
422 /* Populate the timer bitmap for user space */
423 regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
424 if (vcpu->arch.pmu.irq_level)
425 regs->device_irq_level |= KVM_ARM_DEV_PMU;
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluate the overflow state, so that an overflow which happened while
 * running in the host results in an interrupt being injected.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluate the overflow state, so that an overflow which happened while
 * running in the guest results in an interrupt being injected.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
453 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
454 * to the event.
455 * This is why we need a callback to do it once outside of the NMI context.
457 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
459 struct kvm_vcpu *vcpu;
460 struct kvm_pmu *pmu;
462 pmu = container_of(work, struct kvm_pmu, overflow_work);
463 vcpu = kvm_pmc_to_vcpu(pmu->pmc);
465 kvm_vcpu_kick(vcpu);
469 * When the perf event overflows, set the overflow status and inform the vcpu.
471 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
472 struct perf_sample_data *data,
473 struct pt_regs *regs)
475 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
476 struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
477 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
478 int idx = pmc->idx;
479 u64 period;
481 cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
484 * Reset the sample period to the architectural limit,
485 * i.e. the point where the counter overflows.
487 period = -(local64_read(&perf_event->count));
489 if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
490 period &= GENMASK(31, 0);
492 local64_set(&perf_event->hw.period_left, 0);
493 perf_event->attr.sample_period = period;
494 perf_event->hw.sample_period = period;
496 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
498 if (kvm_pmu_overflow_status(vcpu)) {
499 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
501 if (!in_nmi())
502 kvm_vcpu_kick(vcpu);
503 else
504 irq_work_queue(&vcpu->arch.pmu.overflow_work);
507 cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
511 * kvm_pmu_software_increment - do software increment
512 * @vcpu: The vcpu pointer
513 * @val: the value guest writes to PMSWINC register
515 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
517 struct kvm_pmu *pmu = &vcpu->arch.pmu;
518 int i;
520 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
521 return;
523 /* Weed out disabled counters */
524 val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
526 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
527 u64 type, reg;
529 if (!(val & BIT(i)))
530 continue;
532 /* PMSWINC only applies to ... SW_INC! */
533 type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
534 type &= kvm_pmu_event_mask(vcpu->kvm);
535 if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
536 continue;
538 /* increment this even SW_INC counter */
539 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
540 reg = lower_32_bits(reg);
541 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
543 if (reg) /* no overflow on the low part */
544 continue;
546 if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
547 /* increment the high counter */
548 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
549 reg = lower_32_bits(reg);
550 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
551 if (!reg) /* mark overflow on the high counter */
552 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
553 } else {
554 /* mark overflow on low counter */
555 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
561 * kvm_pmu_handle_pmcr - handle PMCR register
562 * @vcpu: The vcpu pointer
563 * @val: the value guest writes to PMCR register
565 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
567 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
568 int i;
570 if (val & ARMV8_PMU_PMCR_E) {
571 kvm_pmu_enable_counter_mask(vcpu,
572 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
573 } else {
574 kvm_pmu_disable_counter_mask(vcpu, mask);
577 if (val & ARMV8_PMU_PMCR_C)
578 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
580 if (val & ARMV8_PMU_PMCR_P) {
581 for_each_set_bit(i, &mask, 32)
582 kvm_pmu_set_counter_value(vcpu, i, 0);
586 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
588 return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
589 (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
593 * kvm_pmu_create_perf_event - create a perf event for a counter
594 * @vcpu: The vcpu pointer
595 * @select_idx: The number of selected counter
597 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
599 struct kvm_pmu *pmu = &vcpu->arch.pmu;
600 struct kvm_pmc *pmc;
601 struct perf_event *event;
602 struct perf_event_attr attr;
603 u64 eventsel, counter, reg, data;
606 * For chained counters the event type and filtering attributes are
607 * obtained from the low/even counter. We also use this counter to
608 * determine if the event is enabled/disabled.
610 pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
612 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
613 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
614 data = __vcpu_sys_reg(vcpu, reg);
616 kvm_pmu_stop_counter(vcpu, pmc);
617 if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
618 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
619 else
620 eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
622 /* Software increment event doesn't need to be backed by a perf event */
623 if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
624 return;
627 * If we have a filter in place and that the event isn't allowed, do
628 * not install a perf event either.
630 if (vcpu->kvm->arch.pmu_filter &&
631 !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
632 return;
634 memset(&attr, 0, sizeof(struct perf_event_attr));
635 attr.type = PERF_TYPE_RAW;
636 attr.size = sizeof(attr);
637 attr.pinned = 1;
638 attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
639 attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
640 attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
641 attr.exclude_hv = 1; /* Don't count EL2 events */
642 attr.exclude_host = 1; /* Don't count host events */
643 attr.config = eventsel;
645 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
647 if (kvm_pmu_pmc_is_chained(pmc)) {
649 * The initial sample period (overflow count) of an event. For
650 * chained counters we only support overflow interrupts on the
651 * high counter.
653 attr.sample_period = (-counter) & GENMASK(63, 0);
654 attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
656 event = perf_event_create_kernel_counter(&attr, -1, current,
657 kvm_pmu_perf_overflow,
658 pmc + 1);
659 } else {
660 /* The initial sample period (overflow count) of an event. */
661 if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
662 attr.sample_period = (-counter) & GENMASK(63, 0);
663 else
664 attr.sample_period = (-counter) & GENMASK(31, 0);
666 event = perf_event_create_kernel_counter(&attr, -1, current,
667 kvm_pmu_perf_overflow, pmc);
670 if (IS_ERR(event)) {
671 pr_err_once("kvm: pmu event creation failed %ld\n",
672 PTR_ERR(event));
673 return;
676 pmc->perf_event = event;
680 * kvm_pmu_update_pmc_chained - update chained bitmap
681 * @vcpu: The vcpu pointer
682 * @select_idx: The number of selected counter
684 * Update the chained bitmap based on the event type written in the
685 * typer register and the enable state of the odd register.
687 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
689 struct kvm_pmu *pmu = &vcpu->arch.pmu;
690 struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
691 bool new_state, old_state;
693 old_state = kvm_pmu_pmc_is_chained(pmc);
694 new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
695 kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
697 if (old_state == new_state)
698 return;
700 canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
701 kvm_pmu_stop_counter(vcpu, canonical_pmc);
702 if (new_state) {
704 * During promotion from !chained to chained we must ensure
705 * the adjacent counter is stopped and its event destroyed
707 kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
708 set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
709 return;
711 clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
715 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
716 * @vcpu: The vcpu pointer
717 * @data: The data guest writes to PMXEVTYPER_EL0
718 * @select_idx: The number of selected counter
720 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
721 * event with given hardware event number. Here we call perf_event API to
722 * emulate this action and create a kernel perf event for it.
724 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
725 u64 select_idx)
727 u64 reg, mask;
729 mask = ARMV8_PMU_EVTYPE_MASK;
730 mask &= ~ARMV8_PMU_EVTYPE_EVENT;
731 mask |= kvm_pmu_event_mask(vcpu->kvm);
733 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
734 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
736 __vcpu_sys_reg(vcpu, reg) = data & mask;
738 kvm_pmu_update_pmc_chained(vcpu, select_idx);
739 kvm_pmu_create_perf_event(vcpu, select_idx);
742 static int kvm_pmu_probe_pmuver(void)
744 struct perf_event_attr attr = { };
745 struct perf_event *event;
746 struct arm_pmu *pmu;
747 int pmuver = 0xf;
750 * Create a dummy event that only counts user cycles. As we'll never
751 * leave this function with the event being live, it will never
752 * count anything. But it allows us to probe some of the PMU
753 * details. Yes, this is terrible.
755 attr.type = PERF_TYPE_RAW;
756 attr.size = sizeof(attr);
757 attr.pinned = 1;
758 attr.disabled = 0;
759 attr.exclude_user = 0;
760 attr.exclude_kernel = 1;
761 attr.exclude_hv = 1;
762 attr.exclude_host = 1;
763 attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
764 attr.sample_period = GENMASK(63, 0);
766 event = perf_event_create_kernel_counter(&attr, -1, current,
767 kvm_pmu_perf_overflow, &attr);
769 if (IS_ERR(event)) {
770 pr_err_once("kvm: pmu event creation failed %ld\n",
771 PTR_ERR(event));
772 return 0xf;
775 if (event->pmu) {
776 pmu = to_arm_pmu(event->pmu);
777 if (pmu->pmuver)
778 pmuver = pmu->pmuver;
781 perf_event_disable(event);
782 perf_event_release_kernel(event);
784 return pmuver;
787 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
789 unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
790 u64 val, mask = 0;
791 int base, i;
793 if (!pmceid1) {
794 val = read_sysreg(pmceid0_el0);
795 base = 0;
796 } else {
797 val = read_sysreg(pmceid1_el0);
798 base = 32;
801 if (!bmap)
802 return val;
804 for (i = 0; i < 32; i += 8) {
805 u64 byte;
807 byte = bitmap_get_value8(bmap, base + i);
808 mask |= byte << i;
809 byte = bitmap_get_value8(bmap, 0x4000 + base + i);
810 mask |= byte << (32 + i);
813 return val & mask;
816 bool kvm_arm_support_pmu_v3(void)
819 * Check if HW_PERF_EVENTS are supported by checking the number of
820 * hardware performance counters. This could ensure the presence of
821 * a physical PMU and CONFIG_PERF_EVENT is selected.
823 return (perf_num_counters() > 0);
826 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
828 if (!kvm_vcpu_has_pmu(vcpu))
829 return 0;
831 if (!vcpu->arch.pmu.created)
832 return -EINVAL;
835 * A valid interrupt configuration for the PMU is either to have a
836 * properly configured interrupt number and using an in-kernel
837 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
839 if (irqchip_in_kernel(vcpu->kvm)) {
840 int irq = vcpu->arch.pmu.irq_num;
842 * If we are using an in-kernel vgic, at this point we know
843 * the vgic will be initialized, so we can check the PMU irq
844 * number against the dimensions of the vgic and make sure
845 * it's valid.
847 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
848 return -EINVAL;
849 } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
850 return -EINVAL;
853 return 0;
856 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
858 if (irqchip_in_kernel(vcpu->kvm)) {
859 int ret;
862 * If using the PMU with an in-kernel virtual GIC
863 * implementation, we require the GIC to be already
864 * initialized when initializing the PMU.
866 if (!vgic_initialized(vcpu->kvm))
867 return -ENODEV;
869 if (!kvm_arm_pmu_irq_initialized(vcpu))
870 return -ENXIO;
872 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
873 &vcpu->arch.pmu);
874 if (ret)
875 return ret;
878 init_irq_work(&vcpu->arch.pmu.overflow_work,
879 kvm_pmu_perf_overflow_notify_vcpu);
881 vcpu->arch.pmu.created = true;
882 return 0;
886 * For one VM the interrupt type must be same for each vcpu.
887 * As a PPI, the interrupt number is the same for all vcpus,
888 * while as an SPI it must be a separate number per vcpu.
890 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
892 int i;
893 struct kvm_vcpu *vcpu;
895 kvm_for_each_vcpu(i, vcpu, kvm) {
896 if (!kvm_arm_pmu_irq_initialized(vcpu))
897 continue;
899 if (irq_is_ppi(irq)) {
900 if (vcpu->arch.pmu.irq_num != irq)
901 return false;
902 } else {
903 if (vcpu->arch.pmu.irq_num == irq)
904 return false;
908 return true;
911 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
913 if (!kvm_vcpu_has_pmu(vcpu))
914 return -ENODEV;
916 if (vcpu->arch.pmu.created)
917 return -EBUSY;
919 if (!vcpu->kvm->arch.pmuver)
920 vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
922 if (vcpu->kvm->arch.pmuver == 0xf)
923 return -ENODEV;
925 switch (attr->attr) {
926 case KVM_ARM_VCPU_PMU_V3_IRQ: {
927 int __user *uaddr = (int __user *)(long)attr->addr;
928 int irq;
930 if (!irqchip_in_kernel(vcpu->kvm))
931 return -EINVAL;
933 if (get_user(irq, uaddr))
934 return -EFAULT;
936 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
937 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
938 return -EINVAL;
940 if (!pmu_irq_is_valid(vcpu->kvm, irq))
941 return -EINVAL;
943 if (kvm_arm_pmu_irq_initialized(vcpu))
944 return -EBUSY;
946 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
947 vcpu->arch.pmu.irq_num = irq;
948 return 0;
950 case KVM_ARM_VCPU_PMU_V3_FILTER: {
951 struct kvm_pmu_event_filter __user *uaddr;
952 struct kvm_pmu_event_filter filter;
953 int nr_events;
955 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
957 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
959 if (copy_from_user(&filter, uaddr, sizeof(filter)))
960 return -EFAULT;
962 if (((u32)filter.base_event + filter.nevents) > nr_events ||
963 (filter.action != KVM_PMU_EVENT_ALLOW &&
964 filter.action != KVM_PMU_EVENT_DENY))
965 return -EINVAL;
967 mutex_lock(&vcpu->kvm->lock);
969 if (!vcpu->kvm->arch.pmu_filter) {
970 vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
971 if (!vcpu->kvm->arch.pmu_filter) {
972 mutex_unlock(&vcpu->kvm->lock);
973 return -ENOMEM;
977 * The default depends on the first applied filter.
978 * If it allows events, the default is to deny.
979 * Conversely, if the first filter denies a set of
980 * events, the default is to allow.
982 if (filter.action == KVM_PMU_EVENT_ALLOW)
983 bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
984 else
985 bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
988 if (filter.action == KVM_PMU_EVENT_ALLOW)
989 bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
990 else
991 bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
993 mutex_unlock(&vcpu->kvm->lock);
995 return 0;
997 case KVM_ARM_VCPU_PMU_V3_INIT:
998 return kvm_arm_pmu_v3_init(vcpu);
1001 return -ENXIO;
1004 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1006 switch (attr->attr) {
1007 case KVM_ARM_VCPU_PMU_V3_IRQ: {
1008 int __user *uaddr = (int __user *)(long)attr->addr;
1009 int irq;
1011 if (!irqchip_in_kernel(vcpu->kvm))
1012 return -EINVAL;
1014 if (!kvm_vcpu_has_pmu(vcpu))
1015 return -ENODEV;
1017 if (!kvm_arm_pmu_irq_initialized(vcpu))
1018 return -ENXIO;
1020 irq = vcpu->arch.pmu.irq_num;
1021 return put_user(irq, uaddr);
1025 return -ENXIO;
1028 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1030 switch (attr->attr) {
1031 case KVM_ARM_VCPU_PMU_V3_IRQ:
1032 case KVM_ARM_VCPU_PMU_V3_INIT:
1033 case KVM_ARM_VCPU_PMU_V3_FILTER:
1034 if (kvm_vcpu_has_pmu(vcpu))
1035 return 0;
1038 return -ENXIO;