// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
        switch (kvm->arch.pmuver) {
        case 1:                 /* ARMv8.0 */
                return GENMASK(9, 0);
        case 4:                 /* ARMv8.1 */
        case 5:                 /* ARMv8.4 */
        case 6:                 /* ARMv8.5 */
                return GENMASK(15, 0);
        default:                /* Shouldn't be here, just for sanity */
                WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
                return 0;
        }
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
        return (select_idx == ARMV8_PMU_CYCLE_IDX &&
                __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu;
        struct kvm_vcpu_arch *vcpu_arch;

        pmc -= pmc->idx;
        pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
        vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
        return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

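/*
 * Note: this back-mapping only works because each kvm_pmc lives by value
 * inside the pmc[] array of struct kvm_pmu, which is itself embedded in
 * struct kvm_vcpu_arch. Rewinding to pmc[0] and applying container_of()
 * twice therefore recovers the owning vcpu without needing a per-counter
 * back-pointer.
 */
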
/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

        return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
        return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
        if (kvm_pmu_pmc_is_chained(pmc) &&
            kvm_pmu_idx_is_high_counter(pmc->idx))
                return pmc - 1;

        return pmc;
}

static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
        if (kvm_pmu_idx_is_high_counter(pmc->idx))
                return pmc - 1;
        else
                return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
        u64 eventsel, reg;

        select_idx |= 0x1;

        if (select_idx == ARMV8_PMU_CYCLE_IDX)
                return false;

        reg = PMEVTYPER0_EL0 + select_idx;
        eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

        return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

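/*
 * A guest requests chaining by programming ARMV8_PMUV3_PERFCTR_CHAIN into the
 * event type register of an odd-numbered counter; the even/odd pair then
 * behaves as a single 64-bit counter. The helper above only checks whether
 * that event type is present -- whether the pair is actually treated as
 * chained also depends on the odd counter being enabled (see
 * kvm_pmu_update_pmc_chained()).
 */
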
/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
                                          struct kvm_pmc *pmc)
{
        u64 counter, counter_high, reg, enabled, running;

        if (kvm_pmu_pmc_is_chained(pmc)) {
                pmc = kvm_pmu_get_canonical_pmc(pmc);
                reg = PMEVCNTR0_EL0 + pmc->idx;

                counter = __vcpu_sys_reg(vcpu, reg);
                counter_high = __vcpu_sys_reg(vcpu, reg + 1);

                counter = lower_32_bits(counter) | (counter_high << 32);
        } else {
                reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
                      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
                counter = __vcpu_sys_reg(vcpu, reg);
        }

        /*
         * The real counter value is equal to the value of counter register plus
         * the value perf event counts.
         */
        if (pmc->perf_event)
                counter += perf_event_read_value(pmc->perf_event, &enabled,
                                                 &running);

        return counter;
}

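/*
 * For a chained pair the architectural value is reconstructed from the two
 * 32-bit shadow registers, e.g. a low word of 0xdeadbeef and a high word of
 * 0x00000002 yield 0x00000002deadbeef, to which any delta accumulated by the
 * backing perf event is then added.
 */
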
/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
        u64 counter;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc = &pmu->pmc[select_idx];

        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

        if (kvm_pmu_pmc_is_chained(pmc) &&
            kvm_pmu_idx_is_high_counter(select_idx))
                counter = upper_32_bits(counter);
        else if (select_idx != ARMV8_PMU_CYCLE_IDX)
                counter = lower_32_bits(counter);

        return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
        u64 reg;

        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
        __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

        /* Recreate the perf event to reflect the updated sample_period */
        kvm_pmu_create_perf_event(vcpu, select_idx);
}

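/*
 * Writing the register as "current shadow value + (new - observed)" rather
 * than storing @val directly preserves the portion of the count that still
 * lives in the backing perf event: the next read, which re-adds the perf
 * event delta, then returns exactly the value the guest programmed.
 */
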
/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
        pmc = kvm_pmu_get_canonical_pmc(pmc);
        if (pmc->perf_event) {
                perf_event_disable(pmc->perf_event);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
        u64 counter, reg, val;

        pmc = kvm_pmu_get_canonical_pmc(pmc);
        if (!pmc->perf_event)
                return;

        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

        if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
                reg = PMCCNTR_EL0;
                val = counter;
        } else {
                reg = PMEVCNTR0_EL0 + pmc->idx;
                val = lower_32_bits(counter);
        }

        __vcpu_sys_reg(vcpu, reg) = val;

        if (kvm_pmu_pmc_is_chained(pmc))
                __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

        kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
                pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
        unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        for_each_set_bit(i, &mask, 32)
                kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

        bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
                kvm_pmu_release_perf_event(&pmu->pmc[i]);

        irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
        u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

        val &= ARMV8_PMU_PMCR_N_MASK;
        if (val == 0)
                return BIT(ARMV8_PMU_CYCLE_IDX);
        else
                return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

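/*
 * Example: with PMCR_EL0.N == 6 the guest sees event counters 0..5 plus the
 * cycle counter, so the valid mask is GENMASK(5, 0) | BIT(31) == 0x8000003f.
 */
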
/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
                return;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
                if (!(val & BIT(i)))
                        continue;

                pmc = &pmu->pmc[i];

                /* A change in the enable state may affect the chain state */
                kvm_pmu_update_pmc_chained(vcpu, i);
                kvm_pmu_create_perf_event(vcpu, i);

                /* At this point, pmc must be the canonical */
                if (pmc->perf_event) {
                        perf_event_enable(pmc->perf_event);
                        if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
                                kvm_debug("fail to enable perf event\n");
                }
        }
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        if (!val)
                return;

        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
                if (!(val & BIT(i)))
                        continue;

                pmc = &pmu->pmc[i];

                /* A change in the enable state may affect the chain state */
                kvm_pmu_update_pmc_chained(vcpu, i);
                kvm_pmu_create_perf_event(vcpu, i);

                /* At this point, pmc must be the canonical */
                if (pmc->perf_event)
                        perf_event_disable(pmc->perf_event);
        }
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
        u64 reg = 0;

        if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
                reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
                reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
                reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
                reg &= kvm_pmu_valid_counter_mask(vcpu);
        }

        return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool overflow;

        if (!kvm_vcpu_has_pmu(vcpu))
                return;

        overflow = !!kvm_pmu_overflow_status(vcpu);
        if (pmu->irq_level == overflow)
                return;

        pmu->irq_level = overflow;

        if (likely(irqchip_in_kernel(vcpu->kvm))) {
                int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
                                              pmu->irq_num, overflow, pmu);
                WARN_ON(ret);
        }
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
        bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

        if (likely(irqchip_in_kernel(vcpu->kvm)))
                return false;

        return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
        struct kvm_sync_regs *regs = &vcpu->run->s.regs;

        /* Populate the timer bitmap for user space */
        regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
        if (vcpu->arch.pmu.irq_level)
                regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
        kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
        kvm_pmu_update_state(vcpu);
}

/**
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
        struct kvm_vcpu *vcpu;
        struct kvm_pmu *pmu;

        pmu = container_of(work, struct kvm_pmu, overflow_work);
        vcpu = kvm_pmc_to_vcpu(pmu->pmc);

        kvm_vcpu_kick(vcpu);
}

/**
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
                                  struct perf_sample_data *data,
                                  struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
        int idx = pmc->idx;
        u64 period;

        cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

        /*
         * Reset the sample period to the architectural limit,
         * i.e. the point where the counter overflows.
         */
        period = -(local64_read(&perf_event->count));

        if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
                period &= GENMASK(31, 0);

        local64_set(&perf_event->hw.period_left, 0);
        perf_event->attr.sample_period = period;
        perf_event->hw.sample_period = period;

        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

        if (kvm_pmu_overflow_status(vcpu)) {
                kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

                if (!in_nmi())
                        kvm_vcpu_kick(vcpu);
                else
                        irq_work_queue(&vcpu->arch.pmu.overflow_work);
        }

        cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

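/*
 * The new sample period is the distance from the accumulated perf count to
 * the next wrap of the emulated counter, i.e. (-count) truncated to the
 * counter width: an accumulated count of 0xfffffff0 on a 32-bit counter
 * yields a period of 0x10.
 */
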
/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
                return;

        /* Weed out disabled counters */
        val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

        for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
                u64 type, reg;

                if (!(val & BIT(i)))
                        continue;

                /* PMSWINC only applies to ... SW_INC! */
                type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
                type &= kvm_pmu_event_mask(vcpu->kvm);
                if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
                        continue;

                /* increment this even SW_INC counter */
                reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
                reg = lower_32_bits(reg);
                __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

                if (reg) /* no overflow on the low part */
                        continue;

                if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
                        /* increment the high counter */
                        reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
                        reg = lower_32_bits(reg);
                        __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
                        if (!reg) /* mark overflow on the high counter */
                                __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
                } else {
                        /* mark overflow on low counter */
                        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
                }
        }
}

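/*
 * PMSWINC never targets the cycle counter, hence the loop bound. When the
 * incremented counter is the low half of a chained pair, a wrap of the low
 * half is carried into the odd counter by hand and only an overflow of that
 * high half is reported; an unchained counter reports the overflow itself.
 */
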
/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
        unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
        int i;

        if (val & ARMV8_PMU_PMCR_E) {
                kvm_pmu_enable_counter_mask(vcpu,
                       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
        } else {
                kvm_pmu_disable_counter_mask(vcpu, mask);
        }

        if (val & ARMV8_PMU_PMCR_C)
                kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

        if (val & ARMV8_PMU_PMCR_P) {
                for_each_set_bit(i, &mask, 32)
                        kvm_pmu_set_counter_value(vcpu, i, 0);
        }
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
        return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
               (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;
        struct perf_event *event;
        struct perf_event_attr attr;
        u64 eventsel, counter, reg, data;

        /*
         * For chained counters the event type and filtering attributes are
         * obtained from the low/even counter. We also use this counter to
         * determine if the event is enabled/disabled.
         */
        pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

        reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
        data = __vcpu_sys_reg(vcpu, reg);

        kvm_pmu_stop_counter(vcpu, pmc);
        if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
                eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
        else
                eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

        /* Software increment event doesn't need to be backed by a perf event */
        if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
                return;

        /*
         * If we have a filter in place and that the event isn't allowed, do
         * not install a perf event either.
         */
        if (vcpu->kvm->arch.pmu_filter &&
            !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
                return;

        memset(&attr, 0, sizeof(struct perf_event_attr));
        attr.type = PERF_TYPE_RAW;
        attr.size = sizeof(attr);
        attr.pinned = 1;
        attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
        attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
        attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
        attr.exclude_hv = 1; /* Don't count EL2 events */
        attr.exclude_host = 1; /* Don't count host events */
        attr.config = eventsel;

        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

        if (kvm_pmu_pmc_is_chained(pmc)) {
                /**
                 * The initial sample period (overflow count) of an event. For
                 * chained counters we only support overflow interrupts on the
                 * high counter.
                 */
                attr.sample_period = (-counter) & GENMASK(63, 0);
                attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

                event = perf_event_create_kernel_counter(&attr, -1, current,
                                                         kvm_pmu_perf_overflow,
                                                         pmc + 1);
        } else {
                /* The initial sample period (overflow count) of an event. */
                if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
                        attr.sample_period = (-counter) & GENMASK(63, 0);
                else
                        attr.sample_period = (-counter) & GENMASK(31, 0);

                event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 kvm_pmu_perf_overflow, pmc);
        }

        if (IS_ERR(event)) {
                pr_err_once("kvm: pmu event creation failed %ld\n",
                            PTR_ERR(event));
                return;
        }

        pmc->perf_event = event;
}

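/*
 * The initial sample_period mirrors the guest-visible counter: programming
 * the backing event to fire after (2^width - counter) more increments makes
 * the host perf overflow coincide with the emulated counter wrapping. For a
 * chained pair the overflow handler context is the high counter (this
 * reconstruction assumes the original passed pmc + 1 there), so overflow is
 * reported on the odd index as the architecture requires.
 */
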
/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
        bool new_state, old_state;

        old_state = kvm_pmu_pmc_is_chained(pmc);
        new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
                    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

        if (old_state == new_state)
                return;

        canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
        kvm_pmu_stop_counter(vcpu, canonical_pmc);
        if (new_state) {
                /*
                 * During promotion from !chained to chained we must ensure
                 * the adjacent counter is stopped and its event destroyed
                 */
                kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
                set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
                return;
        }
        clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
                                    u64 select_idx)
{
        u64 reg, mask;

        mask  =  ARMV8_PMU_EVTYPE_MASK;
        mask &= ~ARMV8_PMU_EVTYPE_EVENT;
        mask |= kvm_pmu_event_mask(vcpu->kvm);

        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

        __vcpu_sys_reg(vcpu, reg) = data & mask;

        kvm_pmu_update_pmc_chained(vcpu, select_idx);
        kvm_pmu_create_perf_event(vcpu, select_idx);
}

static int kvm_pmu_probe_pmuver(void)
{
        struct perf_event_attr attr = { };
        struct perf_event *event;
        struct arm_pmu *pmu;
        int pmuver = 0xf;

        /*
         * Create a dummy event that only counts user cycles. As we'll never
         * leave this function with the event being live, it will never
         * count anything. But it allows us to probe some of the PMU
         * details. Yes, this is terrible.
         */
        attr.type = PERF_TYPE_RAW;
        attr.size = sizeof(attr);
        attr.pinned = 1;
        attr.disabled = 0;
        attr.exclude_user = 0;
        attr.exclude_kernel = 1;
        attr.exclude_host = 1;
        attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
        attr.sample_period = GENMASK(63, 0);

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 kvm_pmu_perf_overflow, &attr);

        if (IS_ERR(event)) {
                pr_err_once("kvm: pmu event creation failed %ld\n",
                            PTR_ERR(event));
                return 0xf;
        }

        if (event->pmu) {
                pmu = to_arm_pmu(event->pmu);
                if (pmu->pmuver)
                        pmuver = pmu->pmuver;
        }

        perf_event_disable(event);
        perf_event_release_kernel(event);

        return pmuver;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
        unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
        u64 val, mask = 0;
        int base, i;

        if (!pmceid1) {
                val = read_sysreg(pmceid0_el0);
                base = 0;
        } else {
                val = read_sysreg(pmceid1_el0);
                base = 32;
        }

        if (!bmap)
                return val;

        for (i = 0; i < 32; i += 8) {
                u64 byte;

                byte = bitmap_get_value8(bmap, base + i);
                mask |= byte << i;
                byte = bitmap_get_value8(bmap, 0x4000 + base + i);
                mask |= byte << (32 + i);
        }

        return val & mask;
}

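/*
 * PMCEID0/1_EL0 advertise supported events as one bit per event ID, in two
 * 32-bit halves each: the low word covers the base event range and the high
 * word the extended range starting at event 0x4000. The loop above folds the
 * per-event filter bitmap into that same layout, eight bits at a time, before
 * masking the hardware-provided value.
 */
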
bool kvm_arm_support_pmu_v3(void)
{
        /*
         * Check if HW_PERF_EVENTS are supported by checking the number of
         * hardware performance counters. This could ensure the presence of
         * a physical PMU and CONFIG_PERF_EVENT is selected.
         */
        return (perf_num_counters() > 0);
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
        if (!kvm_vcpu_has_pmu(vcpu))
                return 0;

        if (!vcpu->arch.pmu.created)
                return -EINVAL;

        /*
         * A valid interrupt configuration for the PMU is either to have a
         * properly configured interrupt number and using an in-kernel
         * irqchip, or to not have an in-kernel GIC and not set an IRQ.
         */
        if (irqchip_in_kernel(vcpu->kvm)) {
                int irq = vcpu->arch.pmu.irq_num;
                /*
                 * If we are using an in-kernel vgic, at this point we know
                 * the vgic will be initialized, so we can check the PMU irq
                 * number against the dimensions of the vgic and make sure
                 * it's valid.
                 */
                if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
                        return -EINVAL;
        } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
                return -EINVAL;
        }

        kvm_pmu_vcpu_reset(vcpu);

        return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
        if (irqchip_in_kernel(vcpu->kvm)) {
                int ret;

                /*
                 * If using the PMU with an in-kernel virtual GIC
                 * implementation, we require the GIC to be already
                 * initialized when initializing the PMU.
                 */
                if (!vgic_initialized(vcpu->kvm))
                        return -ENODEV;

                if (!kvm_arm_pmu_irq_initialized(vcpu))
                        return -ENXIO;

                ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
                                         &vcpu->arch.pmu);
                if (ret)
                        return ret;
        }

        init_irq_work(&vcpu->arch.pmu.overflow_work,
                      kvm_pmu_perf_overflow_notify_vcpu);

        vcpu->arch.pmu.created = true;
        return 0;
}

/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
        int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (!kvm_arm_pmu_irq_initialized(vcpu))
                        continue;

                if (irq_is_ppi(irq)) {
                        if (vcpu->arch.pmu.irq_num != irq)
                                return false;
                } else {
                        if (vcpu->arch.pmu.irq_num == irq)
                                return false;
                }
        }

        return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
        if (!kvm_vcpu_has_pmu(vcpu))
                return -ENODEV;

        if (vcpu->arch.pmu.created)
                return -EBUSY;

        if (!vcpu->kvm->arch.pmuver)
                vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

        if (vcpu->kvm->arch.pmuver == 0xf)
                return -ENODEV;

        switch (attr->attr) {
        case KVM_ARM_VCPU_PMU_V3_IRQ: {
                int __user *uaddr = (int __user *)(long)attr->addr;
                int irq;

                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;

                if (get_user(irq, uaddr))
                        return -EFAULT;

                /* The PMU overflow interrupt can be a PPI or a valid SPI. */
                if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
                        return -EINVAL;

                if (!pmu_irq_is_valid(vcpu->kvm, irq))
                        return -EINVAL;

                if (kvm_arm_pmu_irq_initialized(vcpu))
                        return -EBUSY;

                kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
                vcpu->arch.pmu.irq_num = irq;
                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_FILTER: {
                struct kvm_pmu_event_filter __user *uaddr;
                struct kvm_pmu_event_filter filter;
                int nr_events;

                nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

                uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

                if (copy_from_user(&filter, uaddr, sizeof(filter)))
                        return -EFAULT;

                if (((u32)filter.base_event + filter.nevents) > nr_events ||
                    (filter.action != KVM_PMU_EVENT_ALLOW &&
                     filter.action != KVM_PMU_EVENT_DENY))
                        return -EINVAL;

                mutex_lock(&vcpu->kvm->lock);

                if (!vcpu->kvm->arch.pmu_filter) {
                        vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
                        if (!vcpu->kvm->arch.pmu_filter) {
                                mutex_unlock(&vcpu->kvm->lock);
                                return -ENOMEM;
                        }

                        /*
                         * The default depends on the first applied filter.
                         * If it allows events, the default is to deny.
                         * Conversely, if the first filter denies a set of
                         * events, the default is to allow.
                         */
                        if (filter.action == KVM_PMU_EVENT_ALLOW)
                                bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
                        else
                                bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
                }

                if (filter.action == KVM_PMU_EVENT_ALLOW)
                        bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
                else
                        bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

                mutex_unlock(&vcpu->kvm->lock);

                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_INIT:
                return kvm_arm_pmu_v3_init(vcpu);
        }

        return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
        switch (attr->attr) {
        case KVM_ARM_VCPU_PMU_V3_IRQ: {
                int __user *uaddr = (int __user *)(long)attr->addr;
                int irq;

                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;

                if (!kvm_vcpu_has_pmu(vcpu))
                        return -ENODEV;

                if (!kvm_arm_pmu_irq_initialized(vcpu))
                        return -ENXIO;

                irq = vcpu->arch.pmu.irq_num;
                return put_user(irq, uaddr);
        }
        }

        return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
        switch (attr->attr) {
        case KVM_ARM_VCPU_PMU_V3_IRQ:
        case KVM_ARM_VCPU_PMU_V3_INIT:
        case KVM_ARM_VCPU_PMU_V3_FILTER:
                if (kvm_vcpu_has_pmu(vcpu))
                        return 0;
        }

        return -ENXIO;
}