// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *	Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
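
/*
 * Note on the SBI event index encoding assumed by these macros (per the SBI
 * PMU specification): bits [19:16] hold the event type and bits [15:0] the
 * event code. For example, eidx 0x00001 decodes to type 0x0 (hardware general
 * event) with code 1 (SBI_PMU_HW_CPU_CYCLES), while firmware events use
 * type 0xf.
 */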
static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}
static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}
static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}
static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}
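
/*
 * The config value above follows the standard perf PERF_TYPE_HW_CACHE
 * encoding: byte 0 is the cache id, byte 1 the operation and byte 2 the
 * result. Illustrative example: an L1D read miss is
 * PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) == 0x10000.
 */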
static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}
static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	}

	*out_val = pmc->counter_val;

	return 0;
}
static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested from the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this needs to be deferred via a workqueue.
	 * That would introduce skew in the counter value because the actual
	 * physical counter would start after returning from this function.
	 * It would be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter will start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the
	 * hardware, the host may have some sluggishness only if privilege mode
	 * filtering is not available. In an ideal world, where qemu is not the
	 * only capable hardware, this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything as such.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		//TODO: Do we really want to clear the value in the hardware counter?
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they
		 * must be accessed through SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
					  unsigned long saddr_high, unsigned long flags,
					  struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;
	unsigned long hva;
	bool writable;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
	if (kvm_is_error_hva(hva) || !writable) {
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_FAILURE;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are relative
			 * to cbase. Modify the set bit in the counter mask instead of the
			 * pmc_index which indicates the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible for
				 * validating both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
				     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		/*
		 * It should never reach here if the platform doesn't support the sscofpmf
		 * extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check that it is already marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;

out:
	retdata->err_val = sbiret;

	return 0;
}
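
/*
 * The SBI inhibit flags map directly onto the perf exclude bits above:
 * SBI_PMU_CFG_FLAG_SET_UINH excludes user mode and SBI_PMU_CFG_FLAG_SET_SINH
 * excludes supervisor mode. config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS marks
 * the event as counting guest execution, so the host PMU driver is expected
 * to apply these inhibits to the guest (VU/VS) privilege modes rather than
 * to the host.
 */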
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}
int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				   struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always
	 * count events while the execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters to offset the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counter and virtual counters.
	 * However, we need to encode a hpmcounter CSR in the counter info field so that
	 * KVM can trap and emulate the read. This works well in the migration use case as
	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* TIME CSR shouldn't be read from perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}