Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / arch / riscv / kvm / vcpu_pmu.c
blob2707a51b082ca7773b1797328bccd11795b65ec4
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2023 Rivos Inc
5 * Authors:
6 * Atish Patra <atishp@rivosinc.com>
7 */
9 #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
10 #include <linux/errno.h>
11 #include <linux/err.h>
12 #include <linux/kvm_host.h>
13 #include <linux/perf/riscv_pmu.h>
14 #include <asm/csr.h>
15 #include <asm/kvm_vcpu_sbi.h>
16 #include <asm/kvm_vcpu_pmu.h>
17 #include <asm/sbi.h>
18 #include <linux/bitops.h>
/* Total number of virtual counters: hardware counters plus firmware counters. */
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
/* Event type field of an SBI event index (type mask, shifted down by 16). */
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
/* Event code field of an SBI event index. */
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
24 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
25 [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
26 [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
27 [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
28 [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
29 [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
30 [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
31 [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
32 [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
33 [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
34 [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
37 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
39 u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
40 u64 sample_period;
42 if (!pmc->counter_val)
43 sample_period = counter_val_mask;
44 else
45 sample_period = (-pmc->counter_val) & counter_val_mask;
47 return sample_period;
50 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
52 enum sbi_pmu_event_type etype = get_event_type(eidx);
53 u32 type = PERF_TYPE_MAX;
55 switch (etype) {
56 case SBI_PMU_EVENT_TYPE_HW:
57 type = PERF_TYPE_HARDWARE;
58 break;
59 case SBI_PMU_EVENT_TYPE_CACHE:
60 type = PERF_TYPE_HW_CACHE;
61 break;
62 case SBI_PMU_EVENT_TYPE_RAW:
63 case SBI_PMU_EVENT_TYPE_FW:
64 type = PERF_TYPE_RAW;
65 break;
66 default:
67 break;
70 return type;
73 static bool kvm_pmu_is_fw_event(unsigned long eidx)
75 return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
78 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
80 if (pmc->perf_event) {
81 perf_event_disable(pmc->perf_event);
82 perf_event_release_kernel(pmc->perf_event);
83 pmc->perf_event = NULL;
87 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
89 return hw_event_perf_map[sbi_event_code];
92 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
94 u64 config = U64_MAX;
95 unsigned int cache_type, cache_op, cache_result;
97 /* All the cache event masks lie within 0xFF. No separate masking is necessary */
98 cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
99 SBI_PMU_EVENT_CACHE_ID_SHIFT;
100 cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
101 SBI_PMU_EVENT_CACHE_OP_SHIFT;
102 cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;
104 if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
105 cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
106 cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
107 return config;
109 config = cache_type | (cache_op << 8) | (cache_result << 16);
111 return config;
114 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
116 enum sbi_pmu_event_type etype = get_event_type(eidx);
117 u32 ecode = get_event_code(eidx);
118 u64 config = U64_MAX;
120 switch (etype) {
121 case SBI_PMU_EVENT_TYPE_HW:
122 if (ecode < SBI_PMU_HW_GENERAL_MAX)
123 config = kvm_pmu_get_perf_event_hw_config(ecode);
124 break;
125 case SBI_PMU_EVENT_TYPE_CACHE:
126 config = kvm_pmu_get_perf_event_cache_config(ecode);
127 break;
128 case SBI_PMU_EVENT_TYPE_RAW:
129 config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
130 break;
131 case SBI_PMU_EVENT_TYPE_FW:
132 if (ecode < SBI_PMU_FW_MAX)
133 config = (1ULL << 63) | ecode;
134 break;
135 default:
136 break;
139 return config;
142 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
144 u32 etype = kvm_pmu_get_perf_event_type(eidx);
145 u32 ecode = get_event_code(eidx);
147 if (etype != SBI_PMU_EVENT_TYPE_HW)
148 return -EINVAL;
150 if (ecode == SBI_PMU_HW_CPU_CYCLES)
151 return 0;
152 else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
153 return 2;
154 else
155 return -EINVAL;
158 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
159 unsigned long cbase, unsigned long cmask)
161 int ctr_idx = -1;
162 int i, pmc_idx;
163 int min, max;
165 if (kvm_pmu_is_fw_event(eidx)) {
166 /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
167 min = kvpmu->num_hw_ctrs;
168 max = min + kvpmu->num_fw_ctrs;
169 } else {
170 /* First 3 counters are reserved for fixed counters */
171 min = 3;
172 max = kvpmu->num_hw_ctrs;
175 for_each_set_bit(i, &cmask, BITS_PER_LONG) {
176 pmc_idx = i + cbase;
177 if ((pmc_idx >= min && pmc_idx < max) &&
178 !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
179 ctr_idx = pmc_idx;
180 break;
184 return ctr_idx;
/*
 * Pick the counter index for @eidx: the fixed index when the event has one,
 * otherwise a free programmable counter from the @cbase/@cmask selection.
 *
 * Return: a counter index (>= 0), or a negative value if none is available.
 */
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	/* Fixed counters need a fixed mapping as they have a different width */
	int fixed_idx = kvm_pmu_get_fixed_pmc_index(eidx);

	if (fixed_idx < 0)
		return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);

	return fixed_idx;
}
200 static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
201 unsigned long *out_val)
203 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
204 struct kvm_pmc *pmc;
205 int fevent_code;
207 if (!IS_ENABLED(CONFIG_32BIT)) {
208 pr_warn("%s: should be invoked for only RV32\n", __func__);
209 return -EINVAL;
212 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
213 pr_warn("Invalid counter id [%ld]during read\n", cidx);
214 return -EINVAL;
217 pmc = &kvpmu->pmc[cidx];
219 if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
220 return -EINVAL;
222 fevent_code = get_event_code(pmc->event_idx);
223 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
225 *out_val = pmc->counter_val >> 32;
227 return 0;
230 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
231 unsigned long *out_val)
233 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
234 struct kvm_pmc *pmc;
235 u64 enabled, running;
236 int fevent_code;
238 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
239 pr_warn("Invalid counter id [%ld] during read\n", cidx);
240 return -EINVAL;
243 pmc = &kvpmu->pmc[cidx];
245 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
246 fevent_code = get_event_code(pmc->event_idx);
247 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
248 } else if (pmc->perf_event) {
249 pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
250 } else {
251 return -EINVAL;
253 *out_val = pmc->counter_val;
255 return 0;
258 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
259 unsigned long ctr_mask)
261 /* Make sure the we have a valid counter mask requested from the caller */
262 if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
263 return -EINVAL;
265 return 0;
268 static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
269 struct perf_sample_data *data,
270 struct pt_regs *regs)
272 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
273 struct kvm_vcpu *vcpu = pmc->vcpu;
274 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
275 struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
276 u64 period;
279 * Stop the event counting by directly accessing the perf_event.
280 * Otherwise, this needs to deferred via a workqueue.
281 * That will introduce skew in the counter value because the actual
282 * physical counter would start after returning from this function.
283 * It will be stopped again once the workqueue is scheduled
285 rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
288 * The hw counter would start automatically when this function returns.
289 * Thus, the host may continue to interrupt and inject it to the guest
290 * even without the guest configuring the next event. Depending on the hardware
291 * the host may have some sluggishness only if privilege mode filtering is not
292 * available. In an ideal world, where qemu is not the only capable hardware,
293 * this can be removed.
294 * FYI: ARM64 does this way while x86 doesn't do anything as such.
295 * TODO: Should we keep it for RISC-V ?
297 period = -(local64_read(&perf_event->count));
299 local64_set(&perf_event->hw.period_left, 0);
300 perf_event->attr.sample_period = period;
301 perf_event->hw.sample_period = period;
303 set_bit(pmc->idx, kvpmu->pmc_overflown);
304 kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
306 rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
309 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
310 unsigned long flags, unsigned long eidx,
311 unsigned long evtdata)
313 struct perf_event *event;
315 kvm_pmu_release_perf_event(pmc);
316 attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
317 if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
318 //TODO: Do we really want to clear the value in hardware counter
319 pmc->counter_val = 0;
323 * Set the default sample_period for now. The guest specified value
324 * will be updated in the start call.
326 attr->sample_period = kvm_pmu_get_sample_period(pmc);
328 event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
329 if (IS_ERR(event)) {
330 pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
331 return PTR_ERR(event);
334 pmc->perf_event = event;
335 if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
336 perf_event_enable(pmc->perf_event);
338 return 0;
341 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
343 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
344 struct kvm_fw_event *fevent;
346 if (!kvpmu || fid >= SBI_PMU_FW_MAX)
347 return -EINVAL;
349 fevent = &kvpmu->fw_event[fid];
350 if (fevent->started)
351 fevent->value++;
353 return 0;
356 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
357 unsigned long *val, unsigned long new_val,
358 unsigned long wr_mask)
360 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
361 int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
363 if (!kvpmu || !kvpmu->init_done) {
365 * In absence of sscofpmf in the platform, the guest OS may use
366 * the legacy PMU driver to read cycle/instret. In that case,
367 * just return 0 to avoid any illegal trap. However, any other
368 * hpmcounter access should result in illegal trap as they must
369 * be access through SBI PMU only.
371 if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
372 *val = 0;
373 return ret;
374 } else {
375 return KVM_INSN_ILLEGAL_TRAP;
379 /* The counter CSR are read only. Thus, any write should result in illegal traps */
380 if (wr_mask)
381 return KVM_INSN_ILLEGAL_TRAP;
383 cidx = csr_num - CSR_CYCLE;
385 if (pmu_ctr_read(vcpu, cidx, val) < 0)
386 return KVM_INSN_ILLEGAL_TRAP;
388 return ret;
391 static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
393 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
395 kfree(kvpmu->sdata);
396 kvpmu->sdata = NULL;
397 kvpmu->snapshot_addr = INVALID_GPA;
400 int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
401 unsigned long saddr_high, unsigned long flags,
402 struct kvm_vcpu_sbi_return *retdata)
404 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
405 int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
406 int sbiret = 0;
407 gpa_t saddr;
408 unsigned long hva;
409 bool writable;
411 if (!kvpmu || flags) {
412 sbiret = SBI_ERR_INVALID_PARAM;
413 goto out;
416 if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
417 kvm_pmu_clear_snapshot_area(vcpu);
418 return 0;
421 saddr = saddr_low;
423 if (saddr_high != 0) {
424 if (IS_ENABLED(CONFIG_32BIT))
425 saddr |= ((gpa_t)saddr_high << 32);
426 else
427 sbiret = SBI_ERR_INVALID_ADDRESS;
428 goto out;
431 hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
432 if (kvm_is_error_hva(hva) || !writable) {
433 sbiret = SBI_ERR_INVALID_ADDRESS;
434 goto out;
437 kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
438 if (!kvpmu->sdata)
439 return -ENOMEM;
441 if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
442 kfree(kvpmu->sdata);
443 sbiret = SBI_ERR_FAILURE;
444 goto out;
447 kvpmu->snapshot_addr = saddr;
449 out:
450 retdata->err_val = sbiret;
452 return 0;
455 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
456 struct kvm_vcpu_sbi_return *retdata)
458 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
460 retdata->out_val = kvm_pmu_num_counters(kvpmu);
462 return 0;
465 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
466 struct kvm_vcpu_sbi_return *retdata)
468 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
470 if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
471 retdata->err_val = SBI_ERR_INVALID_PARAM;
472 return 0;
475 retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
477 return 0;
480 int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
481 unsigned long ctr_mask, unsigned long flags, u64 ival,
482 struct kvm_vcpu_sbi_return *retdata)
484 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
485 int i, pmc_index, sbiret = 0;
486 struct kvm_pmc *pmc;
487 int fevent_code;
488 bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
490 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
491 sbiret = SBI_ERR_INVALID_PARAM;
492 goto out;
495 if (snap_flag_set) {
496 if (kvpmu->snapshot_addr == INVALID_GPA) {
497 sbiret = SBI_ERR_NO_SHMEM;
498 goto out;
500 if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
501 sizeof(struct riscv_pmu_snapshot_data))) {
502 pr_warn("Unable to read snapshot shared memory while starting counters\n");
503 sbiret = SBI_ERR_FAILURE;
504 goto out;
507 /* Start the counters that have been configured and requested by the guest */
508 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
509 pmc_index = i + ctr_base;
510 if (!test_bit(pmc_index, kvpmu->pmc_in_use))
511 continue;
512 /* The guest started the counter again. Reset the overflow status */
513 clear_bit(pmc_index, kvpmu->pmc_overflown);
514 pmc = &kvpmu->pmc[pmc_index];
515 if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
516 pmc->counter_val = ival;
517 } else if (snap_flag_set) {
518 /* The counter index in the snapshot are relative to the counter base */
519 pmc->counter_val = kvpmu->sdata->ctr_values[i];
522 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
523 fevent_code = get_event_code(pmc->event_idx);
524 if (fevent_code >= SBI_PMU_FW_MAX) {
525 sbiret = SBI_ERR_INVALID_PARAM;
526 goto out;
529 /* Check if the counter was already started for some reason */
530 if (kvpmu->fw_event[fevent_code].started) {
531 sbiret = SBI_ERR_ALREADY_STARTED;
532 continue;
535 kvpmu->fw_event[fevent_code].started = true;
536 kvpmu->fw_event[fevent_code].value = pmc->counter_val;
537 } else if (pmc->perf_event) {
538 if (unlikely(pmc->started)) {
539 sbiret = SBI_ERR_ALREADY_STARTED;
540 continue;
542 perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
543 perf_event_enable(pmc->perf_event);
544 pmc->started = true;
545 } else {
546 sbiret = SBI_ERR_INVALID_PARAM;
550 out:
551 retdata->err_val = sbiret;
553 return 0;
556 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
557 unsigned long ctr_mask, unsigned long flags,
558 struct kvm_vcpu_sbi_return *retdata)
560 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
561 int i, pmc_index, sbiret = 0;
562 u64 enabled, running;
563 struct kvm_pmc *pmc;
564 int fevent_code;
565 bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
566 bool shmem_needs_update = false;
568 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
569 sbiret = SBI_ERR_INVALID_PARAM;
570 goto out;
573 if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
574 sbiret = SBI_ERR_NO_SHMEM;
575 goto out;
578 /* Stop the counters that have been configured and requested by the guest */
579 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
580 pmc_index = i + ctr_base;
581 if (!test_bit(pmc_index, kvpmu->pmc_in_use))
582 continue;
583 pmc = &kvpmu->pmc[pmc_index];
584 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
585 fevent_code = get_event_code(pmc->event_idx);
586 if (fevent_code >= SBI_PMU_FW_MAX) {
587 sbiret = SBI_ERR_INVALID_PARAM;
588 goto out;
591 if (!kvpmu->fw_event[fevent_code].started)
592 sbiret = SBI_ERR_ALREADY_STOPPED;
594 kvpmu->fw_event[fevent_code].started = false;
595 } else if (pmc->perf_event) {
596 if (pmc->started) {
597 /* Stop counting the counter */
598 perf_event_disable(pmc->perf_event);
599 pmc->started = false;
600 } else {
601 sbiret = SBI_ERR_ALREADY_STOPPED;
604 if (flags & SBI_PMU_STOP_FLAG_RESET)
605 /* Release the counter if this is a reset request */
606 kvm_pmu_release_perf_event(pmc);
607 } else {
608 sbiret = SBI_ERR_INVALID_PARAM;
611 if (snap_flag_set && !sbiret) {
612 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
613 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
614 else if (pmc->perf_event)
615 pmc->counter_val += perf_event_read_value(pmc->perf_event,
616 &enabled, &running);
618 * The counter and overflow indicies in the snapshot region are w.r.to
619 * cbase. Modify the set bit in the counter mask instead of the pmc_index
620 * which indicates the absolute counter index.
622 if (test_bit(pmc_index, kvpmu->pmc_overflown))
623 kvpmu->sdata->ctr_overflow_mask |= BIT(i);
624 kvpmu->sdata->ctr_values[i] = pmc->counter_val;
625 shmem_needs_update = true;
628 if (flags & SBI_PMU_STOP_FLAG_RESET) {
629 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
630 clear_bit(pmc_index, kvpmu->pmc_in_use);
631 clear_bit(pmc_index, kvpmu->pmc_overflown);
632 if (snap_flag_set) {
634 * Only clear the given counter as the caller is responsible to
635 * validate both the overflow mask and configured counters.
637 kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
638 shmem_needs_update = true;
643 if (shmem_needs_update)
644 kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
645 sizeof(struct riscv_pmu_snapshot_data));
647 out:
648 retdata->err_val = sbiret;
650 return 0;
653 int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
654 unsigned long ctr_mask, unsigned long flags,
655 unsigned long eidx, u64 evtdata,
656 struct kvm_vcpu_sbi_return *retdata)
658 int ctr_idx, sbiret = 0;
659 long ret;
660 bool is_fevent;
661 unsigned long event_code;
662 u32 etype = kvm_pmu_get_perf_event_type(eidx);
663 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
664 struct kvm_pmc *pmc = NULL;
665 struct perf_event_attr attr = {
666 .type = etype,
667 .size = sizeof(struct perf_event_attr),
668 .pinned = true,
670 * It should never reach here if the platform doesn't support the sscofpmf
671 * extension as mode filtering won't work without it.
673 .exclude_host = true,
674 .exclude_hv = true,
675 .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
676 .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
677 .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
680 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
681 sbiret = SBI_ERR_INVALID_PARAM;
682 goto out;
685 event_code = get_event_code(eidx);
686 is_fevent = kvm_pmu_is_fw_event(eidx);
687 if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
688 sbiret = SBI_ERR_NOT_SUPPORTED;
689 goto out;
693 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
694 * for this event. Just do a sanity check if it already marked used.
696 if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
697 if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
698 sbiret = SBI_ERR_FAILURE;
699 goto out;
701 ctr_idx = ctr_base + __ffs(ctr_mask);
702 } else {
703 ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
704 if (ctr_idx < 0) {
705 sbiret = SBI_ERR_NOT_SUPPORTED;
706 goto out;
710 pmc = &kvpmu->pmc[ctr_idx];
711 pmc->idx = ctr_idx;
713 if (is_fevent) {
714 if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
715 kvpmu->fw_event[event_code].started = true;
716 } else {
717 ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
718 if (ret) {
719 sbiret = SBI_ERR_NOT_SUPPORTED;
720 goto out;
724 set_bit(ctr_idx, kvpmu->pmc_in_use);
725 pmc->event_idx = eidx;
726 retdata->out_val = ctr_idx;
727 out:
728 retdata->err_val = sbiret;
730 return 0;
733 int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
734 struct kvm_vcpu_sbi_return *retdata)
736 int ret;
738 ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
739 if (ret == -EINVAL)
740 retdata->err_val = SBI_ERR_INVALID_PARAM;
742 return 0;
745 int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
746 struct kvm_vcpu_sbi_return *retdata)
748 int ret;
750 ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
751 if (ret == -EINVAL)
752 retdata->err_val = SBI_ERR_INVALID_PARAM;
754 return 0;
757 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
759 int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
760 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
761 struct kvm_pmc *pmc;
764 * PMU functionality should be only available to guests if privilege mode
765 * filtering is available in the host. Otherwise, guest will always count
766 * events while the execution is in hypervisor mode.
768 if (!riscv_isa_extension_available(NULL, SSCOFPMF))
769 return;
771 ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
772 if (ret < 0 || !hpm_width || !num_hw_ctrs)
773 return;
776 * Increase the number of hardware counters to offset the time counter.
778 kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
779 kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
780 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
781 kvpmu->snapshot_addr = INVALID_GPA;
783 if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
784 pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
785 kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
789 * There is no correlation between the logical hardware counter and virtual counters.
790 * However, we need to encode a hpmcounter CSR in the counter info field so that
791 * KVM can trap n emulate the read. This works well in the migration use case as
792 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
794 for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
795 /* TIME CSR shouldn't be read from perf interface */
796 if (i == 1)
797 continue;
798 pmc = &kvpmu->pmc[i];
799 pmc->idx = i;
800 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
801 pmc->vcpu = vcpu;
802 if (i < kvpmu->num_hw_ctrs) {
803 pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
804 if (i < 3)
805 /* CY, IR counters */
806 pmc->cinfo.width = 63;
807 else
808 pmc->cinfo.width = hpm_width;
810 * The CSR number doesn't have any relation with the logical
811 * hardware counters. The CSR numbers are encoded sequentially
812 * to avoid maintaining a map between the virtual counter
813 * and CSR number.
815 pmc->cinfo.csr = CSR_CYCLE + i;
816 } else {
817 pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
818 pmc->cinfo.width = 63;
822 kvpmu->init_done = true;
825 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
827 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
828 struct kvm_pmc *pmc;
829 int i;
831 if (!kvpmu)
832 return;
834 for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
835 pmc = &kvpmu->pmc[i];
836 pmc->counter_val = 0;
837 kvm_pmu_release_perf_event(pmc);
838 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
840 bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
841 bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
842 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
843 kvm_pmu_clear_snapshot_area(vcpu);
/* Reset the virtual PMU of @vcpu; a reset is the same as a teardown. */
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}