// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *	Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
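
/*
 * Note on the SBI event index encoding assumed by these macros (per the SBI
 * PMU specification): bits [19:16] hold the event type and bits [15:0] the
 * event code. For example, eidx 0x00001 decodes to type 0x0 (hardware general
 * event) with code 1 (SBI_PMU_HW_CPU_CYCLES), while firmware events use
 * type 0xf.
 */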
static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}
static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}
static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}
static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}
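
/*
 * The config value above follows the standard perf PERF_TYPE_HW_CACHE
 * encoding: byte 0 is the cache id, byte 1 the operation and byte 2 the
 * result. Illustrative example: an L1D read miss is
 * PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) == 0x10000.
 */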
static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}
static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	}

	*out_val = pmc->counter_val;

	return 0;
}
static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested from the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this needs to be deferred via a workqueue.
	 * That would introduce skew in the counter value because the actual
	 * physical counter would start after returning from this function.
	 * It would be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter will start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the
	 * hardware, the host may have some sluggishness only if privilege mode
	 * filtering is not available. In an ideal world, where qemu is not the
	 * only capable hardware, this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything as such.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		//TODO: Do we really want to clear the value in the hardware counter?
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they
		 * must be accessed through SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
					  unsigned long saddr_high, unsigned long flags,
					  struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;
	unsigned long hva;
	bool writable;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
	if (kvm_is_error_hva(hva) || !writable) {
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_FAILURE;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are relative
			 * to cbase. Modify the set bit in the counter mask instead of the
			 * pmc_index which indicates the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible for
				 * validating both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
				     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		/*
		 * It should never reach here if the platform doesn't support the sscofpmf
		 * extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check that it is already marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;

out:
	retdata->err_val = sbiret;

	return 0;
}
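
/*
 * The SBI inhibit flags map directly onto the perf exclude bits above:
 * SBI_PMU_CFG_FLAG_SET_UINH excludes user mode and SBI_PMU_CFG_FLAG_SET_SINH
 * excludes supervisor mode. config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS marks
 * the event as counting guest execution, so the host PMU driver is expected
 * to apply these inhibits to the guest (VU/VS) privilege modes rather than
 * to the host.
 */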
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}
int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				   struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always
	 * count events while the execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters to offset the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counter and virtual counters.
	 * However, we need to encode a hpmcounter CSR in the counter info field so that
	 * KVM can trap and emulate the read. This works well in the migration use case as
	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* TIME CSR shouldn't be read from perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}