// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *	Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

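/*
 * Restore the VCPU to its reset state: reload the reset copies of the guest
 * CSRs and guest context, reset FP/vector/timer/AIA/PMU state, and clear any
 * pending interrupts and stale HFENCE requests.  If the VCPU is currently
 * loaded on a host CPU, it is put before and re-loaded after the reset so
 * that the hardware state stays consistent.
 */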
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_load()/vcpu_put().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	memcpy(csr, reset_csr, sizeof(*csr));

	spin_lock(&vcpu->arch.reset_cntx_lock);
	memcpy(cntx, reset_cntx, sizeof(*cntx));
	spin_unlock(&vcpu->arch.reset_cntx_lock);

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	kvm_riscv_vcpu_sbi_sta_reset(vcpu);

	/* Reset the guest CSRs for hotplug usecase */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

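/*
 * Allocate and initialize the architecture-specific parts of a new VCPU:
 * ISA features, machine identity (mvendorid/marchid/mimpid), the HFENCE
 * queue, the reset context, vector state, timer, PMU, AIA, and the SBI
 * extensions, followed by an initial VCPU reset.
 */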
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

	spin_lock_init(&vcpu->arch.mp_state_lock);

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;
	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	spin_lock_init(&vcpu->arch.reset_cntx_lock);

	spin_lock(&vcpu->arch.reset_cntx_lock);
	cntx = &vcpu->arch.guest_reset_context;
	cntx->sstatus = SR_SPP | SR_SPIE;
	cntx->hstatus = 0;
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;
	spin_unlock(&vcpu->arch.reset_cntx_lock);

	if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
		return -ENOMEM;

	/* By default, make CY, TM, and IR counters accessible in VU mode */
	reset_csr->scounteren = 0x7;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	rc = kvm_riscv_vcpu_aia_init(vcpu);
	if (rc)
		return rc;

	/*
	 * Setup SBI extensions
	 * NOTE: This must be the last thing to be initialized.
	 */
	kvm_riscv_vcpu_sbi_init(vcpu);

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The vcpu with id 0 is the designated boot cpu.
	 * Keep all vcpus with non-zero id in power-off state so that
	 * they can be brought up using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

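/*
 * While a VCPU is blocked waiting for interrupts, ask AIA to wake it up on
 * an incoming HGEI (guest external interrupt); disable the wakeup again once
 * the VCPU is unblocked.
 */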
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, true);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, false);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.guest_context.sepc;
}
#endif

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}

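/*
 * Synchronous VCPU ioctls.  Register access goes through the generic KVM
 * ONE_REG interface; a VMM would typically drive it roughly as below
 * (illustrative sketch only, not part of this file):
 *
 *	u64 val;
 *	struct kvm_one_reg reg = {
 *		.id   = ...,			// a KVM_REG_RISCV_* register id
 *		.addr = (unsigned long)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */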
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned int n;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	default:
		break;
	}

	return r;
}

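/*
 * The legacy sregs/fpu/translate/regs ioctls are not supported on RISC-V;
 * all register state is exposed through the ONE_REG interface instead.
 */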
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

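/*
 * Fold any interrupt bits queued in irqs_pending into the shadow HVIP value
 * so they get injected into the guest on the next VCPU entry.
 */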
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

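/*
 * Read back the interrupt state that the guest may have changed in hardware
 * (VSIE, and the VSSIP/LCOFIP bits of HVIP) and mirror those changes into
 * the software pending-interrupt bitmaps.
 */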
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = ncsr_read(CSR_VSIE);

	/* Sync up the HVIP.VSSIP bit changes done by the guest */
	hvip = ncsr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
	}

	/* Sync up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

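/*
 * Mark a virtual interrupt as pending for the VCPU and kick it so the
 * interrupt is delivered on the next guest entry.
 */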
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

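/*
 * Check whether the VCPU has any unmasked pending interrupts, taking both
 * the local interrupt bitmap and the AIA high interrupts into account.
 */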
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
	      << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
	      (unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_off(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
	kvm_vcpu_wake_up(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_on(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	spin_lock(&vcpu->arch.mp_state_lock);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
		__kvm_riscv_vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	spin_unlock(&vcpu->arch.mp_state_lock);

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
	} else {
		vcpu->guest_debug = 0;
		vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
	}

	return 0;
}

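/*
 * Derive the static CSR configuration (hedeleg, henvcfg, hstateen0) for this
 * VCPU from the ISA extensions available to the guest.  Called once before
 * the VCPU runs for the first time.
 */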
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
{
	const unsigned long *isa = vcpu->arch.isa;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	if (riscv_isa_extension_available(isa, SVPBMT))
		cfg->henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(isa, SSTC))
		cfg->henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(isa, ZICBOM))
		cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(isa, ZICBOZ))
		cfg->henvcfg |= ENVCFG_CBZE;

	if (riscv_isa_extension_available(isa, SVADU) &&
	    !riscv_isa_extension_available(isa, SVADE))
		cfg->henvcfg |= ENVCFG_ADUE;

	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
		cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
		if (riscv_isa_extension_available(isa, SSAIA))
			cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
					  SMSTATEEN0_AIA |
					  SMSTATEEN0_AIA_ISEL;
		if (riscv_isa_extension_available(isa, SMSTATEEN))
			cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
	}

	cfg->hedeleg = KVM_HEDELEG_DEFAULT;
	if (vcpu->guest_debug)
		cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}

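/*
 * Load the guest VS-level CSRs and the per-VCPU hypervisor configuration
 * into hardware (through the NACL shared memory when available), restore
 * guest FP/vector/timer/AIA state, and record which host CPU the VCPU now
 * runs on.
 */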
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	if (kvm_riscv_nacl_sync_csr_available()) {
		nsh = nacl_shmem();
		nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
		nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
		nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
		nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
		nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
		nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
		nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
		nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
		nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
		nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
		nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
		if (IS_ENABLED(CONFIG_32BIT))
			nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
		}
	} else {
		csr_write(CSR_VSSTATUS, csr->vsstatus);
		csr_write(CSR_VSIE, csr->vsie);
		csr_write(CSR_VSTVEC, csr->vstvec);
		csr_write(CSR_VSSCRATCH, csr->vsscratch);
		csr_write(CSR_VSEPC, csr->vsepc);
		csr_write(CSR_VSCAUSE, csr->vscause);
		csr_write(CSR_VSTVAL, csr->vstval);
		csr_write(CSR_HEDELEG, cfg->hedeleg);
		csr_write(CSR_HVIP, csr->hvip);
		csr_write(CSR_VSATP, csr->vsatp);
		csr_write(CSR_HENVCFG, cfg->henvcfg);
		if (IS_ENABLED(CONFIG_32BIT))
			csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			csr_write(CSR_HSTATEEN0, cfg->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
		}
	}

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

	vcpu->cpu = cpu;
}

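/*
 * Counterpart of kvm_arch_vcpu_load(): save guest FP/vector/timer/AIA state
 * and snapshot the VS-level CSRs back into the VCPU structure before the
 * VCPU is scheduled out.
 */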
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

	if (kvm_riscv_nacl_available()) {
		nsh = nacl_shmem();
		csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
		csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
		csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
		csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
		csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
		csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
		csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
		csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
		csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
	} else {
		csr->vsstatus = csr_read(CSR_VSSTATUS);
		csr->vsie = csr_read(CSR_VSIE);
		csr->vstvec = csr_read(CSR_VSTVEC);
		csr->vsscratch = csr_read(CSR_VSSCRATCH);
		csr->vsepc = csr_read(CSR_VSEPC);
		csr->vscause = csr_read(CSR_VSCAUSE);
		csr->vstval = csr_read(CSR_VSTVAL);
		csr->hvip = csr_read(CSR_HVIP);
		csr->vsatp = csr_read(CSR_VSATP);
	}
}

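/*
 * Handle any pending VCPU requests (sleep, reset, HGATP update, fences,
 * steal-time update) before entering the guest.
 */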
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
				/*
				 * Awakened to handle a signal; request to
				 * sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_gstage_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		/*
		 * The generic KVM_REQ_TLB_FLUSH is the same as
		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
		 */
		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);

		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
			kvm_riscv_vcpu_record_steal_time(vcpu);
	}
}

static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	ncsr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

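/*
 * Swap the handful of S-mode CSRs that are shared between host and guest
 * (scounteren, senvcfg and, with Smstateen, sstateen0) on guest entry, and
 * swap them back on guest exit.
 */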
static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
	vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
		vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0,
						     smcsr->sstateen0);
}

static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
	csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
		smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0,
					    vcpu->arch.host_sstateen0);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					      struct kvm_cpu_trap *trap)
{
	void *nsh;
	struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;

	/*
	 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
	 * HTINST) here because we do local_irq_enable() after this
	 * function in kvm_arch_vcpu_ioctl_run() which can result in
	 * an interrupt immediately after local_irq_enable() and can
	 * potentially change trap CSRs.
	 */

	kvm_riscv_vcpu_swap_in_guest_state(vcpu);
	guest_state_enter_irqoff();

	if (kvm_riscv_nacl_sync_sret_available()) {
		nsh = nacl_shmem();

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			hcntx->hstatus =
				nacl_csr_read(nsh, CSR_HSTATUS);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
						SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
						gcntx->hstatus);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
		} else if (kvm_riscv_nacl_sync_csr_available()) {
			hcntx->hstatus = nacl_csr_swap(nsh,
						       CSR_HSTATUS, gcntx->hstatus);
		} else {
			hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
		}

		nacl_scratch_write_longs(nsh,
					 SBI_NACL_SHMEM_SRET_OFFSET +
					 SBI_NACL_SHMEM_SRET_X(1),
					 &gcntx->ra,
					 SBI_NACL_SHMEM_SRET_X_LAST);

		__kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
					   SBI_EXT_NACL_SYNC_SRET);

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						0);
			gcntx->hstatus = nacl_scratch_read_long(nsh,
								SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
								SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
		} else {
			gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
		}

		trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
		trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
	} else {
		hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);

		__kvm_riscv_switch_to(&vcpu->arch);

		gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);

		trap->htval = csr_read(CSR_HTVAL);
		trap->htinst = csr_read(CSR_HTINST);
	}

	trap->sepc = gcntx->sepc;
	trap->scause = csr_read(CSR_SCAUSE);
	trap->stval = csr_read(CSR_STVAL);

	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
	kvm_riscv_vcpu_swap_in_host_state(vcpu);
}

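/*
 * Main KVM_RUN loop for a VCPU: complete any exit handling left over from
 * user space, then repeatedly enter the guest until an exit needs to be
 * forwarded to user space, a signal is pending, or an error occurs.
 */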
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	if (!vcpu->arch.ran_atleast_once)
		kvm_riscv_vcpu_setup_config(vcpu);

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (!vcpu->wants_to_run) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}
		ret = 1;

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * We might have received VCPU interrupt updates
		 * asynchronously, so update them in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Cleanup stale TLB entries
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		trace_kvm_entry(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu, &trap);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/* Sync up interrupts state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(&trap);

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}