// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>

#include "trace.h"
static struct kvm_pgtable *hyp_pgtable;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

static unsigned long io_map_base;
/*
 * Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
 * we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
 * CONFIG_LOCKUP_DETECTOR, CONFIG_LOCKDEP. Additionally, holding the lock too
 * long will also starve other vCPUs. We also have to make sure that the page
 * tables are not freed while we release the lock.
 */
static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
			      phys_addr_t end,
			      int (*fn)(struct kvm_pgtable *, u64, u64),
			      bool resched)
{
	int ret;
	u64 next;

	do {
		struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = stage2_pgd_addr_end(kvm, addr, end);
		ret = fn(pgt, addr, next - addr);
		if (ret)
			break;

		if (resched && next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (addr = next, addr != end);

	return ret;
}
#define stage2_apply_range_resched(kvm, addr, end, fn)			\
	stage2_apply_range(kvm, addr, end, fn, true)
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}
/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}
static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}
/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we flush to make sure the IO subsystem will
 * never hit in the cache.
 *
 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
 * we then fully enforce cacheability of RAM, no matter what the guest
 * does.
 */
/**
 * __unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @mmu:   The KVM stage-2 MMU pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 * @may_block: Whether or not we are permitted to block
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
				 bool may_block)
{
	struct kvm *kvm = mmu->kvm;
	phys_addr_t end = start + size;

	assert_spin_locked(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);
	WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
				   may_block));
}
static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}
static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;

	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
}
/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}
/**
 * free_hyp_pgds - free Hyp-mode page tables
 */
void free_hyp_pgds(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);
	if (hyp_pgtable) {
		kvm_pgtable_hyp_destroy(hyp_pgtable);
		kfree(hyp_pgtable);
		hyp_pgtable = NULL;
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}
static int __create_hyp_mappings(unsigned long start, unsigned long size,
				 unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}
static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}
/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(virt_addr, PAGE_SIZE, phys_addr,
					    prot);
		if (err)
			return err;
	}

	return 0;
}
static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long base;
	int ret = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * This assumes that we have enough space below the idmap
	 * page to allocate our VAs. If not, the check below will
	 * kick. A potential alternative would be to detect that
	 * overflow and switch to an allocation above the idmap.
	 *
	 * The allocated size is always a multiple of PAGE_SIZE.
	 */
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
	 * allocating the new area, as it would indicate we've
	 * overflowed the idmap/IO address range.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;

	ret = __create_hyp_mappings(base, size, phys_addr, prot);
	if (ret)
		goto out;

	*haddr = base + offset_in_page(phys_addr);
out:
	return ret;
}
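/*
 * Illustration (not from the original source, hypothetical values assuming
 * VA_BITS = 48): how the XOR check above detects that the private VA
 * allocator has run past the bottom of the idmap/IO range.
 *
 *   io_map_base = 0x0000800000100000   (bit 47 set)
 *   size        = 0x0000000000200000
 *   base        = io_map_base - size
 *               = 0x00007ffffff00000   (bit 47 now clear)
 *
 *   (base ^ io_map_base) & BIT(47) != 0   =>   -ENOMEM
 *
 * As long as the subtraction stays within the same half of the VA space,
 * bit 47 is unchanged and the allocation is accepted.
 */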
/**
 * create_hyp_io_mappings - Map IO into both kernel and HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @kaddr:	Kernel VA for this mapping
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}
/**
 * create_hyp_exec_mappings - Map an executable range into HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}
/**
 * kvm_init_stage2_mmu - Initialise a S2 MMU structure
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
	int cpu, err;
	struct kvm_pgtable *pgt;

	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
	if (!pgt)
		return -ENOMEM;

	err = kvm_pgtable_stage2_init(pgt, kvm);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	mmu->kvm = kvm;
	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	mmu->vmid.vmid_gen = 0;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}
static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}
/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	mmap_read_lock(current->mm);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	mmap_read_unlock(current->mm);
	srcu_read_unlock(&kvm->srcu, idx);
}
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = mmu->kvm;
	struct kvm_pgtable *pgt = NULL;

	spin_lock(&kvm->mmu_lock);
	pgt = mmu->pgt;
	if (pgt) {
		mmu->pgd_phys = 0;
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
	spin_unlock(&kvm->mmu_lock);

	if (pgt) {
		kvm_pgtable_stage2_destroy(pgt);
		kfree(pgt);
	}
}
/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:	Whether or not to create a writable mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr;
	int ret = 0;
	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
				     KVM_PGTABLE_PROT_R |
				     (writable ? KVM_PGTABLE_PROT_W : 0);

	size += offset_in_page(guest_ipa);
	guest_ipa &= PAGE_MASK;

	for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
		ret = kvm_mmu_topup_memory_cache(&cache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			break;

		spin_lock(&kvm->mmu_lock);
		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
					     &cache);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			break;

		pa += PAGE_SIZE;
	}

	kvm_mmu_free_memory_cache(&cache);
	return ret;
}
/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @mmu:	The KVM stage-2 MMU pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
	struct kvm *kvm = mmu->kvm;

	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
}
/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after the memory region's
 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns,
 * all present PUD, PMD and PTE entries are write protected in the memory
 * region. Afterwards the dirty page log can be read.
 *
 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start, end;

	if (WARN_ON_ONCE(!memslot))
		return;

	start = memslot->base_gfn << PAGE_SHIFT;
	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(&kvm->arch.mmu, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}
/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in @mask and write protects the associated PTEs. Caller
 * must acquire kvm_mmu_lock.
 */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(&kvm->arch.mmu, start, end);
}
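/*
 * Worked example (illustrative values only): with mask = 0b00111000,
 * __ffs(mask) = 3 and __fls(mask) = 5, so the range handed to
 * stage2_wp_range() covers gfns base_gfn + 3 up to (but not including)
 * base_gfn + 6, i.e. only the span from the lowest to the highest dirty
 * bit is walked rather than the whole memslot.
 */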
/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
 * dirty pages.
 *
 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
 * enable dirty logging for them.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__clean_dcache_guest_page(pfn, size);
}

static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__invalidate_icache_guest_page(pfn, size);
}
static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}
static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
					       unsigned long hva,
					       unsigned long map_size)
{
	gpa_t gpa_start;
	hva_t uaddr_start, uaddr_end;
	size_t size;

	/* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
	if (map_size == PAGE_SIZE)
		return true;

	size = memslot->npages * PAGE_SIZE;

	gpa_start = memslot->base_gfn << PAGE_SHIFT;

	uaddr_start = memslot->userspace_addr;
	uaddr_end = uaddr_start + size;

	/*
	 * Pages belonging to memslots that don't have the same alignment
	 * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
	 * PMD/PUD entries, because we'll end up mapping the wrong pages.
	 *
	 * Consider a layout like the following:
	 *
	 *    memslot->userspace_addr:
	 *    +-----+--------------------+--------------------+---+
	 *    |abcde|fgh  Stage-1 block  |    Stage-1 block tv|xyz|
	 *    +-----+--------------------+--------------------+---+
	 *
	 *    memslot->base_gfn << PAGE_SHIFT:
	 *      +---+--------------------+--------------------+-----+
	 *      |abc|def  Stage-2 block  |    Stage-2 block   |tvxyz|
	 *      +---+--------------------+--------------------+-----+
	 *
	 * If we create those stage-2 blocks, we'll end up with this incorrect
	 * mapping:
	 *   d -> f
	 *   e -> g
	 *   f -> h
	 */
	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
		return false;

	/*
	 * Next, let's make sure we're not trying to map anything not covered
	 * by the memslot. This means we have to prohibit block size mappings
	 * for the beginning and end of a non-block aligned and non-block sized
	 * memory slot (illustrated by the head and tail parts of the
	 * userspace view above containing pages 'abcde' and 'xyz',
	 * respectively).
	 *
	 * Note that it doesn't matter if we do the check using the
	 * userspace_addr or the base_gfn, as both are equally aligned (per
	 * the check above) and equally sized.
	 */
	return (hva & ~(map_size - 1)) >= uaddr_start &&
	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}
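/*
 * Illustrative example (hypothetical addresses, assuming 4K pages and a 2M
 * PMD): a memslot with userspace_addr = 0x40120000 and
 * base_gfn << PAGE_SHIFT = 0x80200000 sits at offsets 0x120000 and 0x0
 * within a 2M block respectively, so the alignment check above rejects
 * PMD_SIZE mappings; any stage-2 block would translate guest addresses to
 * the wrong host pages.
 */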
/*
 * Check if the given hva is backed by a transparent huge page (THP) and
 * whether it can be mapped using block mapping in stage2. If so, adjust
 * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
 * supported. This will need to be updated to support other THP sizes.
 *
 * Returns the size of the mapping.
 */
static unsigned long
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;

	/*
	 * Make sure the adjustment is done only for THP pages. Also make
	 * sure that the HVA and IPA are sufficiently aligned and that the
	 * block map is contained within the memslot.
	 */
	if (kvm_is_transparent_hugepage(pfn) &&
	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page. However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page. We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages, specifically
		 * before being able to call __split_huge_page_refcount().
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
		kvm_get_pfn(pfn);
		*pfnp = pfn;

		return PMD_SIZE;
	}

	/* Use page mapping if we cannot use block mapping. */
	return PAGE_SIZE;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
	bool exec_fault;
	bool device = false;
	unsigned long mmu_seq;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	short vma_shift;
	gfn_t gfn;
	kvm_pfn_t pfn;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
	unsigned long vma_pagesize, fault_granule;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;

	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	mmap_read_lock(current->mm);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		mmap_read_unlock(current->mm);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma))
		vma_shift = huge_page_shift(hstate_vma(vma));
	else
		vma_shift = PAGE_SHIFT;

	if (logging_active ||
	    (vma->vm_flags & VM_PFNMAP)) {
		force_pte = true;
		vma_shift = PAGE_SHIFT;
	}

	switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
			break;
		fallthrough;
#endif
	case CONT_PMD_SHIFT:
		vma_shift = PMD_SHIFT;
		fallthrough;
	case PMD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
			break;
		fallthrough;
	case CONT_PTE_SHIFT:
		vma_shift = PAGE_SHIFT;
		force_pte = true;
		fallthrough;
	case PAGE_SHIFT:
		break;
	default:
		WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
	}

	vma_pagesize = 1UL << vma_shift;
	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
		fault_ipa &= ~(vma_pagesize - 1);

	gfn = fault_ipa >> PAGE_SHIFT;
	mmap_read_unlock(current->mm);

	/*
	 * Permission faults just need to update the existing leaf entry,
	 * and so normally don't require allocations from the memcache. The
	 * only exception to this is when dirty logging is enabled at runtime
	 * and a write fault needs to collapse a block entry into a table.
	 */
	if (fault_status != FSC_PERM || (logging_active && write_fault)) {
		ret = kvm_mmu_topup_memory_cache(memcache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			return ret;
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma_shift);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		device = true;
		force_pte = true;
	} else if (logging_active && !write_fault) {
		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		writable = false;
	}

	if (exec_fault && device)
		return -ENOEXEC;

	spin_lock(&kvm->mmu_lock);
	pgt = vcpu->arch.hw_mmu->pgt;
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	/*
	 * If we are not forced to use page mapping, check if we are
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !force_pte)
		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
							   &pfn, &fault_ipa);
	if (writable) {
		prot |= KVM_PGTABLE_PROT_W;
		kvm_set_pfn_dirty(pfn);
		mark_page_dirty(kvm, gfn);
	}

	if (fault_status != FSC_PERM && !device)
		clean_dcache_guest_page(pfn, vma_pagesize);

	if (exec_fault) {
		prot |= KVM_PGTABLE_PROT_X;
		invalidate_icache_guest_page(pfn, vma_pagesize);
	}

	if (device)
		prot |= KVM_PGTABLE_PROT_DEVICE;
	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
		prot |= KVM_PGTABLE_PROT_X;

	/*
	 * Under the premise of getting a FSC_PERM fault, we just need to relax
	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
	 * kvm_pgtable_stage2_map() should be called to change block size.
	 */
	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
	} else {
		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
					     __pfn_to_phys(pfn), prot,
					     memcache);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret != -EAGAIN ? ret : 0;
}
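/*
 * Note on fault_granule (added for clarity, not part of the original source):
 * ARM64_HW_PGTABLE_LEVEL_SHIFT() turns the reported fault level into the size
 * of the mapping that faulted. For example, with 4K pages a level-2 fault
 * yields a 2M granule and a level-3 fault a 4K granule, which is what lets
 * user_mem_abort() distinguish "just relax permissions on the existing entry"
 * from "replace the block with a table of a different size".
 */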
/* Resolve the access fault by making the page young again. */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pte_t pte;
	kvm_pte_t kpte;
	struct kvm_s2_mmu *mmu;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);
	mmu = vcpu->arch.hw_mmu;
	kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
	spin_unlock(&vcpu->kvm->mmu_lock);

	pte = __pte(kpte);
	if (pte_valid(pte))
		kvm_set_pfn_accessed(pte_pfn(pte));
}
/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

	/* Synchronous External Abort? */
	if (kvm_vcpu_abt_issea(vcpu)) {
		/*
		 * For RAS the host kernel may handle this abort.
		 * There is no need to pass the error into the guest.
		 */
		if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
			kvm_inject_vabt(vcpu);

		return 1;
	}

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_esr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		/*
		 * The guest has put either its instructions or its page-tables
		 * somewhere it shouldn't have. Userspace won't be able to do
		 * anything about this (there's no syndrome for a start), so
		 * re-inject the abort back into the guest.
		 */
		if (is_iabt) {
			ret = -ENOEXEC;
			goto out;
		}

		if (kvm_vcpu_abt_iss1tw(vcpu)) {
			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Check for a cache maintenance operation. Since we
		 * ended-up here, we know it is outside of any memory
		 * slot. But we can't find out if that is for a device,
		 * or if the guest is just being stupid. The only thing
		 * we know for sure is that this range cannot be cached.
		 *
		 * So let's assume that the guest is just being
		 * cautious, and skip the instruction.
		 */
		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_incr_pc(vcpu);
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out:
	if (ret == -ENOEXEC) {
		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
		ret = 1;
	}
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}
static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm,
					    gpa_t gpa, u64 size,
					    void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gpa_t gpa;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
	}

	return ret;
}
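/*
 * Illustrative example (hypothetical values, assuming 4K pages): for a
 * memslot with userspace_addr = 0x7f0000000000, npages = 0x100 and
 * base_gfn = 0x41000, a notifier range of
 * [0x7f0000004000, 0x7f0000008000) intersects the slot, so
 * hva_start = 0x7f0000004000 and the handler is invoked with
 * gpa = (0x41000 + 4) << PAGE_SHIFT = 0x41004000 and size = 0x4000.
 */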
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	unsigned flags = *(unsigned *)data;
	bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;

	__unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
	return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end, unsigned flags)
{
	if (!kvm->arch.mmu.pgt)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
	return 0;
}
static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	kvm_pfn_t *pfn = (kvm_pfn_t *)data;

	WARN_ON(size != PAGE_SIZE);

	/*
	 * The MMU notifiers will have unmapped a huge PMD before calling
	 * ->change_pte() (which in turn calls kvm_set_spte_hva()) and
	 * therefore we never need to clear out a huge PMD through this
	 * calling path and a memcache is not required.
	 */
	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, gpa, PAGE_SIZE,
			       __pfn_to_phys(*pfn), KVM_PGTABLE_PROT_R, NULL);
	return 0;
}
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	kvm_pfn_t pfn = pte_pfn(pte);

	if (!kvm->arch.mmu.pgt)
		return 0;

	trace_kvm_set_spte_hva(hva);

	/*
	 * We've moved a page around, probably through CoW, so let's treat it
	 * just like a translation fault and clean the cache to the PoC.
	 */
	clean_dcache_guest_page(pfn, PAGE_SIZE);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pfn);
	return 0;
}
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pte_t pte;
	kvm_pte_t kpte;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
	kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt, gpa);
	pte = __pte(kpte);
	return pte_valid(pte) && pte_young(pte);
}
static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt, gpa);
}
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	if (!kvm->arch.mmu.pgt)
		return 0;

	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.mmu.pgt)
		return 0;

	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
				 kvm_test_age_hva_handler, NULL);
}
phys_addr_t kvm_mmu_get_httbr(void)
{
	return __pa(hyp_pgtable->pgd);
}
phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}
static int kvm_map_idmap_text(void)
{
	unsigned long size = hyp_idmap_end - hyp_idmap_start;
	int err = __create_hyp_mappings(hyp_idmap_start, size, hyp_idmap_start,
					PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}
int kvm_mmu_init(void)
{
	int err;
	u32 hyp_va_bits;

	hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
	hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
	hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
	kvm_debug("Using %u-bit virtual addresses at EL2\n", hyp_va_bits);
	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
	kvm_debug("HYP VA range: %lx:%lx\n",
		  kern_hyp_va(PAGE_OFFSET),
		  kern_hyp_va((unsigned long)high_memory - 1));

	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
	    hyp_idmap_start <  kern_hyp_va((unsigned long)high_memory - 1) &&
	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
		/*
		 * The idmap page is intersecting with the VA space,
		 * it is not safe to continue further.
		 */
		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
		err = -EINVAL;
		goto out;
	}

	hyp_pgtable = kzalloc(sizeof(*hyp_pgtable), GFP_KERNEL);
	if (!hyp_pgtable) {
		kvm_err("Hyp mode page-table not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits);
	if (err)
		goto out_free_pgtable;

	err = kvm_map_idmap_text();
	if (err)
		goto out_destroy_pgtable;

	io_map_base = hyp_idmap_start;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_hyp_destroy(hyp_pgtable);
out_free_pgtable:
	kfree(hyp_pgtable);
	hyp_pgtable = NULL;
out:
	return err;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point the memslot has been committed and there is an
	 * allocated dirty_bitmap[]; dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
		/*
		 * If we're with initial-all-set, we don't need to write
		 * protect any pages because they're all reported as dirty.
		 * Huge pages and normal pages will be write protected
		 * gradually.
		 */
		if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
			kvm_mmu_wp_memory_region(kvm, mem->slot);
		}
	}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
			change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the IPA
	 * space addressable by the KVM guest IPA space.
	 */
	if (memslot->base_gfn + memslot->npages >=
	    (kvm_phys_size(kvm) >> PAGE_SHIFT))
		return -EFAULT;

	mmap_read_lock(current->mm);
	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we can map
	 * any of them right now.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa;

			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
			pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EINVAL;
				break;
			}

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (change == KVM_MR_FLAGS_ONLY)
		goto out;

	spin_lock(&kvm->mmu_lock);
	if (ret)
		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		stage2_flush_memslot(kvm, memslot);
	spin_unlock(&kvm->mmu_lock);
out:
	mmap_read_unlock(current->mm);
	return ret;
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(&kvm->arch.mmu);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}
/*
 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
 *
 * Main problems:
 * - S/W ops are local to a CPU (not broadcast)
 * - We have line migration behind our back (speculation)
 * - System caches don't support S/W at all (damn!)
 *
 * In the face of the above, the best we can do is to try and convert
 * S/W ops to VA ops. Because the guest is not allowed to infer the
 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
 * which is a rather good thing for us.
 *
 * Also, it is only used when turning caches on/off ("The expected
 * usage of the cache maintenance instructions that operate by set/way
 * is associated with the cache maintenance instructions associated
 * with the powerdown and powerup of caches, if this is required by
 * the implementation.").
 *
 * We use the following policy:
 *
 * - If we trap a S/W operation, we enable VM trapping to detect
 *   caches being turned on/off, and do a full clean.
 *
 * - We flush the caches on both caches being turned on and off.
 *
 * - Once the caches are enabled, we stop trapping VM ops.
 */
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = *vcpu_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * Caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
	}
}
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		*vcpu_hcr(vcpu) &= ~HCR_TVM;

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}