// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"

#include "vmx.h"
#include "svm_util.h"

#define L2_GUEST_STACK_SIZE 256

void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	GUEST_SYNC(6);
	/* Done, exit to L1 and never come back.  */
	vmcall();
}

static void svm_l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	GUEST_ASSERT(svm->vmcb_gpa);
	/* Prepare for L2 execution. */
	generic_svm_setup(svm, svm_l2_guest_code,
			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(3);
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(5);
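	/* VMMCALL is a 3-byte instruction; advance L2's RIP past it so the
	 * next run_guest() makes forward progress. */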
	vmcb->save.rip += 3;
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(7);
}

void vmx_l2_guest_code(void)
{
	GUEST_SYNC(6);

	/* Exit to L1 */
	vmcall();

	/* L1 has now set up a shadow VMCS for us.  */
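	/*
	 * With VMCS shadowing enabled by L1, the VMREADs/VMWRITEs below
	 * access the shadow VMCS directly instead of causing VM-Exits to L1.
	 */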
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_SYNC(10);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
	GUEST_SYNC(11);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
	GUEST_SYNC(12);

	/* Done, exit to L1 and never come back.  */
	vmcall();
}

static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	GUEST_SYNC(4);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* Check that the launched state is preserved.  */
	GUEST_ASSERT(vmlaunch());
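
	/*
	 * A second VMLAUNCH must fail (the VMCS is already in the launched
	 * state), whereas VMRESUME must succeed.
	 */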
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_SYNC(7);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
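
	/* VMCALL is a 3-byte instruction; skip L2 past it. */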
	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
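
	/*
	 * A shadow VMCS can be made current with VMPTRLD, but VMLAUNCH and
	 * VMRESUME must fail while a shadow VMCS is the current VMCS.
	 */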
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmlaunch());
	GUEST_SYNC(8);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());

	vmwrite(GUEST_RIP, 0xc0ffee);
	GUEST_SYNC(9);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);

	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
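
	/*
	 * Switch back to the shadow VMCS; the value L2 wrote via VMWRITE
	 * (0xc0ffffee) must be visible, and launch/resume must still fail.
	 */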
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
	GUEST_SYNC(13);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
}

static void __attribute__((__flatten__)) guest_code(void *arg)
{
	GUEST_SYNC(1);

	if (this_cpu_has(X86_FEATURE_XSAVE)) {
		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
		uint8_t buffer[4096];

		memset(buffer, 0xcc, sizeof(buffer));

		/*
		 * Modify state for all supported xfeatures to take them out of
		 * their "init" state, i.e. to make them show up in XSTATE_BV.
		 *
		 * Note off-by-default features, e.g. AMX, are out of scope for
		 * this particular testcase as they have a different ABI.
		 */
		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
		asm volatile ("fincstp");

		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_YMM)
			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
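			/*
			 * Writing %k1 takes the opmask component out of its
			 * init state, %zmm0 covers ZMM_Hi256, and %zmm16
			 * covers Hi16_ZMM, i.e. all three AVX-512 xfeatures
			 * end up set in XSTATE_BV.
			 */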
			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
		}

		if (this_cpu_has(X86_FEATURE_MPX)) {
			uint64_t bounds[2] = { 10, 0xffffffffull };
			uint64_t output[2] = { };

			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);

			/*
			 * Don't bother trying to get BNDCSR into the INUSE
			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
			 * is simpler than doing XRSTOR here in the guest.
			 *
			 * However, temporarily enable MPX in BNDCFGS so that
			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
			 * enabled, all MPX instructions are treated as NOPs.
			 *
			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
			 * mnemonics/registers has been removed from gcc and
			 * clang (and was never fully supported by clang).
			 */
			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));

			/*
			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
			 * that BND1 actually got loaded.
			 */
			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
			wrmsr(MSR_IA32_BNDCFGS, 0);

			GUEST_ASSERT_EQ(bounds[0], output[0]);
			GUEST_ASSERT_EQ(bounds[1], output[1]);
		}

		if (this_cpu_has(X86_FEATURE_PKU)) {
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
			set_cr4(get_cr4() | X86_CR4_PKE);
			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));

			wrpkru(-1u);
		}
	}

	GUEST_SYNC(2);

	if (arg) {
		if (this_cpu_has(X86_FEATURE_SVM))
			svm_l1_guest_code(arg);
		else
			vmx_l1_guest_code(arg);
	}

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	uint64_t *xstate_bv, saved_xstate_bv;
	vm_vaddr_t nested_gva = 0;
	struct kvm_cpuid2 empty_cpuid = {};
	struct kvm_regs regs1, regs2;
	struct kvm_vcpu *vcpu, *vcpuN;
	struct kvm_vm *vm;
	struct kvm_x86_state *state;
	struct ucall uc;
	int stage;

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vcpu_regs_get(vcpu, &regs1);

	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
		if (kvm_cpu_has(X86_FEATURE_SVM))
			vcpu_alloc_svm(vm, &nested_gva);
		else if (kvm_cpu_has(X86_FEATURE_VMX))
			vcpu_alloc_vmx(vm, &nested_gva);
	}

	if (!nested_gva)
		pr_info("will skip nested state checks\n");

	vcpu_args_set(vcpu, 1, nested_gva);
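
	/*
	 * Walk the guest through its sync stages; at each stage, save vCPU
	 * state, recreate the VM, restore the state into the new vCPU, and
	 * verify the restored register state matches the saved state.
	 */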
	for (stage = 1;; stage++) {
		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC:
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* UCALL_SYNC is handled here.  */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage,
			    "Stage %d: Unexpected register values vmexit, got %lx",
			    stage, (ulong)uc.args[1]);

		state = vcpu_save_state(vcpu);
		memset(&regs1, 0, sizeof(regs1));
		vcpu_regs_get(vcpu, &regs1);

		kvm_vm_release(vm);

		/* Restore state in a new VM.  */
		vcpu = vm_recreate_with_one_vcpu(vm);
		vcpu_load_state(vcpu, state);

		/*
		 * Restore XSAVE state in a dummy vCPU, first without doing
		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
		 * load only XSAVE state, MSRs in particular have a much more
		 * convoluted ABI.
		 *
		 * Load two versions of XSAVE state: one with the actual guest
		 * XSAVE state, and one with all supported features forced "on"
		 * in xstate_bv, e.g. to ensure that KVM allows loading all
		 * supported features, even if something goes awry in saving
		 * the original snapshot.
		 */
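		/*
		 * XSTATE_BV lives in the first 8 bytes of the XSAVE header,
		 * which begins at byte offset 512, immediately after the
		 * legacy XSAVE area.
		 */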
		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
		saved_xstate_bv = *xstate_bv;

		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = kvm_cpu_supported_xcr0();
		vcpu_xsave_set(vcpuN, state->xsave);

		vcpu_init_cpuid(vcpuN, &empty_cpuid);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = saved_xstate_bv;
		vcpu_xsave_set(vcpuN, state->xsave);

		kvm_x86_state_cleanup(state);

		memset(&regs2, 0, sizeof(regs2));
		vcpu_regs_get(vcpu, &regs2);
		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
			    (ulong)regs2.rdi, (ulong)regs2.rsi);
	}

done:
	kvm_vm_free(vm);
}