// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/sgx.h>

#include "cpuid.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
#include "x86.h"

bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
static u64 sgx_pubkey_hash[4] __ro_after_init;

/*
 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 * address size based on the mode. Related prefixes are ignored.
 */
static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
			     int size, int alignment, gva_t *gva)
{
	struct kvm_segment s;
	bool fault;

	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
	*gva = offset;
	if (!is_64_bit_mode(vcpu)) {
		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
		*gva += s.base;
	}

	if (!IS_ALIGNED(*gva, alignment)) {
		fault = true;
	} else if (likely(is_64_bit_mode(vcpu))) {
		*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
		fault = is_noncanonical_address(*gva, vcpu);
	} else {
		*gva &= 0xffffffff;
		fault = (s.unusable) ||
			(s.type != 2 && s.type != 3) ||
			(*gva > s.limit) ||
			((s.base != 0 || s.limit != 0xffffffff) &&
			(((u64)*gva + size - 1) > s.limit + 1));
	}

	if (fault)
		kvm_inject_gp(vcpu, 0);
	return fault ? -EINVAL : 0;
}
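
/*
 * Prepare an emulation-failure exit to userspace, passing the address and
 * size that KVM failed to access as additional exit data.
 */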
static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
					 unsigned int size)
{
	uint64_t data[2] = { addr, size };

	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}
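
/*
 * Read @size bytes from host virtual address @hva into @data; on failure,
 * prepare an emulation-failure exit to userspace.
 */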
static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
			unsigned int size)
{
	if (__copy_from_user(data, (void __user *)hva, size)) {
		sgx_handle_emulation_failure(vcpu, hva, size);
		return -EFAULT;
	}

	return 0;
}
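
/*
 * Translate a guest virtual address to a guest physical address, injecting
 * a #PF into the guest if the translation fails.
 */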
static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
			  gpa_t *gpa)
{
	struct x86_exception ex;

	if (write)
		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
	else
		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);

	if (*gpa == INVALID_GPA) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return -EFAULT;
	}

	return 0;
}
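
/*
 * Translate a guest physical address to a host virtual address; exit to
 * userspace if the GPA has no valid memslot mapping.
 */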
static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
{
	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
	if (kvm_is_error_hva(*hva)) {
		sgx_handle_emulation_failure(vcpu, gpa, 1);
		return -EFAULT;
	}

	*hva |= gpa & ~PAGE_MASK;

	return 0;
}
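
/*
 * Handle a fault hit while executing ENCLS on the guest's behalf: reflect
 * an EPCM-style fault into the guest as an SGX #PF or #GP, or exit to
 * userspace if the fault indicates a bad host address.
 */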
static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
{
	struct x86_exception ex;

	/*
	 * A non-EPCM #PF indicates a bad userspace HVA. This *should* check
	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
	 */
	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	/*
	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
	 * #PF on SGX2). The assumption is that EPCM faults are much more
	 * likely than a bad userspace address.
	 */
	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
		memset(&ex, 0, sizeof(ex));
		ex.vector = PF_VECTOR;
		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
				PFERR_SGX_MASK;
		ex.address = gva;
		ex.error_code_valid = true;
		ex.nested_page_fault = false;
		kvm_inject_emulated_page_fault(vcpu, &ex);
	} else {
		kvm_inject_gp(vcpu, 0);
	}
	return 1;
}
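
/*
 * Validate the (deep-copied) SECS against the guest's CPUID.0x12 limits,
 * then execute ECREATE on the host via sgx_virt_ecreate().
 */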
static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
				  struct sgx_pageinfo *pageinfo,
				  unsigned long secs_hva,
				  gva_t secs_gva)
{
	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
	u64 attributes, xfrm, size;
	u32 miscselect;
	u8 max_size_log2;
	int trapnr, ret;

	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!sgx_12_0 || !sgx_12_1) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	miscselect = contents->miscselect;
	attributes = contents->attributes;
	xfrm = contents->xfrm;
	size = contents->size;

	/* Enforce restriction of access to the PROVISIONKEY. */
	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
	    (attributes & SGX_ATTR_PROVISIONKEY)) {
		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
			pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
	 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
	 * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
	 * is unsupported, i.e. even if XCR0 itself is completely unsupported.
	 */
	if ((u32)miscselect & ~sgx_12_0->ebx ||
	    (u32)attributes & ~sgx_12_1->eax ||
	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
	    (u32)xfrm & ~sgx_12_1->ecx ||
	    (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
	    xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
	    (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* Enforce CPUID restriction on max enclave size. */
	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
							    sgx_12_0->edx;
	if (size >= BIT_ULL(max_size_log2)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * sgx_virt_ecreate() returns:
	 * 1) 0: ECREATE was successful
	 * 2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
	 *    exception number.
	 * 3) -EINVAL: access_ok() on @secs_hva failed. This should never
	 *    happen as KVM checks host addresses at memslot creation.
	 *    sgx_virt_ecreate() has already warned in this case.
	 */
	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
	if (!ret)
		return kvm_skip_emulated_instruction(vcpu);
	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	return ret;
}
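
/*
 * Emulate guest ENCLS[ECREATE]: read and translate the guest's PAGEINFO,
 * copy the SECS into kernel memory, and defer to __handle_encls_ecreate().
 */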
static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
{
	gva_t pageinfo_gva, secs_gva;
	gva_t metadata_gva, contents_gva;
	gpa_t metadata_gpa, contents_gpa, secs_gpa;
	unsigned long metadata_hva, contents_hva, secs_hva;
	struct sgx_pageinfo pageinfo;
	struct sgx_secs *contents;
	struct x86_exception ex;
	int r;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
		return 1;

	/*
	 * Copy the PAGEINFO to local memory, its pointers need to be
	 * translated, i.e. we need to do a deep copy/translate.
	 */
	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
				sizeof(pageinfo), &ex);
	if (r == X86EMUL_PROPAGATE_FAULT) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return 1;
	} else if (r != X86EMUL_CONTINUE) {
		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
					     sizeof(pageinfo));
		return 0;
	}

	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
			      &contents_gva))
		return 1;

	/*
	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
		return 1;

	/*
	 * ...and then to HVA. The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time. Exit to
	 * userspace if a GPA is invalid.
	 */
	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
		return 0;

	/*
	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
	 * enforce restriction of access to the PROVISIONKEY.
	 */
	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
	if (!contents)
		return -ENOMEM;

	/* Exit to userspace if copying from a host userspace address fails. */
	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
		free_page((unsigned long)contents);
		return 0;
	}

	pageinfo.metadata = metadata_hva;
	pageinfo.contents = (u64)contents;

	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);

	free_page((unsigned long)contents);

	return r;
}
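
/*
 * Emulate guest ENCLS[EINIT] by running EINIT on the host with the guest's
 * SIGSTRUCT, SECS and EINITTOKEN, using the guest's virtual
 * SGX_LEPUBKEYHASHn MSR values.
 */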
static int handle_encls_einit(struct kvm_vcpu *vcpu)
{
	unsigned long sig_hva, secs_hva, token_hva, rflags;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	gva_t sig_gva, secs_gva, token_gva;
	gpa_t sig_gpa, secs_gpa, token_gpa;
	int ret, trapnr;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
		return 1;

	/*
	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
		return 1;

	/*
	 * ...and then to HVA. The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time. Exit to
	 * userspace if a GPA is invalid. Note, all structures are aligned and
	 * cannot split pages.
	 */
	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
		return 0;

	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
			     (void __user *)secs_hva,
			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);

	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	/*
	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
	 * @token_hva or @secs_hva. This should never happen as KVM checks host
	 * addresses at memslot creation. sgx_virt_einit() has already warned
	 * in this case, so just return.
	 */
	if (ret < 0)
		return ret;

	/* Emulate EINIT's setting of RFLAGS.ZF (set on failure) and its EAX return code. */
	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
					  X86_EFLAGS_AF | X86_EFLAGS_SF |
					  X86_EFLAGS_OF);
	if (ret)
		rflags |= X86_EFLAGS_ZF;
	else
		rflags &= ~X86_EFLAGS_ZF;
	vmx_set_rflags(vcpu, rflags);

	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);
}

static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
{
	/*
	 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
	 * be reached if and only if the SGX1 leafs are enabled.
	 */
	if (leaf >= ECREATE && leaf <= ETRACK)
		return true;

	if (leaf >= EAUG && leaf <= EMODT)
		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);

	return false;
}
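
/*
 * SGX is considered enabled by guest BIOS if the guest's virtual
 * IA32_FEATURE_CONTROL MSR has both the SGX-enable and lock bits set.
 */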
static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
{
	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;

	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
}
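
/*
 * Top-level ENCLS VM-exit handler: inject #UD or #GP when ENCLS shouldn't
 * execute in the guest, otherwise emulate the intercepted leaf.
 */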
int handle_encls(struct kvm_vcpu *vcpu)
{
	u32 leaf = (u32)kvm_rax_read(vcpu);

	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
	    !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
	} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
		   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
		kvm_inject_gp(vcpu, 0);
	} else {
		if (leaf == ECREATE)
			return handle_encls_ecreate(vcpu);
		if (leaf == EINIT)
			return handle_encls_einit(vcpu);
		WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
		return 0;
	}
	return 1;
}

void setup_default_sgx_lepubkeyhash(void)
{
	/*
	 * Use Intel's default value for Skylake hardware if Launch Control is
	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
	 * Launch Control is supported and enabled, i.e. mimic the reset value
	 * and let the guest write the MSRs at will. If Launch Control is
	 * supported but disabled, then use the current MSR values as the hash
	 * MSRs exist but are read-only (locked and not writable).
	 */
	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
	} else {
		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
	}
}
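
/* Initialize the vCPU's virtual SGX_LEPUBKEYHASHn MSRs with the default hash. */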
void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
	       sizeof(sgx_pubkey_hash));
}

/*
 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 * restrictions if the guest's allowed-1 settings diverge from hardware.
 */
static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *guest_cpuid;
	u32 eax, ebx, ecx, edx;

	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
		return true;

	return false;
}

void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	/*
	 * There is no software enable bit for SGX that is virtualized by
	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
	 * guest (either by the host or by the guest's BIOS) but enabled in the
	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
	 * the expected system behavior for ENCLS.
	 */
	u64 bitmap = -1ull;

	/* Nothing to do if hardware doesn't support SGX */
	if (!cpu_has_vmx_encls_vmexit())
		return;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
	    sgx_enabled_in_guest_bios(vcpu)) {
		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
			if (sgx_intercept_encls_ecreate(vcpu))
				bitmap |= (1 << ECREATE);
		}

		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
			bitmap &= ~GENMASK_ULL(EMODT, EAUG);

		/*
		 * Trap and execute EINIT if launch control is enabled in the
		 * host using the guest's values for launch control MSRs, even
		 * if the guest's values are fixed to hardware default values.
		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
		 * the MSRs is extraordinarily expensive.
		 */
		if (boot_cpu_has(X86_FEATURE_SGX_LC))
			bitmap |= (1 << EINIT);
	}

	if (!vmcs12 && is_guest_mode(vcpu))
		vmcs12 = get_vmcs12(vcpu);
	if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
		bitmap |= vmcs12->encls_exiting_bitmap;

	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
}