// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/sgx.h>

#include "cpuid.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
#include "x86.h"

bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
static u64 sgx_pubkey_hash[4] __ro_after_init;

/*
 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 * address size based on the mode. Related prefixes are ignored.
 */
static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
			     int size, int alignment, gva_t *gva)
{
	struct kvm_segment s;
	bool fault;

	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
	*gva = offset;
	if (!is_64_bit_mode(vcpu)) {
		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
		*gva += s.base;
	}

	if (!IS_ALIGNED(*gva, alignment)) {
		fault = true;
	} else if (likely(is_64_bit_mode(vcpu))) {
		*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
		fault = is_noncanonical_address(*gva, vcpu);
	} else {
		*gva &= 0xffffffff;
		fault = (s.unusable) ||
			(s.type != 2 && s.type != 3) ||
			(*gva > s.limit) ||
			((s.base != 0 || s.limit != 0xffffffff) &&
			(((u64)*gva + size - 1) > s.limit + 1));
	}

	if (fault)
		kvm_inject_gp(vcpu, 0);
	return fault ? -EINVAL : 0;
}
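
/*
 * Prepare an emulation-failure exit to userspace, passing the address and
 * size that KVM failed to access as additional exit data.
 */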
static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
					 unsigned int size)
{
	uint64_t data[2] = { addr, size };

	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}
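
/*
 * Read @size bytes from host virtual address @hva into @data; on failure,
 * prepare an emulation-failure exit to userspace.
 */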
static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
			unsigned int size)
{
	if (__copy_from_user(data, (void __user *)hva, size)) {
		sgx_handle_emulation_failure(vcpu, hva, size);
		return -EFAULT;
	}

	return 0;
}
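
/*
 * Translate a guest virtual address to a guest physical address, injecting
 * a #PF into the guest if the translation fails.
 */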
static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
			  gpa_t *gpa)
{
	struct x86_exception ex;

	if (write)
		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
	else
		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);

	if (*gpa == INVALID_GPA) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return -EFAULT;
	}

	return 0;
}
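
/*
 * Translate a guest physical address to a host virtual address; exit to
 * userspace if the GPA has no valid memslot mapping.
 */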
static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
{
	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
	if (kvm_is_error_hva(*hva)) {
		sgx_handle_emulation_failure(vcpu, gpa, 1);
		return -EFAULT;
	}

	*hva |= gpa & ~PAGE_MASK;

	return 0;
}
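
/*
 * Handle a fault hit while executing ENCLS on the guest's behalf: reflect
 * an EPCM-style fault into the guest as an SGX #PF or #GP, or exit to
 * userspace if the fault indicates a bad host address.
 */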
static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
{
	struct x86_exception ex;

	/*
	 * A non-EPCM #PF indicates a bad userspace HVA. This *should* check
	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
	 */
	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	/*
	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
	 * #PF on SGX2). The assumption is that EPCM faults are much more
	 * likely than a bad userspace address.
	 */
	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
		memset(&ex, 0, sizeof(ex));
		ex.vector = PF_VECTOR;
		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
				PFERR_SGX_MASK;
		ex.address = gva;
		ex.error_code_valid = true;
		ex.nested_page_fault = false;
		kvm_inject_emulated_page_fault(vcpu, &ex);
	} else {
		kvm_inject_gp(vcpu, 0);
	}
	return 1;
}
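
/*
 * Validate the (deep-copied) SECS against the guest's CPUID.0x12 limits,
 * then execute ECREATE on the host via sgx_virt_ecreate().
 */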
static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
				  struct sgx_pageinfo *pageinfo,
				  unsigned long secs_hva,
				  gva_t secs_gva)
{
	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
	u64 attributes, xfrm, size;
	u32 miscselect;
	u8 max_size_log2;
	int trapnr, ret;

	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!sgx_12_0 || !sgx_12_1) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	miscselect = contents->miscselect;
	attributes = contents->attributes;
	xfrm = contents->xfrm;
	size = contents->size;

	/* Enforce restriction of access to the PROVISIONKEY. */
	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
	    (attributes & SGX_ATTR_PROVISIONKEY)) {
		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
			pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
	 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
	 * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
	 * is unsupported, i.e. even if XCR0 itself is completely unsupported.
	 */
	if ((u32)miscselect & ~sgx_12_0->ebx ||
	    (u32)attributes & ~sgx_12_1->eax ||
	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
	    (u32)xfrm & ~sgx_12_1->ecx ||
	    (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
	    xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
	    (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* Enforce CPUID restriction on max enclave size. */
	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
							    sgx_12_0->edx;
	if (size >= BIT_ULL(max_size_log2)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * sgx_virt_ecreate() returns:
	 * 1) 0: ECREATE was successful
	 * 2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
	 *    exception number.
	 * 3) -EINVAL: access_ok() on @secs_hva failed. This should never
	 *    happen as KVM checks host addresses at memslot creation.
	 *    sgx_virt_ecreate() has already warned in this case.
	 */
	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
	if (!ret)
		return kvm_skip_emulated_instruction(vcpu);
	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	return ret;
}
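
/*
 * Emulate guest ENCLS[ECREATE]: read and translate the guest's PAGEINFO,
 * copy the SECS into kernel memory, and defer to __handle_encls_ecreate().
 */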
static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
{
	gva_t pageinfo_gva, secs_gva;
	gva_t metadata_gva, contents_gva;
	gpa_t metadata_gpa, contents_gpa, secs_gpa;
	unsigned long metadata_hva, contents_hva, secs_hva;
	struct sgx_pageinfo pageinfo;
	struct sgx_secs *contents;
	struct x86_exception ex;
	int r;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
		return 1;

	/*
	 * Copy the PAGEINFO to local memory, its pointers need to be
	 * translated, i.e. we need to do a deep copy/translate.
	 */
	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
				sizeof(pageinfo), &ex);
	if (r == X86EMUL_PROPAGATE_FAULT) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return 1;
	} else if (r != X86EMUL_CONTINUE) {
		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
					     sizeof(pageinfo));
		return 0;
	}

	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
			      &contents_gva))
		return 1;

	/*
	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
		return 1;

	/*
	 * ...and then to HVA. The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time. Exit to
	 * userspace if a GPA is invalid.
	 */
	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
		return 0;

	/*
	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
	 * enforce restriction of access to the PROVISIONKEY.
	 */
	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
	if (!contents)
		return -ENOMEM;

	/* Exit to userspace if copying from a host userspace address fails. */
	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
		free_page((unsigned long)contents);
		return 0;
	}

	pageinfo.metadata = metadata_hva;
	pageinfo.contents = (u64)contents;

	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);

	free_page((unsigned long)contents);

	return r;
}
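
/*
 * Emulate guest ENCLS[EINIT] by running EINIT on the host with the guest's
 * SIGSTRUCT, SECS and EINITTOKEN, using the guest's virtual
 * SGX_LEPUBKEYHASHn MSR values.
 */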
static int handle_encls_einit(struct kvm_vcpu *vcpu)
{
	unsigned long sig_hva, secs_hva, token_hva, rflags;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	gva_t sig_gva, secs_gva, token_gva;
	gpa_t sig_gpa, secs_gpa, token_gpa;
	int ret, trapnr;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
		return 1;

	/*
	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
		return 1;

	/*
	 * ...and then to HVA. The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time. Exit to
	 * userspace if a GPA is invalid. Note, all structures are aligned and
	 * cannot split pages.
	 */
	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
		return 0;

	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
			     (void __user *)secs_hva,
			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);

	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	/*
	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
	 * @token_hva or @secs_hva. This should never happen as KVM checks host
	 * addresses at memslot creation. sgx_virt_einit() has already warned
	 * in this case, so just return.
	 */
	if (ret < 0)
		return ret;

	/* Emulate EINIT's setting of RFLAGS.ZF (set on failure) and its EAX return code. */
	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
					  X86_EFLAGS_AF | X86_EFLAGS_SF |
					  X86_EFLAGS_OF);
	if (ret)
		rflags |= X86_EFLAGS_ZF;
	else
		rflags &= ~X86_EFLAGS_ZF;
	vmx_set_rflags(vcpu, rflags);

	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);
}

static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
{
	/*
	 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
	 * be reached if and only if the SGX1 leafs are enabled.
	 */
	if (leaf >= ECREATE && leaf <= ETRACK)
		return true;

	if (leaf >= EAUG && leaf <= EMODT)
		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);

	return false;
}
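
/*
 * SGX is considered enabled by guest BIOS if the guest's virtual
 * IA32_FEATURE_CONTROL MSR has both the SGX-enable and lock bits set.
 */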
static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
{
	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;

	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
}
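
/*
 * Top-level ENCLS VM-exit handler: inject #UD or #GP when ENCLS shouldn't
 * execute in the guest, otherwise emulate the intercepted leaf.
 */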
int handle_encls(struct kvm_vcpu *vcpu)
{
	u32 leaf = (u32)kvm_rax_read(vcpu);

	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
	    !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
	} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
		   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
		kvm_inject_gp(vcpu, 0);
	} else {
		if (leaf == ECREATE)
			return handle_encls_ecreate(vcpu);
		if (leaf == EINIT)
			return handle_encls_einit(vcpu);
		WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
		return 0;
	}
	return 1;
}

void setup_default_sgx_lepubkeyhash(void)
{
	/*
	 * Use Intel's default value for Skylake hardware if Launch Control is
	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
	 * Launch Control is supported and enabled, i.e. mimic the reset value
	 * and let the guest write the MSRs at will. If Launch Control is
	 * supported but disabled, then use the current MSR values as the hash
	 * MSRs exist but are read-only (locked and not writable).
	 */
	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
	} else {
		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
	}
}
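
/* Initialize the vCPU's virtual SGX_LEPUBKEYHASHn MSRs with the default hash. */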
void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
	       sizeof(sgx_pubkey_hash));
}

/*
 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 * restrictions if the guest's allowed-1 settings diverge from hardware.
 */
static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *guest_cpuid;
	u32 eax, ebx, ecx, edx;

	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
		return true;

	return false;
}

void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	/*
	 * There is no software enable bit for SGX that is virtualized by
	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
	 * guest (either by the host or by the guest's BIOS) but enabled in the
	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
	 * the expected system behavior for ENCLS.
	 */
	u64 bitmap = -1ull;

	/* Nothing to do if hardware doesn't support SGX */
	if (!cpu_has_vmx_encls_vmexit())
		return;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
	    sgx_enabled_in_guest_bios(vcpu)) {
		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
			if (sgx_intercept_encls_ecreate(vcpu))
				bitmap |= (1 << ECREATE);
		}

		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
			bitmap &= ~GENMASK_ULL(EMODT, EAUG);

		/*
		 * Trap and execute EINIT if launch control is enabled in the
		 * host using the guest's values for launch control MSRs, even
		 * if the guest's values are fixed to hardware default values.
		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
		 * the MSRs is extraordinarily expensive.
		 */
		if (boot_cpu_has(X86_FEATURE_SGX_LC))
			bitmap |= (1 << EINIT);
	}

	if (!vmcs12 && is_guest_mode(vcpu))
		vmcs12 = get_vmcs12(vcpu);
	if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
		bitmap |= vmcs12->encls_exiting_bitmap;

	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
}