x86/sgx: ioctl() handlers for the SGX enclave device
arch / x86 / kernel / cpu / sgx / ioctl.c
blob b65ab214bdf57d97c07454969a3511fe64e4cfd3
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2016-20 Intel Corporation. */
4 #include <asm/mman.h>
5 #include <asm/sgx.h>
6 #include <linux/mman.h>
7 #include <linux/delay.h>
8 #include <linux/file.h>
9 #include <linux/hashtable.h>
10 #include <linux/highmem.h>
11 #include <linux/ratelimit.h>
12 #include <linux/sched/signal.h>
13 #include <linux/shmem_fs.h>
14 #include <linux/slab.h>
15 #include <linux/suspend.h>
16 #include "driver.h"
17 #include "encl.h"
18 #include "encls.h"
20 struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
22 struct sgx_va_page *va_page = NULL;
23 void *err;
25 BUILD_BUG_ON(SGX_VA_SLOT_COUNT !=
26 (SGX_ENCL_PAGE_VA_OFFSET_MASK >> 3) + 1);
28 if (!(encl->page_cnt % SGX_VA_SLOT_COUNT)) {
29 va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
30 if (!va_page)
31 return ERR_PTR(-ENOMEM);
33 va_page->epc_page = sgx_alloc_va_page(reclaim);
34 if (IS_ERR(va_page->epc_page)) {
35 err = ERR_CAST(va_page->epc_page);
36 kfree(va_page);
37 return err;
40 WARN_ON_ONCE(encl->page_cnt % SGX_VA_SLOT_COUNT);
42 encl->page_cnt++;
43 return va_page;
46 void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
48 encl->page_cnt--;
50 if (va_page) {
51 sgx_encl_free_epc_page(va_page->epc_page);
52 list_del(&va_page->list);
53 kfree(va_page);
57 static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
59 struct sgx_epc_page *secs_epc;
60 struct sgx_va_page *va_page;
61 struct sgx_pageinfo pginfo;
62 struct sgx_secinfo secinfo;
63 unsigned long encl_size;
64 struct file *backing;
65 long ret;
67 va_page = sgx_encl_grow(encl, true);
68 if (IS_ERR(va_page))
69 return PTR_ERR(va_page);
70 else if (va_page)
71 list_add(&va_page->list, &encl->va_pages);
72 /* else the tail page of the VA page list had free slots. */
74 /* The extra page goes to SECS. */
75 encl_size = secs->size + PAGE_SIZE;
77 backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
78 VM_NORESERVE);
79 if (IS_ERR(backing)) {
80 ret = PTR_ERR(backing);
81 goto err_out_shrink;
84 encl->backing = backing;
86 secs_epc = sgx_alloc_epc_page(&encl->secs, true);
87 if (IS_ERR(secs_epc)) {
88 ret = PTR_ERR(secs_epc);
89 goto err_out_backing;
92 encl->secs.epc_page = secs_epc;
94 pginfo.addr = 0;
95 pginfo.contents = (unsigned long)secs;
96 pginfo.metadata = (unsigned long)&secinfo;
97 pginfo.secs = 0;
98 memset(&secinfo, 0, sizeof(secinfo));
100 ret = __ecreate((void *)&pginfo, sgx_get_epc_virt_addr(secs_epc));
101 if (ret) {
102 ret = -EIO;
103 goto err_out;
106 if (secs->attributes & SGX_ATTR_DEBUG)
107 set_bit(SGX_ENCL_DEBUG, &encl->flags);
109 encl->secs.encl = encl;
110 encl->secs.type = SGX_PAGE_TYPE_SECS;
111 encl->base = secs->base;
112 encl->size = secs->size;
113 encl->attributes = secs->attributes;
114 encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
116 /* Set only after completion, as encl->lock has not been taken. */
117 set_bit(SGX_ENCL_CREATED, &encl->flags);
119 return 0;
121 err_out:
122 sgx_encl_free_epc_page(encl->secs.epc_page);
123 encl->secs.epc_page = NULL;
125 err_out_backing:
126 fput(encl->backing);
127 encl->backing = NULL;
129 err_out_shrink:
130 sgx_encl_shrink(encl, va_page);
132 return ret;
136 * sgx_ioc_enclave_create() - handler for %SGX_IOC_ENCLAVE_CREATE
137 * @encl: An enclave pointer.
138 * @arg: The ioctl argument.
140 * Allocate kernel data structures for the enclave and invoke ECREATE.
142 * Return:
143 * - 0: Success.
144 * - -EIO: ECREATE failed.
145 * - -errno: POSIX error.
147 static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
149 struct sgx_enclave_create create_arg;
150 void *secs;
151 int ret;
153 if (test_bit(SGX_ENCL_CREATED, &encl->flags))
154 return -EINVAL;
156 if (copy_from_user(&create_arg, arg, sizeof(create_arg)))
157 return -EFAULT;
159 secs = kmalloc(PAGE_SIZE, GFP_KERNEL);
160 if (!secs)
161 return -ENOMEM;
163 if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE))
164 ret = -EFAULT;
165 else
166 ret = sgx_encl_create(encl, secs);
168 kfree(secs);
169 return ret;
172 static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
174 u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
175 u64 pt = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;
177 if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS)
178 return -EINVAL;
180 if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R))
181 return -EINVAL;
184 * CPU will silently overwrite the permissions as zero, which means
185 * that we need to validate it ourselves.
187 if (pt == SGX_SECINFO_TCS && perm)
188 return -EINVAL;
190 if (secinfo->flags & SGX_SECINFO_RESERVED_MASK)
191 return -EINVAL;
193 if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved)))
194 return -EINVAL;
196 return 0;
199 static int __sgx_encl_add_page(struct sgx_encl *encl,
200 struct sgx_encl_page *encl_page,
201 struct sgx_epc_page *epc_page,
202 struct sgx_secinfo *secinfo, unsigned long src)
204 struct sgx_pageinfo pginfo;
205 struct vm_area_struct *vma;
206 struct page *src_page;
207 int ret;
209 /* Deny noexec. */
210 vma = find_vma(current->mm, src);
211 if (!vma)
212 return -EFAULT;
214 if (!(vma->vm_flags & VM_MAYEXEC))
215 return -EACCES;
217 ret = get_user_pages(src, 1, 0, &src_page);
218 if (ret < 1)
219 return -EFAULT;
221 pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
222 pginfo.addr = encl_page->desc & PAGE_MASK;
223 pginfo.metadata = (unsigned long)secinfo;
224 pginfo.contents = (unsigned long)kmap_local_page(src_page);
226 ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
228 kunmap_local((void *)pginfo.contents);
229 put_page(src_page);
231 return ret ? -EIO : 0;
235 * If the caller requires measurement of the page as a proof for the content,
236 * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
237 * operation until the entire page is measured."
239 static int __sgx_encl_extend(struct sgx_encl *encl,
240 struct sgx_epc_page *epc_page)
242 unsigned long offset;
243 int ret;
245 for (offset = 0; offset < PAGE_SIZE; offset += SGX_EEXTEND_BLOCK_SIZE) {
246 ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
247 sgx_get_epc_virt_addr(epc_page) + offset);
248 if (ret) {
249 if (encls_failed(ret))
250 ENCLS_WARN(ret, "EEXTEND");
252 return -EIO;
256 return 0;
259 static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
260 unsigned long offset, struct sgx_secinfo *secinfo,
261 unsigned long flags)
263 struct sgx_encl_page *encl_page;
264 struct sgx_epc_page *epc_page;
265 struct sgx_va_page *va_page;
266 int ret;
268 encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags);
269 if (IS_ERR(encl_page))
270 return PTR_ERR(encl_page);
272 epc_page = sgx_alloc_epc_page(encl_page, true);
273 if (IS_ERR(epc_page)) {
274 kfree(encl_page);
275 return PTR_ERR(epc_page);
278 va_page = sgx_encl_grow(encl, true);
279 if (IS_ERR(va_page)) {
280 ret = PTR_ERR(va_page);
281 goto err_out_free;
284 mmap_read_lock(current->mm);
285 mutex_lock(&encl->lock);
288 * Adding to encl->va_pages must be done under encl->lock. Ditto for
289 * deleting (via sgx_encl_shrink()) in the error path.
291 if (va_page)
292 list_add(&va_page->list, &encl->va_pages);
295 * Insert prior to EADD in case of OOM. EADD modifies MRENCLAVE, i.e.
296 * can't be gracefully unwound, while failure on EADD/EXTEND is limited
297 * to userspace errors (or kernel/hardware bugs).
299 ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
300 encl_page, GFP_KERNEL);
301 if (ret)
302 goto err_out_unlock;
304 ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
305 src);
306 if (ret)
307 goto err_out;
310 * Complete the "add" before doing the "extend" so that the "add"
311 * isn't in a half-baked state in the extremely unlikely scenario
312 * the enclave will be destroyed in response to EEXTEND failure.
314 encl_page->encl = encl;
315 encl_page->epc_page = epc_page;
316 encl_page->type = (secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK) >> 8;
317 encl->secs_child_cnt++;
319 if (flags & SGX_PAGE_MEASURE) {
320 ret = __sgx_encl_extend(encl, epc_page);
321 if (ret)
322 goto err_out;
325 sgx_mark_page_reclaimable(encl_page->epc_page);
326 mutex_unlock(&encl->lock);
327 mmap_read_unlock(current->mm);
328 return ret;
330 err_out:
331 xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
333 err_out_unlock:
334 sgx_encl_shrink(encl, va_page);
335 mutex_unlock(&encl->lock);
336 mmap_read_unlock(current->mm);
338 err_out_free:
339 sgx_encl_free_epc_page(epc_page);
340 kfree(encl_page);
342 return ret;
346 * Ensure user provided offset and length values are valid for
347 * an enclave.
349 static int sgx_validate_offset_length(struct sgx_encl *encl,
350 unsigned long offset,
351 unsigned long length)
353 if (!IS_ALIGNED(offset, PAGE_SIZE))
354 return -EINVAL;
356 if (!length || !IS_ALIGNED(length, PAGE_SIZE))
357 return -EINVAL;
359 if (offset + length < offset)
360 return -EINVAL;
362 if (offset + length - PAGE_SIZE >= encl->size)
363 return -EINVAL;
365 return 0;
369 * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
370 * @encl: an enclave pointer
371 * @arg: a user pointer to a struct sgx_enclave_add_pages instance
373 * Add one or more pages to an uninitialized enclave, and optionally extend the
374 * measurement with the contents of the page. The SECINFO and measurement mask
375 * are applied to all pages.
377 * A SECINFO for a TCS is required to always contain zero permissions because
378 * CPU silently zeros them. Allowing anything else would cause a mismatch in
379 * the measurement.
381 * mmap()'s protection bits are capped by the page permissions. For each page
382 * address, the maximum protection bits are computed with the following
383 * heuristics:
385 * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions.
386 * 2. A TCS page: PROT_R | PROT_W.
388 * mmap() is not allowed to surpass the minimum of the maximum protection bits
389 * within the given address range.
391 * The function deinitializes kernel data structures for enclave and returns
392 * -EIO in any of the following conditions:
394 * - Enclave Page Cache (EPC), the physical memory holding enclaves, has
395 * been invalidated. This will cause EADD and EEXTEND to fail.
396 * - If the source address is corrupted somehow when executing EADD.
398 * Return:
399 * - 0: Success.
400 * - -EACCES: The source page is located in a noexec partition.
401 * - -ENOMEM: Out of EPC pages.
402 * - -EINTR: The call was interrupted before data was processed.
403 * - -EIO: Either EADD or EEXTEND failed because invalid source address
404 * or power cycle.
405 * - -errno: POSIX error.
407 static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
409 struct sgx_enclave_add_pages add_arg;
410 struct sgx_secinfo secinfo;
411 unsigned long c;
412 int ret;
414 if (!test_bit(SGX_ENCL_CREATED, &encl->flags) ||
415 test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
416 return -EINVAL;
418 if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
419 return -EFAULT;
421 if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
422 return -EINVAL;
424 if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
425 return -EINVAL;
427 if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
428 sizeof(secinfo)))
429 return -EFAULT;
431 if (sgx_validate_secinfo(&secinfo))
432 return -EINVAL;
434 for (c = 0 ; c < add_arg.length; c += PAGE_SIZE) {
435 if (signal_pending(current)) {
436 if (!c)
437 ret = -ERESTARTSYS;
439 break;
442 if (need_resched())
443 cond_resched();
445 ret = sgx_encl_add_page(encl, add_arg.src + c, add_arg.offset + c,
446 &secinfo, add_arg.flags);
447 if (ret)
448 break;
451 add_arg.count = c;
453 if (copy_to_user(arg, &add_arg, sizeof(add_arg)))
454 return -EFAULT;
456 return ret;
459 static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus,
460 void *hash)
462 SHASH_DESC_ON_STACK(shash, tfm);
464 shash->tfm = tfm;
466 return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash);
469 static int sgx_get_key_hash(const void *modulus, void *hash)
471 struct crypto_shash *tfm;
472 int ret;
474 tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC);
475 if (IS_ERR(tfm))
476 return PTR_ERR(tfm);
478 ret = __sgx_get_key_hash(tfm, modulus, hash);
480 crypto_free_shash(tfm);
481 return ret;
484 static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
485 void *token)
487 u64 mrsigner[4];
488 int i, j;
489 void *addr;
490 int ret;
493 * Deny initializing enclaves with attributes (namely provisioning)
494 * that have not been explicitly allowed.
496 if (encl->attributes & ~encl->attributes_mask)
497 return -EACCES;
500 * Attributes should not be enforced *only* against what's available on
501 * platform (done in sgx_encl_create) but checked and enforced against
502 * the mask for enforcement in sigstruct. For example an enclave could
503 * opt to sign with AVX bit in xfrm, but still be loadable on a platform
504 * without it if the sigstruct->body.attributes_mask does not turn that
505 * bit on.
507 if (sigstruct->body.attributes & sigstruct->body.attributes_mask &
508 sgx_attributes_reserved_mask)
509 return -EINVAL;
511 if (sigstruct->body.miscselect & sigstruct->body.misc_mask &
512 sgx_misc_reserved_mask)
513 return -EINVAL;
515 if (sigstruct->body.xfrm & sigstruct->body.xfrm_mask &
516 sgx_xfrm_reserved_mask)
517 return -EINVAL;
519 ret = sgx_get_key_hash(sigstruct->modulus, mrsigner);
520 if (ret)
521 return ret;
523 mutex_lock(&encl->lock);
526 * ENCLS[EINIT] is interruptible because it has such a high latency,
527 * e.g. 50k+ cycles on success. If an IRQ/NMI/SMI becomes pending,
528 * EINIT may fail with SGX_UNMASKED_EVENT so that the event can be
529 * serviced.
531 for (i = 0; i < SGX_EINIT_SLEEP_COUNT; i++) {
532 for (j = 0; j < SGX_EINIT_SPIN_COUNT; j++) {
533 addr = sgx_get_epc_virt_addr(encl->secs.epc_page);
535 preempt_disable();
537 sgx_update_lepubkeyhash(mrsigner);
539 ret = __einit(sigstruct, token, addr);
541 preempt_enable();
543 if (ret == SGX_UNMASKED_EVENT)
544 continue;
545 else
546 break;
549 if (ret != SGX_UNMASKED_EVENT)
550 break;
552 msleep_interruptible(SGX_EINIT_SLEEP_TIME);
554 if (signal_pending(current)) {
555 ret = -ERESTARTSYS;
556 goto err_out;
560 if (encls_faulted(ret)) {
561 if (encls_failed(ret))
562 ENCLS_WARN(ret, "EINIT");
564 ret = -EIO;
565 } else if (ret) {
566 pr_debug("EINIT returned %d\n", ret);
567 ret = -EPERM;
568 } else {
569 set_bit(SGX_ENCL_INITIALIZED, &encl->flags);
572 err_out:
573 mutex_unlock(&encl->lock);
574 return ret;
578 * sgx_ioc_enclave_init() - handler for %SGX_IOC_ENCLAVE_INIT
579 * @encl: an enclave pointer
580 * @arg: userspace pointer to a struct sgx_enclave_init instance
582 * Flush any outstanding enqueued EADD operations and perform EINIT. The
583 * Launch Enclave Public Key Hash MSRs are rewritten as necessary to match
584 * the enclave's MRSIGNER, which is calculated from the provided sigstruct.
586 * Return:
587 * - 0: Success.
588 * - -EPERM: Invalid SIGSTRUCT.
589 * - -EIO: EINIT failed because of a power cycle.
590 * - -errno: POSIX error.
592 static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
594 struct sgx_sigstruct *sigstruct;
595 struct sgx_enclave_init init_arg;
596 void *token;
597 int ret;
599 if (!test_bit(SGX_ENCL_CREATED, &encl->flags) ||
600 test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
601 return -EINVAL;
603 if (copy_from_user(&init_arg, arg, sizeof(init_arg)))
604 return -EFAULT;
607 * 'sigstruct' must be on a page boundary and 'token' on a 512 byte
608 * boundary. kmalloc() will give this alignment when allocating
609 * PAGE_SIZE bytes.
611 sigstruct = kmalloc(PAGE_SIZE, GFP_KERNEL);
612 if (!sigstruct)
613 return -ENOMEM;
615 token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2);
616 memset(token, 0, SGX_LAUNCH_TOKEN_SIZE);
618 if (copy_from_user(sigstruct, (void __user *)init_arg.sigstruct,
619 sizeof(*sigstruct))) {
620 ret = -EFAULT;
621 goto out;
625 * A legacy field used with Intel signed enclaves. These used to mean
626 * regular and architectural enclaves. The CPU only accepts these values
627 * but they do not have any other meaning.
629 * Thus, reject any other values.
631 if (sigstruct->header.vendor != 0x0000 &&
632 sigstruct->header.vendor != 0x8086) {
633 ret = -EINVAL;
634 goto out;
637 ret = sgx_encl_init(encl, sigstruct, token);
639 out:
640 kfree(sigstruct);
641 return ret;
645 * sgx_ioc_enclave_provision() - handler for %SGX_IOC_ENCLAVE_PROVISION
646 * @encl: an enclave pointer
647 * @arg: userspace pointer to a struct sgx_enclave_provision instance
649 * Allow ATTRIBUTE.PROVISION_KEY for an enclave by providing a file handle to
650 * /dev/sgx_provision.
652 * Return:
653 * - 0: Success.
654 * - -errno: Otherwise.
656 static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
658 struct sgx_enclave_provision params;
660 if (copy_from_user(&params, arg, sizeof(params)))
661 return -EFAULT;
663 return sgx_set_attribute(&encl->attributes_mask, params.fd);
667 * Ensure enclave is ready for SGX2 functions. Readiness is checked
668 * by ensuring the hardware supports SGX2 and the enclave is initialized
669 * and thus able to handle requests to modify pages within it.
671 static int sgx_ioc_sgx2_ready(struct sgx_encl *encl)
673 if (!(cpu_feature_enabled(X86_FEATURE_SGX2)))
674 return -ENODEV;
676 if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
677 return -EINVAL;
679 return 0;
683 * Some SGX functions require that no cached linear-to-physical address
684 * mappings are present before they can succeed. Collaborate with
685 * hardware via ENCLS[ETRACK] to ensure that all cached
686 * linear-to-physical address mappings belonging to all threads of
687 * the enclave are cleared. See sgx_encl_cpumask() for details.
689 * Must be called with enclave's mutex held from the time the
690 * SGX function requiring that no cached linear-to-physical mappings
691 * are present is executed until this ETRACK flow is complete.
693 static int sgx_enclave_etrack(struct sgx_encl *encl)
695 void *epc_virt;
696 int ret;
698 epc_virt = sgx_get_epc_virt_addr(encl->secs.epc_page);
699 ret = __etrack(epc_virt);
700 if (ret) {
702 * ETRACK only fails when there is an OS issue. For
703 * example, two consecutive ETRACK was sent without
704 * completed IPI between.
706 pr_err_once("ETRACK returned %d (0x%x)", ret, ret);
708 * Send IPIs to kick CPUs out of the enclave and
709 * try ETRACK again.
711 on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
712 ret = __etrack(epc_virt);
713 if (ret) {
714 pr_err_once("ETRACK repeat returned %d (0x%x)",
715 ret, ret);
716 return -EFAULT;
719 on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
721 return 0;
725 * sgx_enclave_restrict_permissions() - Restrict EPCM permissions
726 * @encl: Enclave to which the pages belong.
727 * @modp: Checked parameters from user on which pages need modifying and
728 * their new permissions.
730 * Return:
731 * - 0: Success.
732 * - -errno: Otherwise.
734 static long
735 sgx_enclave_restrict_permissions(struct sgx_encl *encl,
736 struct sgx_enclave_restrict_permissions *modp)
738 struct sgx_encl_page *entry;
739 struct sgx_secinfo secinfo;
740 unsigned long addr;
741 unsigned long c;
742 void *epc_virt;
743 int ret;
745 memset(&secinfo, 0, sizeof(secinfo));
746 secinfo.flags = modp->permissions & SGX_SECINFO_PERMISSION_MASK;
748 for (c = 0 ; c < modp->length; c += PAGE_SIZE) {
749 addr = encl->base + modp->offset + c;
751 sgx_reclaim_direct();
753 mutex_lock(&encl->lock);
755 entry = sgx_encl_load_page(encl, addr);
756 if (IS_ERR(entry)) {
757 ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
758 goto out_unlock;
762 * Changing EPCM permissions is only supported on regular
763 * SGX pages. Attempting this change on other pages will
764 * result in #PF.
766 if (entry->type != SGX_PAGE_TYPE_REG) {
767 ret = -EINVAL;
768 goto out_unlock;
772 * Apart from ensuring that read-access remains, do not verify
773 * the permission bits requested. Kernel has no control over
774 * how EPCM permissions can be relaxed from within the enclave.
775 * ENCLS[EMODPR] can only remove existing EPCM permissions,
776 * attempting to set new permissions will be ignored by the
777 * hardware.
780 /* Change EPCM permissions. */
781 epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
782 ret = __emodpr(&secinfo, epc_virt);
783 if (encls_faulted(ret)) {
785 * All possible faults should be avoidable:
786 * parameters have been checked, will only change
787 * permissions of a regular page, and no concurrent
788 * SGX1/SGX2 ENCLS instructions since these
789 * are protected with mutex.
791 pr_err_once("EMODPR encountered exception %d\n",
792 ENCLS_TRAPNR(ret));
793 ret = -EFAULT;
794 goto out_unlock;
796 if (encls_failed(ret)) {
797 modp->result = ret;
798 ret = -EFAULT;
799 goto out_unlock;
802 ret = sgx_enclave_etrack(encl);
803 if (ret) {
804 ret = -EFAULT;
805 goto out_unlock;
808 mutex_unlock(&encl->lock);
811 ret = 0;
812 goto out;
814 out_unlock:
815 mutex_unlock(&encl->lock);
816 out:
817 modp->count = c;
819 return ret;
823 * sgx_ioc_enclave_restrict_permissions() - handler for
824 * %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
825 * @encl: an enclave pointer
826 * @arg: userspace pointer to a &struct sgx_enclave_restrict_permissions
827 * instance
829 * SGX2 distinguishes between relaxing and restricting the enclave page
830 * permissions maintained by the hardware (EPCM permissions) of pages
831 * belonging to an initialized enclave (after SGX_IOC_ENCLAVE_INIT).
833 * EPCM permissions cannot be restricted from within the enclave, the enclave
834 * requires the kernel to run the privileged level 0 instructions ENCLS[EMODPR]
835 * and ENCLS[ETRACK]. An attempt to relax EPCM permissions with this call
836 * will be ignored by the hardware.
838 * Return:
839 * - 0: Success
840 * - -errno: Otherwise
842 static long sgx_ioc_enclave_restrict_permissions(struct sgx_encl *encl,
843 void __user *arg)
845 struct sgx_enclave_restrict_permissions params;
846 long ret;
848 ret = sgx_ioc_sgx2_ready(encl);
849 if (ret)
850 return ret;
852 if (copy_from_user(&params, arg, sizeof(params)))
853 return -EFAULT;
855 if (sgx_validate_offset_length(encl, params.offset, params.length))
856 return -EINVAL;
858 if (params.permissions & ~SGX_SECINFO_PERMISSION_MASK)
859 return -EINVAL;
862 * Fail early if invalid permissions requested to prevent ENCLS[EMODPR]
863 * from faulting later when the CPU does the same check.
865 if ((params.permissions & SGX_SECINFO_W) &&
866 !(params.permissions & SGX_SECINFO_R))
867 return -EINVAL;
869 if (params.result || params.count)
870 return -EINVAL;
872 ret = sgx_enclave_restrict_permissions(encl, &params);
874 if (copy_to_user(arg, &params, sizeof(params)))
875 return -EFAULT;
877 return ret;
881 * sgx_enclave_modify_types() - Modify type of SGX enclave pages
882 * @encl: Enclave to which the pages belong.
883 * @modt: Checked parameters from user about which pages need modifying
884 * and their new page type.
886 * Return:
887 * - 0: Success
888 * - -errno: Otherwise
890 static long sgx_enclave_modify_types(struct sgx_encl *encl,
891 struct sgx_enclave_modify_types *modt)
893 unsigned long max_prot_restore;
894 enum sgx_page_type page_type;
895 struct sgx_encl_page *entry;
896 struct sgx_secinfo secinfo;
897 unsigned long prot;
898 unsigned long addr;
899 unsigned long c;
900 void *epc_virt;
901 int ret;
903 page_type = modt->page_type & SGX_PAGE_TYPE_MASK;
906 * The only new page types allowed by hardware are PT_TCS and PT_TRIM.
908 if (page_type != SGX_PAGE_TYPE_TCS && page_type != SGX_PAGE_TYPE_TRIM)
909 return -EINVAL;
911 memset(&secinfo, 0, sizeof(secinfo));
913 secinfo.flags = page_type << 8;
915 for (c = 0 ; c < modt->length; c += PAGE_SIZE) {
916 addr = encl->base + modt->offset + c;
918 sgx_reclaim_direct();
920 mutex_lock(&encl->lock);
922 entry = sgx_encl_load_page(encl, addr);
923 if (IS_ERR(entry)) {
924 ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
925 goto out_unlock;
929 * Borrow the logic from the Intel SDM. Regular pages
930 * (SGX_PAGE_TYPE_REG) can change type to SGX_PAGE_TYPE_TCS
931 * or SGX_PAGE_TYPE_TRIM but TCS pages can only be trimmed.
932 * CET pages not supported yet.
934 if (!(entry->type == SGX_PAGE_TYPE_REG ||
935 (entry->type == SGX_PAGE_TYPE_TCS &&
936 page_type == SGX_PAGE_TYPE_TRIM))) {
937 ret = -EINVAL;
938 goto out_unlock;
941 max_prot_restore = entry->vm_max_prot_bits;
944 * Once a regular page becomes a TCS page it cannot be
945 * changed back. So the maximum allowed protection reflects
946 * the TCS page that is always RW from kernel perspective but
947 * will be inaccessible from within enclave. Before doing
948 * so, do make sure that the new page type continues to
949 * respect the originally vetted page permissions.
951 if (entry->type == SGX_PAGE_TYPE_REG &&
952 page_type == SGX_PAGE_TYPE_TCS) {
953 if (~entry->vm_max_prot_bits & (VM_READ | VM_WRITE)) {
954 ret = -EPERM;
955 goto out_unlock;
957 prot = PROT_READ | PROT_WRITE;
958 entry->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
961 * Prevent page from being reclaimed while mutex
962 * is released.
964 if (sgx_unmark_page_reclaimable(entry->epc_page)) {
965 ret = -EAGAIN;
966 goto out_entry_changed;
970 * Do not keep encl->lock because of dependency on
971 * mmap_lock acquired in sgx_zap_enclave_ptes().
973 mutex_unlock(&encl->lock);
975 sgx_zap_enclave_ptes(encl, addr);
977 mutex_lock(&encl->lock);
979 sgx_mark_page_reclaimable(entry->epc_page);
982 /* Change EPC type */
983 epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
984 ret = __emodt(&secinfo, epc_virt);
985 if (encls_faulted(ret)) {
987 * All possible faults should be avoidable:
988 * parameters have been checked, will only change
989 * valid page types, and no concurrent
990 * SGX1/SGX2 ENCLS instructions since these are
991 * protected with mutex.
993 pr_err_once("EMODT encountered exception %d\n",
994 ENCLS_TRAPNR(ret));
995 ret = -EFAULT;
996 goto out_entry_changed;
998 if (encls_failed(ret)) {
999 modt->result = ret;
1000 ret = -EFAULT;
1001 goto out_entry_changed;
1004 ret = sgx_enclave_etrack(encl);
1005 if (ret) {
1006 ret = -EFAULT;
1007 goto out_unlock;
1010 entry->type = page_type;
1012 mutex_unlock(&encl->lock);
1015 ret = 0;
1016 goto out;
1018 out_entry_changed:
1019 entry->vm_max_prot_bits = max_prot_restore;
1020 out_unlock:
1021 mutex_unlock(&encl->lock);
1022 out:
1023 modt->count = c;
1025 return ret;
1029 * sgx_ioc_enclave_modify_types() - handler for %SGX_IOC_ENCLAVE_MODIFY_TYPES
1030 * @encl: an enclave pointer
1031 * @arg: userspace pointer to a &struct sgx_enclave_modify_types instance
1033 * Ability to change the enclave page type supports the following use cases:
1035 * * It is possible to add TCS pages to an enclave by changing the type of
1036 * regular pages (%SGX_PAGE_TYPE_REG) to TCS (%SGX_PAGE_TYPE_TCS) pages.
1037 * With this support the number of threads supported by an initialized
1038 * enclave can be increased dynamically.
1040 * * Regular or TCS pages can dynamically be removed from an initialized
1041 * enclave by changing the page type to %SGX_PAGE_TYPE_TRIM. Changing the
1042 * page type to %SGX_PAGE_TYPE_TRIM marks the page for removal with actual
1043 * removal done by handler of %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() called
1044 * after ENCLU[EACCEPT] is run on %SGX_PAGE_TYPE_TRIM page from within the
1045 * enclave.
1047 * Return:
1048 * - 0: Success
1049 * - -errno: Otherwise
1051 static long sgx_ioc_enclave_modify_types(struct sgx_encl *encl,
1052 void __user *arg)
1054 struct sgx_enclave_modify_types params;
1055 long ret;
1057 ret = sgx_ioc_sgx2_ready(encl);
1058 if (ret)
1059 return ret;
1061 if (copy_from_user(&params, arg, sizeof(params)))
1062 return -EFAULT;
1064 if (sgx_validate_offset_length(encl, params.offset, params.length))
1065 return -EINVAL;
1067 if (params.page_type & ~SGX_PAGE_TYPE_MASK)
1068 return -EINVAL;
1070 if (params.result || params.count)
1071 return -EINVAL;
1073 ret = sgx_enclave_modify_types(encl, &params);
1075 if (copy_to_user(arg, &params, sizeof(params)))
1076 return -EFAULT;
1078 return ret;
1082 * sgx_encl_remove_pages() - Remove trimmed pages from SGX enclave
1083 * @encl: Enclave to which the pages belong
1084 * @params: Checked parameters from user on which pages need to be removed
1086 * Return:
1087 * - 0: Success.
1088 * - -errno: Otherwise.
1090 static long sgx_encl_remove_pages(struct sgx_encl *encl,
1091 struct sgx_enclave_remove_pages *params)
1093 struct sgx_encl_page *entry;
1094 struct sgx_secinfo secinfo;
1095 unsigned long addr;
1096 unsigned long c;
1097 void *epc_virt;
1098 int ret;
1100 memset(&secinfo, 0, sizeof(secinfo));
1101 secinfo.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
1103 for (c = 0 ; c < params->length; c += PAGE_SIZE) {
1104 addr = encl->base + params->offset + c;
1106 sgx_reclaim_direct();
1108 mutex_lock(&encl->lock);
1110 entry = sgx_encl_load_page(encl, addr);
1111 if (IS_ERR(entry)) {
1112 ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
1113 goto out_unlock;
1116 if (entry->type != SGX_PAGE_TYPE_TRIM) {
1117 ret = -EPERM;
1118 goto out_unlock;
1122 * ENCLS[EMODPR] is a no-op instruction used to inform if
1123 * ENCLU[EACCEPT] was run from within the enclave. If
1124 * ENCLS[EMODPR] is run with RWX on a trimmed page that is
1125 * not yet accepted then it will return
1126 * %SGX_PAGE_NOT_MODIFIABLE, after the trimmed page is
1127 * accepted the instruction will encounter a page fault.
1129 epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
1130 ret = __emodpr(&secinfo, epc_virt);
1131 if (!encls_faulted(ret) || ENCLS_TRAPNR(ret) != X86_TRAP_PF) {
1132 ret = -EPERM;
1133 goto out_unlock;
1136 if (sgx_unmark_page_reclaimable(entry->epc_page)) {
1137 ret = -EBUSY;
1138 goto out_unlock;
1142 * Do not keep encl->lock because of dependency on
1143 * mmap_lock acquired in sgx_zap_enclave_ptes().
1145 mutex_unlock(&encl->lock);
1147 sgx_zap_enclave_ptes(encl, addr);
1149 mutex_lock(&encl->lock);
1151 sgx_encl_free_epc_page(entry->epc_page);
1152 encl->secs_child_cnt--;
1153 entry->epc_page = NULL;
1154 xa_erase(&encl->page_array, PFN_DOWN(entry->desc));
1155 sgx_encl_shrink(encl, NULL);
1156 kfree(entry);
1158 mutex_unlock(&encl->lock);
1161 ret = 0;
1162 goto out;
1164 out_unlock:
1165 mutex_unlock(&encl->lock);
1166 out:
1167 params->count = c;
1169 return ret;
1173 * sgx_ioc_enclave_remove_pages() - handler for %SGX_IOC_ENCLAVE_REMOVE_PAGES
1174 * @encl: an enclave pointer
1175 * @arg: userspace pointer to &struct sgx_enclave_remove_pages instance
1177 * Final step of the flow removing pages from an initialized enclave. The
1178 * complete flow is:
1180 * 1) User changes the type of the pages to be removed to %SGX_PAGE_TYPE_TRIM
1181 * using the %SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl().
1182 * 2) User approves the page removal by running ENCLU[EACCEPT] from within
1183 * the enclave.
1184 * 3) User initiates actual page removal using the
1185 * %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() that is handled here.
1187 * First remove any page table entries pointing to the page and then proceed
1188 * with the actual removal of the enclave page and data in support of it.
1190 * VA pages are not affected by this removal. It is thus possible that the
1191 * enclave may end up with more VA pages than needed to support all its
1192 * pages.
1194 * Return:
1195 * - 0: Success
1196 * - -errno: Otherwise
1198 static long sgx_ioc_enclave_remove_pages(struct sgx_encl *encl,
1199 void __user *arg)
1201 struct sgx_enclave_remove_pages params;
1202 long ret;
1204 ret = sgx_ioc_sgx2_ready(encl);
1205 if (ret)
1206 return ret;
1208 if (copy_from_user(&params, arg, sizeof(params)))
1209 return -EFAULT;
1211 if (sgx_validate_offset_length(encl, params.offset, params.length))
1212 return -EINVAL;
1214 if (params.count)
1215 return -EINVAL;
1217 ret = sgx_encl_remove_pages(encl, &params);
1219 if (copy_to_user(arg, &params, sizeof(params)))
1220 return -EFAULT;
1222 return ret;
1225 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1227 struct sgx_encl *encl = filep->private_data;
1228 int ret;
1230 if (test_and_set_bit(SGX_ENCL_IOCTL, &encl->flags))
1231 return -EBUSY;
1233 switch (cmd) {
1234 case SGX_IOC_ENCLAVE_CREATE:
1235 ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
1236 break;
1237 case SGX_IOC_ENCLAVE_ADD_PAGES:
1238 ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg);
1239 break;
1240 case SGX_IOC_ENCLAVE_INIT:
1241 ret = sgx_ioc_enclave_init(encl, (void __user *)arg);
1242 break;
1243 case SGX_IOC_ENCLAVE_PROVISION:
1244 ret = sgx_ioc_enclave_provision(encl, (void __user *)arg);
1245 break;
1246 case SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS:
1247 ret = sgx_ioc_enclave_restrict_permissions(encl,
1248 (void __user *)arg);
1249 break;
1250 case SGX_IOC_ENCLAVE_MODIFY_TYPES:
1251 ret = sgx_ioc_enclave_modify_types(encl, (void __user *)arg);
1252 break;
1253 case SGX_IOC_ENCLAVE_REMOVE_PAGES:
1254 ret = sgx_ioc_enclave_remove_pages(encl, (void __user *)arg);
1255 break;
1256 default:
1257 ret = -ENOIOCTLCMD;
1258 break;
1261 clear_bit(SGX_ENCL_IOCTL, &encl->flags);
1262 return ret;