// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"

static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
				   struct sgx_backing *backing);

#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
/*
 * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
 * determine the page index associated with the first PCMD entry
 * within a PCMD page.
 */
#define PCMD_FIRST_MASK GENMASK(4, 0)

/**
 * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
 *                               a PCMD page is in process of being reclaimed.
 * @encl:        Enclave to which PCMD page belongs
 * @start_addr:  Address of enclave page using first entry within the PCMD page
 *
 * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
 * stored. The PCMD data of a reclaimed enclave page contains enough
 * information for the processor to verify the page at the time
 * it is loaded back into the Enclave Page Cache (EPC).
 *
 * The backing storage to which enclave pages are reclaimed is laid out as
 * follows:
 * Encrypted enclave pages:SECS page:PCMD pages
 *
 * Each PCMD page contains the PCMD metadata of
 * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
 *
 * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
 * process of getting data (and thus soon being non-empty). (b) is tested with
 * a check if an enclave page sharing the PCMD page is in the process of being
 * reclaimed.
 *
 * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
 * intends to reclaim that enclave page - it means that the PCMD page
 * associated with that enclave page is about to get some data and thus
 * even if the PCMD page is empty, it should not be truncated.
 *
 * Context: Enclave mutex (&sgx_encl->lock) must be held.
 * Return: 1 if the reclaimer is about to write to the PCMD page
 *         0 if the reclaimer has no intention to write to the PCMD page
 */
static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
				     unsigned long start_addr)
{
	int reclaimed = 0;
	int i;

	/*
	 * PCMD_FIRST_MASK is based on number of PCMD entries within
	 * PCMD page being 32.
	 */
	BUILD_BUG_ON(PCMDS_PER_PAGE != 32);

	for (i = 0; i < PCMDS_PER_PAGE; i++) {
		struct sgx_encl_page *entry;
		unsigned long addr;

		addr = start_addr + i * PAGE_SIZE;

		/*
		 * Stop when reaching the SECS page - it does not
		 * have a page_array entry and its reclaim is
		 * started and completed with enclave mutex held so
		 * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
		 * flag.
		 */
		if (addr == encl->base + encl->size)
			break;

		entry = xa_load(&encl->page_array, PFN_DOWN(addr));
		if (!entry)
			continue;

		/*
		 * VA page slot ID uses same bit as the flag so it is important
		 * to ensure that the page is not already in backing store.
		 */
		if (entry->epc_page &&
		    (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
			reclaimed = 1;
			break;
		}
	}

	return reclaimed;
}

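/*
 * Illustrative sketch, not part of the driver: with 32 PCMD entries per
 * PCMD page, the enclave page owning the first entry of a given PCMD page
 * can be derived by clearing the low bits of the page index. The helper
 * below is hypothetical and only spells out the arithmetic that
 * __sgx_encl_eldu() uses via PCMD_FIRST_MASK before calling
 * reclaimer_writing_to_pcmd().
 */
#if 0
static unsigned long pcmd_first_page_example(struct sgx_encl *encl,
					     unsigned long page_index)
{
	/* Round page_index down to the first of the 32 pages sharing a PCMD page. */
	return PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;
}
#endif
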
/*
 * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
 * follow right after the EPC data in the backing storage. In addition to the
 * visible enclave pages, there's one extra page slot for SECS, before PCMD
 * structs.
 */
static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
							     unsigned long page_index)
{
	pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);

	return epc_end_off + page_index * sizeof(struct sgx_pcmd);
}

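/*
 * Worked example (illustrative only, compiled out): assuming a 4K PAGE_SIZE
 * and the architectural 128-byte struct sgx_pcmd, the PCMD area starts right
 * after the single SECS slot and every enclave page owns one 128-byte PCMD
 * slot, so 32 enclave pages share each PCMD page.
 */
#if 0
static pgoff_t pcmd_offset_example(struct sgx_encl *encl)
{
	/* Page 0 -> encl->size + 4096, page 1 -> 128 bytes further in. */
	return sgx_encl_get_backing_page_pcmd_offset(encl, 1);
}
#endif
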
/*
 * Free a page from the backing storage in the given page index.
 */
static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
{
	struct inode *inode = file_inode(encl->backing);

	shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
}

/*
 * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
 * Pages" in the SDM.
 */
static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
			   struct sgx_epc_page *epc_page,
			   struct sgx_epc_page *secs_page)
{
	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
	struct sgx_encl *encl = encl_page->encl;
	pgoff_t page_index, page_pcmd_off;
	unsigned long pcmd_first_page;
	struct sgx_pageinfo pginfo;
	struct sgx_backing b;
	bool pcmd_page_empty;
	u8 *pcmd_page;
	int ret;

	if (secs_page)
		page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
	else
		page_index = PFN_DOWN(encl->size);

	/*
	 * Address of enclave page using the first entry within the PCMD page.
	 */
	pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;

	page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);

	ret = sgx_encl_lookup_backing(encl, page_index, &b);
	if (ret)
		return ret;

	pginfo.addr = encl_page->desc & PAGE_MASK;
	pginfo.contents = (unsigned long)kmap_local_page(b.contents);
	pcmd_page = kmap_local_page(b.pcmd);
	pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

	if (secs_page)
		pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
	else
		pginfo.secs = 0;

	ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page),
		     sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset);
	if (ret) {
		if (encls_failed(ret))
			ENCLS_WARN(ret, "ELDU");

		ret = -EFAULT;
	}

	memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
	set_page_dirty(b.pcmd);

	/*
	 * The area for the PCMD in the page was zeroed above. Check if the
	 * whole page is now empty meaning that all PCMD's have been zeroed:
	 */
	pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

	kunmap_local(pcmd_page);
	kunmap_local((void *)(unsigned long)pginfo.contents);

	get_page(b.pcmd);
	sgx_encl_put_backing(&b);

	sgx_encl_truncate_backing_page(encl, page_index);

	if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
		sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
		pcmd_page = kmap_local_page(b.pcmd);
		if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
			pr_warn("PCMD page not empty after truncate.\n");
		kunmap_local(pcmd_page);
	}

	put_page(b.pcmd);

	return ret;
}

static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
					  struct sgx_epc_page *secs_page)
{
	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
	struct sgx_encl *encl = encl_page->encl;
	struct sgx_epc_page *epc_page;
	int ret;

	epc_page = sgx_alloc_epc_page(encl_page, false);
	if (IS_ERR(epc_page))
		return epc_page;

	ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
	if (ret) {
		sgx_encl_free_epc_page(epc_page);
		return ERR_PTR(ret);
	}

	sgx_free_va_slot(encl_page->va_page, va_offset);
	list_move(&encl_page->va_page->list, &encl->va_pages);
	encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
	encl_page->epc_page = epc_page;

	return epc_page;
}

/*
 * Ensure the SECS page is not swapped out. Must be called with encl->lock
 * to protect the enclave states including SECS and ensure the SECS page is
 * not swapped out again while being used.
 */
static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
{
	struct sgx_epc_page *epc_page = encl->secs.epc_page;

	if (!epc_page)
		epc_page = sgx_encl_eldu(&encl->secs, NULL);

	return epc_page;
}

static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
						  struct sgx_encl_page *entry)
{
	struct sgx_epc_page *epc_page;

	/* Entry successfully located. */
	if (entry->epc_page) {
		if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
			return ERR_PTR(-EBUSY);

		return entry;
	}

	epc_page = sgx_encl_load_secs(encl);
	if (IS_ERR(epc_page))
		return ERR_CAST(epc_page);

	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
	if (IS_ERR(epc_page))
		return ERR_CAST(epc_page);

	encl->secs_child_cnt++;
	sgx_mark_page_reclaimable(entry->epc_page);

	return entry;
}

static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
							unsigned long addr,
							unsigned long vm_flags)
{
	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
	struct sgx_encl_page *entry;

	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
	if (!entry)
		return ERR_PTR(-EFAULT);

	/*
	 * Verify that the page has equal or higher build time
	 * permissions than the VMA permissions (i.e. the subset of {VM_READ,
	 * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
	 */
	if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
		return ERR_PTR(-EFAULT);

	return __sgx_encl_load_page(encl, entry);
}

struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
					 unsigned long addr)
{
	struct sgx_encl_page *entry;

	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
	if (!entry)
		return ERR_PTR(-EFAULT);

	return __sgx_encl_load_page(encl, entry);
}

/**
 * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
 * @vma:	VMA obtained from fault info from where page is accessed
 * @encl:	enclave accessing the page
 * @addr:	address that triggered the page fault
 *
 * When an initialized enclave accesses a page with no backing EPC page
 * on a SGX2 system then the EPC can be added dynamically via the SGX2
 * ENCLS[EAUG] instruction.
 *
 * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
 * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
 */
static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
				     struct sgx_encl *encl, unsigned long addr)
{
	vm_fault_t vmret = VM_FAULT_SIGBUS;
	struct sgx_pageinfo pginfo = {0};
	struct sgx_encl_page *encl_page;
	struct sgx_epc_page *epc_page;
	struct sgx_va_page *va_page;
	unsigned long phys_addr;
	u64 secinfo_flags;
	int ret;

	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
		return VM_FAULT_SIGBUS;

	/*
	 * Ignore internal permission checking for dynamically added pages.
	 * They matter only for data added during the pre-initialization
	 * phase. The enclave decides the permissions by the means of
	 * EACCEPT, EACCEPTCOPY and EMODPE.
	 */
	secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
	encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
	if (IS_ERR(encl_page))
		return VM_FAULT_OOM;

	mutex_lock(&encl->lock);

	epc_page = sgx_encl_load_secs(encl);
	if (IS_ERR(epc_page)) {
		if (PTR_ERR(epc_page) == -EBUSY)
			vmret = VM_FAULT_NOPAGE;
		goto err_out_unlock;
	}

	epc_page = sgx_alloc_epc_page(encl_page, false);
	if (IS_ERR(epc_page)) {
		if (PTR_ERR(epc_page) == -EBUSY)
			vmret = VM_FAULT_NOPAGE;
		goto err_out_unlock;
	}

	va_page = sgx_encl_grow(encl, false);
	if (IS_ERR(va_page)) {
		if (PTR_ERR(va_page) == -EBUSY)
			vmret = VM_FAULT_NOPAGE;
		goto err_out_epc;
	}

	if (va_page)
		list_add(&va_page->list, &encl->va_pages);

	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
			encl_page, GFP_KERNEL);
	/*
	 * If ret == -EBUSY then page was created in another flow while
	 * running without encl->lock
	 */
	if (ret)
		goto err_out_shrink;

	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
	pginfo.addr = encl_page->desc & PAGE_MASK;
	pginfo.metadata = 0;

	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
	if (ret)
		goto err_out;

	encl_page->encl = encl;
	encl_page->epc_page = epc_page;
	encl_page->type = SGX_PAGE_TYPE_REG;
	encl->secs_child_cnt++;

	sgx_mark_page_reclaimable(encl_page->epc_page);

	phys_addr = sgx_get_epc_phys_addr(epc_page);
	/*
	 * Do not undo everything when creating PTE entry fails - next #PF
	 * would find page ready for a PTE.
	 */
	vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
	if (vmret != VM_FAULT_NOPAGE) {
		mutex_unlock(&encl->lock);
		return VM_FAULT_SIGBUS;
	}
	mutex_unlock(&encl->lock);
	return VM_FAULT_NOPAGE;

err_out:
	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));

err_out_shrink:
	sgx_encl_shrink(encl, va_page);
err_out_epc:
	sgx_encl_free_epc_page(epc_page);
err_out_unlock:
	mutex_unlock(&encl->lock);
	kfree(encl_page);

	return vmret;
}

static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
	unsigned long addr = (unsigned long)vmf->address;
	struct vm_area_struct *vma = vmf->vma;
	struct sgx_encl_page *entry;
	unsigned long phys_addr;
	struct sgx_encl *encl;
	vm_fault_t ret;

	encl = vma->vm_private_data;

	/*
	 * It's very unlikely but possible that allocating memory for the
	 * mm_list entry of a forked process failed in sgx_vma_open(). When
	 * this happens, vm_private_data is set to NULL.
	 */
	if (unlikely(!encl))
		return VM_FAULT_SIGBUS;

	/*
	 * The page_array keeps track of all enclave pages, whether they
	 * are swapped out or not. If there is no entry for this page and
	 * the system supports SGX2 then it is possible to dynamically add
	 * a new enclave page. This is only possible for an initialized
	 * enclave that will be checked for right away.
	 */
	if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
	    (!xa_load(&encl->page_array, PFN_DOWN(addr))))
		return sgx_encl_eaug_page(vma, encl, addr);

	mutex_lock(&encl->lock);

	entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
	if (IS_ERR(entry)) {
		mutex_unlock(&encl->lock);

		if (PTR_ERR(entry) == -EBUSY)
			return VM_FAULT_NOPAGE;

		return VM_FAULT_SIGBUS;
	}

	phys_addr = sgx_get_epc_phys_addr(entry->epc_page);

	ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
	if (ret != VM_FAULT_NOPAGE) {
		mutex_unlock(&encl->lock);

		return VM_FAULT_SIGBUS;
	}

	sgx_encl_test_and_clear_young(vma->vm_mm, entry);
	mutex_unlock(&encl->lock);

	return VM_FAULT_NOPAGE;
}

static void sgx_vma_open(struct vm_area_struct *vma)
{
	struct sgx_encl *encl = vma->vm_private_data;

	/*
	 * It's possible but unlikely that vm_private_data is NULL. This can
	 * happen in a grandchild of a process, when sgx_encl_mm_add() had
	 * failed to allocate memory in this callback.
	 */
	if (unlikely(!encl))
		return;

	if (sgx_encl_mm_add(encl, vma->vm_mm))
		vma->vm_private_data = NULL;
}

/**
 * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
 * @encl:		an enclave pointer
 * @start:		lower bound of the address range, inclusive
 * @end:		upper bound of the address range, exclusive
 * @vm_flags:		VMA flags
 *
 * Iterate through the enclave pages contained within [@start, @end) to verify
 * that the permissions requested by a subset of {VM_READ, VM_WRITE, VM_EXEC}
 * do not contain any permissions that are not contained in the build time
 * permissions of any of the enclave pages within the given address range.
 *
 * An enclave creator must declare the strongest permissions that will be
 * needed for each enclave page. This ensures that mappings have the identical
 * or weaker permissions than the earlier declared permissions.
 *
 * Return: 0 on success, -EACCES otherwise
 */
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
		     unsigned long end, unsigned long vm_flags)
{
	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
	struct sgx_encl_page *page;
	unsigned long count = 0;
	int ret = 0;

	XA_STATE(xas, &encl->page_array, PFN_DOWN(start));

	/* Disallow mapping outside enclave's address range. */
	if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
	    (start < encl->base || end > encl->base + encl->size))
		return -EACCES;

	/*
	 * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
	 * conflict with the enclave page permissions.
	 */
	if (current->personality & READ_IMPLIES_EXEC)
		return -EACCES;

	mutex_lock(&encl->lock);
	xas_lock(&xas);
	xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
		if (~page->vm_max_prot_bits & vm_prot_bits) {
			ret = -EACCES;
			break;
		}

		/* Reschedule on every XA_CHECK_SCHED iteration. */
		if (!(++count % XA_CHECK_SCHED)) {
			xas_pause(&xas);
			xas_unlock(&xas);
			mutex_unlock(&encl->lock);

			cond_resched();

			mutex_lock(&encl->lock);
			xas_lock(&xas);
		}
	}
	xas_unlock(&xas);
	mutex_unlock(&encl->lock);

	return ret;
}

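/*
 * Illustrative sketch (hypothetical, compiled out): the check inside
 * sgx_encl_may_map() rejects a mapping that asks for any access bit missing
 * from a page's build-time maximum, e.g. a VM_WRITE mapping of a page that
 * was added read/exec-only.
 */
#if 0
static bool example_mapping_allowed(unsigned long vm_max_prot_bits,
				    unsigned long vm_prot_bits)
{
	/* A VM_READ|VM_EXEC page mapped with VM_WRITE returns false here. */
	return !(~vm_max_prot_bits & vm_prot_bits);
}
#endif
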
static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end, unsigned long newflags)
{
	return sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
}

static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
			       unsigned long addr, void *data)
{
	unsigned long offset = addr & ~PAGE_MASK;
	int ret;

	ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
	if (ret)
		return -EIO;

	return 0;
}

static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
				unsigned long addr, void *data)
{
	unsigned long offset = addr & ~PAGE_MASK;
	int ret;

	ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
	if (ret)
		return -EIO;

	return 0;
}

/*
 * Load an enclave page to EPC if required, and take encl->lock.
 */
static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
						   unsigned long addr,
						   unsigned long vm_flags)
{
	struct sgx_encl_page *entry;

	for ( ; ; ) {
		mutex_lock(&encl->lock);

		entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
		if (PTR_ERR(entry) != -EBUSY)
			break;

		mutex_unlock(&encl->lock);
	}

	if (IS_ERR(entry))
		mutex_unlock(&encl->lock);

	return entry;
}

static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
			  void *buf, int len, int write)
{
	struct sgx_encl *encl = vma->vm_private_data;
	struct sgx_encl_page *entry = NULL;
	char data[sizeof(unsigned long)];
	unsigned long align;
	int offset;
	int cnt;
	int ret = 0;
	int i;

	/*
	 * If process was forked, VMA is still there but vm_private_data is set
	 * to NULL.
	 */
	if (!encl)
		return -EFAULT;

	if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
		return -EFAULT;

	for (i = 0; i < len; i += cnt) {
		entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK,
					      vma->vm_flags);
		if (IS_ERR(entry)) {
			ret = PTR_ERR(entry);
			break;
		}

		align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
		offset = (addr + i) & (sizeof(unsigned long) - 1);
		cnt = sizeof(unsigned long) - offset;
		cnt = min(cnt, len - i);

		ret = sgx_encl_debug_read(encl, entry, align, data);
		if (ret)
			goto out;

		if (write) {
			memcpy(data + offset, buf + i, cnt);
			ret = sgx_encl_debug_write(encl, entry, align, data);
			if (ret)
				goto out;
		} else {
			memcpy(buf + i, data + offset, cnt);
		}

out:
		mutex_unlock(&encl->lock);

		if (ret)
			break;
	}

	return ret < 0 ? ret : i;
}

const struct vm_operations_struct sgx_vm_ops = {
	.fault = sgx_vma_fault,
	.mprotect = sgx_vma_mprotect,
	.open = sgx_vma_open,
	.access = sgx_vma_access,
};

/**
 * sgx_encl_release - Destroy an enclave instance
 * @ref:	address of a kref inside &sgx_encl
 *
 * Used together with kref_put(). Frees all the resources associated with the
 * enclave and the instance itself.
 */
void sgx_encl_release(struct kref *ref)
{
	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
	unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
	struct sgx_va_page *va_page;
	struct sgx_encl_page *entry;
	unsigned long count = 0;

	XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));

	xas_lock(&xas);
	xas_for_each(&xas, entry, max_page_index) {
		if (entry->epc_page) {
			/*
			 * The page and its radix tree entry cannot be freed
			 * if the page is being held by the reclaimer.
			 */
			if (sgx_unmark_page_reclaimable(entry->epc_page))
				continue;

			sgx_encl_free_epc_page(entry->epc_page);
			encl->secs_child_cnt--;
			entry->epc_page = NULL;
		}

		kfree(entry);
		/*
		 * Invoke scheduler on every XA_CHECK_SCHED iteration
		 * to prevent soft lockups.
		 */
		if (!(++count % XA_CHECK_SCHED)) {
			xas_pause(&xas);
			xas_unlock(&xas);

			cond_resched();

			xas_lock(&xas);
		}
	}
	xas_unlock(&xas);

	xa_destroy(&encl->page_array);

	if (!encl->secs_child_cnt && encl->secs.epc_page) {
		sgx_encl_free_epc_page(encl->secs.epc_page);
		encl->secs.epc_page = NULL;
	}

	while (!list_empty(&encl->va_pages)) {
		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
					   list);
		list_del(&va_page->list);
		sgx_encl_free_epc_page(va_page->epc_page);
		kfree(va_page);
	}

	if (encl->backing)
		fput(encl->backing);

	cleanup_srcu_struct(&encl->srcu);

	WARN_ON_ONCE(!list_empty(&encl->mm_list));

	/* Detect EPC page leaks. */
	WARN_ON_ONCE(encl->secs_child_cnt);
	WARN_ON_ONCE(encl->secs.epc_page);

	kfree(encl);
}

/*
 * 'mm' is exiting and no longer needs mmu notifications.
 */
static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
				     struct mm_struct *mm)
{
	struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
	struct sgx_encl_mm *tmp = NULL;

	/*
	 * The enclave itself can remove encl_mm. Note, objects can't be moved
	 * off an RCU protected list, but deletion is ok.
	 */
	spin_lock(&encl_mm->encl->mm_lock);
	list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
		if (tmp == encl_mm) {
			list_del_rcu(&encl_mm->list);
			break;
		}
	}
	spin_unlock(&encl_mm->encl->mm_lock);

	if (tmp == encl_mm) {
		synchronize_srcu(&encl_mm->encl->srcu);
		mmu_notifier_put(mn);
	}
}

static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
	struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);

	/* 'encl_mm' is going away, put encl_mm->encl reference: */
	kref_put(&encl_mm->encl->refcount, sgx_encl_release);

	kfree(encl_mm);
}

static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
	.release		= sgx_mmu_notifier_release,
	.free_notifier		= sgx_mmu_notifier_free,
};

static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
					    struct mm_struct *mm)
{
	struct sgx_encl_mm *encl_mm = NULL;
	struct sgx_encl_mm *tmp;
	int idx;

	idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
		if (tmp->mm == mm) {
			encl_mm = tmp;
			break;
		}
	}

	srcu_read_unlock(&encl->srcu, idx);

	return encl_mm;
}

int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
{
	struct sgx_encl_mm *encl_mm;
	int ret;

	/*
	 * Even though a single enclave may be mapped into an mm more than once,
	 * each 'mm' only appears once on encl->mm_list. This is guaranteed by
	 * holding the mm's mmap lock for write before an mm can be added to or
	 * removed from an encl->mm_list.
	 */
	mmap_assert_write_locked(mm);

	/*
	 * It's possible that an entry already exists in the mm_list, because it
	 * is removed only on VFS release or process exit.
	 */
	if (sgx_encl_find_mm(encl, mm))
		return 0;

	encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
	if (!encl_mm)
		return -ENOMEM;

	/* Grab a refcount for the encl_mm->encl reference: */
	kref_get(&encl->refcount);
	encl_mm->encl = encl;
	encl_mm->mm = mm;
	encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;

	ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
	if (ret) {
		kfree(encl_mm);
		return ret;
	}

	spin_lock(&encl->mm_lock);
	list_add_rcu(&encl_mm->list, &encl->mm_list);
	/* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
	smp_wmb();
	encl->mm_list_version++;
	spin_unlock(&encl->mm_lock);

	return 0;
}

/**
 * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
 * @encl: the enclave
 *
 * Some SGX functions require that no cached linear-to-physical address
 * mappings are present before they can succeed. For example, ENCLS[EWB]
 * copies a page from the enclave page cache to regular main memory but
 * it fails if it cannot ensure that there are no cached
 * linear-to-physical address mappings referring to the page.
 *
 * SGX hardware flushes all cached linear-to-physical mappings on a CPU
 * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
 * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
 * address mappings are cleared but coordination with the tracking done within
 * the SGX hardware is needed to support the SGX functions that depend on this
 * cache flush.
 *
 * When the ENCLS[ETRACK] function is issued on an enclave the hardware
 * tracks threads operating inside the enclave at that time. The SGX
 * hardware tracking requires that all the identified threads must have
 * exited the enclave in order to flush the mappings before a function such
 * as ENCLS[EWB] will be permitted.
 *
 * The following flow is used to support SGX functions that require that
 * no cached linear-to-physical address mappings are present:
 * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
 * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
 *    accessing the enclave.
 * 3) Send IPI to identified CPUs, kicking them out of the enclave and
 *    thus flushing all locally cached linear-to-physical address mappings.
 * 4) Execute SGX function.
 *
 * Context: It is required to call this function after ENCLS[ETRACK].
 *          This will ensure that if any new mm appears (racing with
 *          sgx_encl_mm_add()) then the new mm will enter into the
 *          enclave with fresh linear-to-physical address mappings.
 *
 *          It is required that all IPIs are completed before a new
 *          ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
 *          of the above flow with the enclave's mutex.
 *
 * Return: cpumask of CPUs that might be accessing @encl
 */
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
{
	cpumask_t *cpumask = &encl->cpumask;
	struct sgx_encl_mm *encl_mm;
	int idx;

	cpumask_clear(cpumask);

	idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		if (!mmget_not_zero(encl_mm->mm))
			continue;

		cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

		mmput_async(encl_mm->mm);
	}

	srcu_read_unlock(&encl->srcu, idx);

	return cpumask;
}

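/*
 * Hypothetical caller sketch (compiled out): the ETRACK -> cpumask -> IPI
 * sequence described above, roughly as the reclaimer performs it before
 * ENCLS[EWB]. The __etrack() wrapper and sgx_ipi_cb() callback are named
 * here only for illustration and are assumptions, not part of this file.
 */
#if 0
static void example_flush_enclave_tlbs(struct sgx_encl *encl, void *secs_va)
{
	__etrack(secs_va);				/* 1) start hardware tracking */
	on_each_cpu_mask(sgx_encl_cpumask(encl),	/* 2) query CPUs in the enclave */
			 sgx_ipi_cb, NULL, 1);		/* 3) IPI kicks them out, flushing TLBs */
	/* 4) the SGX function, e.g. ENCLS[EWB], may now be executed. */
}
#endif
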
static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
					      pgoff_t index)
{
	struct address_space *mapping = encl->backing->f_mapping;
	gfp_t gfpmask = mapping_gfp_mask(mapping);

	return shmem_read_mapping_page_gfp(mapping, index, gfpmask);
}

/**
 * __sgx_encl_get_backing() - Pin the backing storage
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Pin the backing storage pages for storing the encrypted contents and Paging
 * Crypto MetaData (PCMD) of an enclave page.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
				  struct sgx_backing *backing)
{
	pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
	struct page *contents;
	struct page *pcmd;

	contents = sgx_encl_get_backing_page(encl, page_index);
	if (IS_ERR(contents))
		return PTR_ERR(contents);

	pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
	if (IS_ERR(pcmd)) {
		put_page(contents);
		return PTR_ERR(pcmd);
	}

	backing->contents = contents;
	backing->pcmd = pcmd;
	backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);

	return 0;
}

/*
 * When called from ksgxd, returns the mem_cgroup of a struct mm stored
 * in the enclave's mm_list. When not called from ksgxd, just returns
 * the mem_cgroup of the current task.
 */
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
{
	struct mem_cgroup *memcg = NULL;
	struct sgx_encl_mm *encl_mm;
	int idx;

	/*
	 * If called from normal task context, return the mem_cgroup
	 * of the current task's mm. The remainder of the handling is for
	 * ksgxd.
	 */
	if (!current_is_ksgxd())
		return get_mem_cgroup_from_mm(current->mm);

	/*
	 * Search the enclave's mm_list to find an mm associated with
	 * this enclave to charge the allocation to.
	 */
	idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		if (!mmget_not_zero(encl_mm->mm))
			continue;

		memcg = get_mem_cgroup_from_mm(encl_mm->mm);

		mmput_async(encl_mm->mm);

		break;
	}

	srcu_read_unlock(&encl->srcu, idx);

	/*
	 * In the rare case that there isn't an mm associated with
	 * the enclave, set memcg to the current active mem_cgroup.
	 * This will be the root mem_cgroup if there is no active
	 * mem_cgroup.
	 */
	if (!memcg)
		return get_mem_cgroup_from_mm(NULL);

	return memcg;
}

/**
 * sgx_encl_alloc_backing() - create a new backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * When called from ksgxd, sets the active memcg from one of the
 * mms in the enclave's mm_list prior to any backing page allocation,
 * in order to ensure that shmem page allocations are charged to the
 * enclave. Create a backing page for loading data back into an EPC page with
 * ELDU. This function takes a reference on a new backing page which
 * must be dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
			   struct sgx_backing *backing)
{
	struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
	struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
	int ret;

	ret = __sgx_encl_get_backing(encl, page_index, backing);

	set_active_memcg(memcg);
	mem_cgroup_put(encl_memcg);

	return ret;
}

/**
 * sgx_encl_lookup_backing() - retrieve an existing backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Retrieve a backing page for loading data back into an EPC page with ELDU.
 * It is the caller's responsibility to ensure that it is appropriate to use
 * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
 * not used correctly, this will cause an allocation which is not accounted for.
 * This function takes a reference on an existing backing page which must be
 * dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
				   struct sgx_backing *backing)
{
	return __sgx_encl_get_backing(encl, page_index, backing);
}

/**
 * sgx_encl_put_backing() - Unpin the backing storage
 * @backing:	data for accessing backing storage for the page
 */
void sgx_encl_put_backing(struct sgx_backing *backing)
{
	put_page(backing->pcmd);
	put_page(backing->contents);
}

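/*
 * Illustrative pairing (compiled out): every successful
 * sgx_encl_alloc_backing() or sgx_encl_lookup_backing() call is balanced by
 * sgx_encl_put_backing() once the pinned pages are no longer needed. The
 * helper below is hypothetical and only demonstrates the pattern.
 */
#if 0
static int example_backing_roundtrip(struct sgx_encl *encl, unsigned long page_index)
{
	struct sgx_backing b;
	int ret;

	ret = sgx_encl_alloc_backing(encl, page_index, &b);
	if (ret)
		return ret;

	/* ... access b.contents and b.pcmd + b.pcmd_offset here ... */

	sgx_encl_put_backing(&b);
	return 0;
}
#endif
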
static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
					    void *data)
{
	pte_t pte;
	int ret;

	ret = pte_young(*ptep);
	if (ret) {
		pte = pte_mkold(*ptep);
		set_pte_at((struct mm_struct *)data, addr, ptep, pte);
	}

	return ret;
}

/**
 * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
 * @mm:		mm_struct that is checked
 * @page:	enclave page to be tested for recent access
 *
 * Checks the Access (A) bit from the PTE corresponding to the enclave page and
 * clears it.
 *
 * Return: 1 if the page has been recently accessed and 0 if not.
 */
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
				  struct sgx_encl_page *page)
{
	unsigned long addr = page->desc & PAGE_MASK;
	struct sgx_encl *encl = page->encl;
	struct vm_area_struct *vma;
	int ret;

	ret = sgx_encl_find(mm, addr, &vma);
	if (ret)
		return 0;

	if (encl != vma->vm_private_data)
		return 0;

	ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE,
				  sgx_encl_test_and_clear_young_cb, vma->vm_mm);
	if (ret < 0)
		return 0;

	return ret;
}

struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
					  unsigned long offset,
					  u64 secinfo_flags)
{
	struct sgx_encl_page *encl_page;
	unsigned long prot;

	encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
	if (!encl_page)
		return ERR_PTR(-ENOMEM);

	encl_page->desc = encl->base + offset;
	encl_page->encl = encl;

	prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
	       _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
	       _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);

	/*
	 * TCS pages must always have RW set for CPU access while the SECINFO
	 * permissions are *always* zero - the CPU ignores the user provided
	 * values and silently overwrites them with zero permissions.
	 */
	if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
		prot |= PROT_READ | PROT_WRITE;

	/* Calculate maximum of the VM flags for the page. */
	encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);

	return encl_page;
}

/**
 * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
 * @encl: the enclave
 * @addr: page aligned pointer to single page for which PTEs will be removed
 *
 * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
 * of @addr from each VMA. Ensure that page fault handler is ready to handle
 * new mappings of @addr before calling this function.
 */
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
{
	unsigned long mm_list_version;
	struct sgx_encl_mm *encl_mm;
	struct vm_area_struct *vma;
	int idx, ret;

	do {
		mm_list_version = encl->mm_list_version;

		/* Pairs with smp_wmb() in sgx_encl_mm_add(). */
		smp_rmb();

		idx = srcu_read_lock(&encl->srcu);

		list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
			if (!mmget_not_zero(encl_mm->mm))
				continue;

			mmap_read_lock(encl_mm->mm);

			ret = sgx_encl_find(encl_mm->mm, addr, &vma);
			if (!ret && encl == vma->vm_private_data)
				zap_vma_ptes(vma, addr, PAGE_SIZE);

			mmap_read_unlock(encl_mm->mm);

			mmput_async(encl_mm->mm);
		}

		srcu_read_unlock(&encl->srcu, idx);
	} while (unlikely(encl->mm_list_version != mm_list_version));
}

/**
 * sgx_alloc_va_page() - Allocate a Version Array (VA) page
 * @reclaim: Reclaim EPC pages directly if none available. Enclave
 *           mutex should not be held if this is set.
 *
 * Allocate a free EPC page and convert it to a Version Array (VA) page.
 *
 * Return:
 *   a VA page,
 *   -errno otherwise
 */
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
	struct sgx_epc_page *epc_page;
	int ret;

	epc_page = sgx_alloc_epc_page(NULL, reclaim);
	if (IS_ERR(epc_page))
		return ERR_CAST(epc_page);

	ret = __epa(sgx_get_epc_virt_addr(epc_page));
	if (ret) {
		WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
		sgx_encl_free_epc_page(epc_page);
		return ERR_PTR(-EFAULT);
	}

	return epc_page;
}

/**
 * sgx_alloc_va_slot - allocate a VA slot
 * @va_page:	a &struct sgx_va_page instance
 *
 * Allocates a slot from a &struct sgx_va_page instance.
 *
 * Return: offset of the slot inside the VA page
 */
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
{
	int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

	if (slot < SGX_VA_SLOT_COUNT)
		set_bit(slot, va_page->slots);

	return slot << 3;
}

/**
 * sgx_free_va_slot - free a VA slot
 * @va_page:	a &struct sgx_va_page instance
 * @offset:	offset of the slot inside the VA page
 *
 * Frees a slot from a &struct sgx_va_page instance.
 */
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
{
	clear_bit(offset >> 3, va_page->slots);
}

/**
 * sgx_va_page_full - is the VA page full?
 * @va_page:	a &struct sgx_va_page instance
 *
 * Return: true if all slots have been taken
 */
bool sgx_va_page_full(struct sgx_va_page *va_page)
{
	int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

	return slot == SGX_VA_SLOT_COUNT;
}

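/*
 * Illustrative usage (compiled out): a VA slot is held while its enclave page
 * lives in the backing store and is released when the page is loaded back
 * with ELDU, as sgx_encl_eldu() above does via sgx_free_va_slot(). This
 * hypothetical helper only demonstrates the alloc/full/free trio.
 */
#if 0
static void example_va_slot_roundtrip(struct sgx_va_page *va_page)
{
	unsigned int offset = sgx_alloc_va_slot(va_page);

	if (sgx_va_page_full(va_page))
		pr_info("all %d slots of this VA page are taken\n", SGX_VA_SLOT_COUNT);

	sgx_free_va_slot(va_page, offset);
}
#endif
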
/**
 * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
 * @page:	EPC page to be freed
 *
 * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
 * only upon success, it puts the page back to free page list. Otherwise, it
 * gives a WARNING to indicate page is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
	int ret;

	WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

	ret = __eremove(sgx_get_epc_virt_addr(page));
	if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
		return;

	sgx_free_epc_page(page);
}