1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2016-20 Intel Corporation. */
4 #include <linux/file.h>
5 #include <linux/freezer.h>
6 #include <linux/highmem.h>
7 #include <linux/kthread.h>
8 #include <linux/miscdevice.h>
9 #include <linux/node.h>
10 #include <linux/pagemap.h>
11 #include <linux/ratelimit.h>
12 #include <linux/sched/mm.h>
13 #include <linux/sched/signal.h>
14 #include <linux/slab.h>
15 #include <linux/sysfs.h>
16 #include <linux/vmalloc.h>
22 struct sgx_epc_section sgx_epc_sections
[SGX_MAX_EPC_SECTIONS
];
23 static int sgx_nr_epc_sections
;
24 static struct task_struct
*ksgxd_tsk
;
25 static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq
);
26 static DEFINE_XARRAY(sgx_epc_address_space
);
29 * These variables are part of the state of the reclaimer, and must be accessed
30 * with sgx_reclaimer_lock acquired.
32 static LIST_HEAD(sgx_active_page_list
);
33 static DEFINE_SPINLOCK(sgx_reclaimer_lock
);
35 static atomic_long_t sgx_nr_free_pages
= ATOMIC_LONG_INIT(0);
37 /* Nodes with one or more EPC sections. */
38 static nodemask_t sgx_numa_mask
;
41 * Array with one list_head for each possible NUMA node. Each
42 * list contains all the sgx_epc_section's which are on that
45 static struct sgx_numa_node
*sgx_numa_nodes
;
47 static LIST_HEAD(sgx_dirty_page_list
);
50 * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
51 * from the input list, and made available for the page allocator. SECS pages
52 * that precede their children in the input list are left intact.
54 * Return 0 when sanitization was successful or kthread was stopped, and the
55 * number of unsanitized pages otherwise.
57 static unsigned long __sgx_sanitize_pages(struct list_head
*dirty_page_list
)
59 unsigned long left_dirty
= 0;
60 struct sgx_epc_page
*page
;
64 /* dirty_page_list is thread-local, no need for a lock: */
65 while (!list_empty(dirty_page_list
)) {
66 if (kthread_should_stop())
69 page
= list_first_entry(dirty_page_list
, struct sgx_epc_page
, list
);
72 * Checking page->poison without holding the node->lock
73 * is racy, but losing the race (i.e. poison is set just
74 * after the check) just means __eremove() will be uselessly
75 * called for a page that sgx_free_epc_page() will put onto
76 * the node->sgx_poison_page_list later.
79 struct sgx_epc_section
*section
= &sgx_epc_sections
[page
->section
];
80 struct sgx_numa_node
*node
= section
->node
;
82 spin_lock(&node
->lock
);
83 list_move(&page
->list
, &node
->sgx_poison_page_list
);
84 spin_unlock(&node
->lock
);
89 ret
= __eremove(sgx_get_epc_virt_addr(page
));
92 * page is now sanitized. Make it available via the SGX
95 list_del(&page
->list
);
96 sgx_free_epc_page(page
);
98 /* The page is not yet clean - move to the dirty list. */
99 list_move_tail(&page
->list
, &dirty
);
106 list_splice(&dirty
, dirty_page_list
);
110 static bool sgx_reclaimer_age(struct sgx_epc_page
*epc_page
)
112 struct sgx_encl_page
*page
= epc_page
->owner
;
113 struct sgx_encl
*encl
= page
->encl
;
114 struct sgx_encl_mm
*encl_mm
;
118 idx
= srcu_read_lock(&encl
->srcu
);
120 list_for_each_entry_rcu(encl_mm
, &encl
->mm_list
, list
) {
121 if (!mmget_not_zero(encl_mm
->mm
))
124 mmap_read_lock(encl_mm
->mm
);
125 ret
= !sgx_encl_test_and_clear_young(encl_mm
->mm
, page
);
126 mmap_read_unlock(encl_mm
->mm
);
128 mmput_async(encl_mm
->mm
);
134 srcu_read_unlock(&encl
->srcu
, idx
);
142 static void sgx_reclaimer_block(struct sgx_epc_page
*epc_page
)
144 struct sgx_encl_page
*page
= epc_page
->owner
;
145 unsigned long addr
= page
->desc
& PAGE_MASK
;
146 struct sgx_encl
*encl
= page
->encl
;
149 sgx_zap_enclave_ptes(encl
, addr
);
151 mutex_lock(&encl
->lock
);
153 ret
= __eblock(sgx_get_epc_virt_addr(epc_page
));
154 if (encls_failed(ret
))
155 ENCLS_WARN(ret
, "EBLOCK");
157 mutex_unlock(&encl
->lock
);
160 static int __sgx_encl_ewb(struct sgx_epc_page
*epc_page
, void *va_slot
,
161 struct sgx_backing
*backing
)
163 struct sgx_pageinfo pginfo
;
169 pginfo
.contents
= (unsigned long)kmap_local_page(backing
->contents
);
170 pginfo
.metadata
= (unsigned long)kmap_local_page(backing
->pcmd
) +
171 backing
->pcmd_offset
;
173 ret
= __ewb(&pginfo
, sgx_get_epc_virt_addr(epc_page
), va_slot
);
174 set_page_dirty(backing
->pcmd
);
175 set_page_dirty(backing
->contents
);
177 kunmap_local((void *)(unsigned long)(pginfo
.metadata
-
178 backing
->pcmd_offset
));
179 kunmap_local((void *)(unsigned long)pginfo
.contents
);
/*
 * IPI callback passed to on_each_cpu_mask() by sgx_encl_ewb(). It has no work
 * of its own: the IPI is sent to kick CPUs out of the enclave.
 */
void sgx_ipi_cb(void *info)
{
}
189 * Swap page to the regular memory transformed to the blocked state by using
190 * EBLOCK, which means that it can no longer be referenced (no new TLB entries).
192 * The first trial just tries to write the page assuming that some other thread
193 * has reset the count for threads inside the enclave by using ETRACK, and
194 * previous thread count has been zeroed out. The second trial calls ETRACK
195 * before EWB. If that fails we kick all the HW threads out, and then do EWB,
196 * which should be guaranteed to succeed.
198 static void sgx_encl_ewb(struct sgx_epc_page
*epc_page
,
199 struct sgx_backing
*backing
)
201 struct sgx_encl_page
*encl_page
= epc_page
->owner
;
202 struct sgx_encl
*encl
= encl_page
->encl
;
203 struct sgx_va_page
*va_page
;
204 unsigned int va_offset
;
208 encl_page
->desc
&= ~SGX_ENCL_PAGE_BEING_RECLAIMED
;
210 va_page
= list_first_entry(&encl
->va_pages
, struct sgx_va_page
,
212 va_offset
= sgx_alloc_va_slot(va_page
);
213 va_slot
= sgx_get_epc_virt_addr(va_page
->epc_page
) + va_offset
;
214 if (sgx_va_page_full(va_page
))
215 list_move_tail(&va_page
->list
, &encl
->va_pages
);
217 ret
= __sgx_encl_ewb(epc_page
, va_slot
, backing
);
218 if (ret
== SGX_NOT_TRACKED
) {
219 ret
= __etrack(sgx_get_epc_virt_addr(encl
->secs
.epc_page
));
221 if (encls_failed(ret
))
222 ENCLS_WARN(ret
, "ETRACK");
225 ret
= __sgx_encl_ewb(epc_page
, va_slot
, backing
);
226 if (ret
== SGX_NOT_TRACKED
) {
228 * Slow path, send IPIs to kick cpus out of the
229 * enclave. Note, it's imperative that the cpu
230 * mask is generated *after* ETRACK, else we'll
231 * miss cpus that entered the enclave between
232 * generating the mask and incrementing epoch.
234 on_each_cpu_mask(sgx_encl_cpumask(encl
),
235 sgx_ipi_cb
, NULL
, 1);
236 ret
= __sgx_encl_ewb(epc_page
, va_slot
, backing
);
241 if (encls_failed(ret
))
242 ENCLS_WARN(ret
, "EWB");
244 sgx_free_va_slot(va_page
, va_offset
);
246 encl_page
->desc
|= va_offset
;
247 encl_page
->va_page
= va_page
;
251 static void sgx_reclaimer_write(struct sgx_epc_page
*epc_page
,
252 struct sgx_backing
*backing
)
254 struct sgx_encl_page
*encl_page
= epc_page
->owner
;
255 struct sgx_encl
*encl
= encl_page
->encl
;
256 struct sgx_backing secs_backing
;
259 mutex_lock(&encl
->lock
);
261 sgx_encl_ewb(epc_page
, backing
);
262 encl_page
->epc_page
= NULL
;
263 encl
->secs_child_cnt
--;
264 sgx_encl_put_backing(backing
);
266 if (!encl
->secs_child_cnt
&& test_bit(SGX_ENCL_INITIALIZED
, &encl
->flags
)) {
267 ret
= sgx_encl_alloc_backing(encl
, PFN_DOWN(encl
->size
),
272 sgx_encl_ewb(encl
->secs
.epc_page
, &secs_backing
);
274 sgx_encl_free_epc_page(encl
->secs
.epc_page
);
275 encl
->secs
.epc_page
= NULL
;
277 sgx_encl_put_backing(&secs_backing
);
281 mutex_unlock(&encl
->lock
);
285 * Take a fixed number of pages from the head of the active page pool and
286 * reclaim them to the enclave's private shmem files. Skip the pages, which have
287 * been accessed since the last scan. Move those pages to the tail of active
288 * page pool so that the pages get scanned in LRU like fashion.
290 * Batch process a chunk of pages (at the moment 16) in order to reduce the number
291 * of IPIs and ETRACKs potentially required. sgx_encl_ewb() already reduces them a bit
292 * among the HW threads with its three-stage EWB pipeline (EWB, ETRACK + EWB and IPI
293 * + EWB), but not sufficiently. Reclaiming one page at a time would also be
294 * problematic as it would increase the lock contention too much, which would
295 * halt forward progress.
297 static void sgx_reclaim_pages(void)
299 struct sgx_epc_page
*chunk
[SGX_NR_TO_SCAN
];
300 struct sgx_backing backing
[SGX_NR_TO_SCAN
];
301 struct sgx_encl_page
*encl_page
;
302 struct sgx_epc_page
*epc_page
;
308 spin_lock(&sgx_reclaimer_lock
);
309 for (i
= 0; i
< SGX_NR_TO_SCAN
; i
++) {
310 if (list_empty(&sgx_active_page_list
))
313 epc_page
= list_first_entry(&sgx_active_page_list
,
314 struct sgx_epc_page
, list
);
315 list_del_init(&epc_page
->list
);
316 encl_page
= epc_page
->owner
;
318 if (kref_get_unless_zero(&encl_page
->encl
->refcount
) != 0)
319 chunk
[cnt
++] = epc_page
;
321 /* The owner is freeing the page. No need to add the
322 * page back to the list of reclaimable pages.
324 epc_page
->flags
&= ~SGX_EPC_PAGE_RECLAIMER_TRACKED
;
326 spin_unlock(&sgx_reclaimer_lock
);
328 for (i
= 0; i
< cnt
; i
++) {
330 encl_page
= epc_page
->owner
;
332 if (!sgx_reclaimer_age(epc_page
))
335 page_index
= PFN_DOWN(encl_page
->desc
- encl_page
->encl
->base
);
337 mutex_lock(&encl_page
->encl
->lock
);
338 ret
= sgx_encl_alloc_backing(encl_page
->encl
, page_index
, &backing
[i
]);
340 mutex_unlock(&encl_page
->encl
->lock
);
344 encl_page
->desc
|= SGX_ENCL_PAGE_BEING_RECLAIMED
;
345 mutex_unlock(&encl_page
->encl
->lock
);
349 spin_lock(&sgx_reclaimer_lock
);
350 list_add_tail(&epc_page
->list
, &sgx_active_page_list
);
351 spin_unlock(&sgx_reclaimer_lock
);
353 kref_put(&encl_page
->encl
->refcount
, sgx_encl_release
);
358 for (i
= 0; i
< cnt
; i
++) {
361 sgx_reclaimer_block(epc_page
);
364 for (i
= 0; i
< cnt
; i
++) {
369 encl_page
= epc_page
->owner
;
370 sgx_reclaimer_write(epc_page
, &backing
[i
]);
372 kref_put(&encl_page
->encl
->refcount
, sgx_encl_release
);
373 epc_page
->flags
&= ~SGX_EPC_PAGE_RECLAIMER_TRACKED
;
375 sgx_free_epc_page(epc_page
);
379 static bool sgx_should_reclaim(unsigned long watermark
)
381 return atomic_long_read(&sgx_nr_free_pages
) < watermark
&&
382 !list_empty(&sgx_active_page_list
);
386 * sgx_reclaim_direct() should be called (without enclave's mutex held)
387 * in locations where SGX memory resources might be low and might be
388 * needed in order to make forward progress.
390 void sgx_reclaim_direct(void)
392 if (sgx_should_reclaim(SGX_NR_LOW_PAGES
))
396 static int ksgxd(void *p
)
401 * Sanitize pages in order to recover from kexec(). The 2nd pass is
402 * required for SECS pages, whose child pages blocked EREMOVE.
404 __sgx_sanitize_pages(&sgx_dirty_page_list
);
405 WARN_ON(__sgx_sanitize_pages(&sgx_dirty_page_list
));
407 while (!kthread_should_stop()) {
411 wait_event_freezable(ksgxd_waitq
,
412 kthread_should_stop() ||
413 sgx_should_reclaim(SGX_NR_HIGH_PAGES
));
415 if (sgx_should_reclaim(SGX_NR_HIGH_PAGES
))
424 static bool __init
sgx_page_reclaimer_init(void)
426 struct task_struct
*tsk
;
428 tsk
= kthread_run(ksgxd
, NULL
, "ksgxd");
437 bool current_is_ksgxd(void)
439 return current
== ksgxd_tsk
;
442 static struct sgx_epc_page
*__sgx_alloc_epc_page_from_node(int nid
)
444 struct sgx_numa_node
*node
= &sgx_numa_nodes
[nid
];
445 struct sgx_epc_page
*page
= NULL
;
447 spin_lock(&node
->lock
);
449 if (list_empty(&node
->free_page_list
)) {
450 spin_unlock(&node
->lock
);
454 page
= list_first_entry(&node
->free_page_list
, struct sgx_epc_page
, list
);
455 list_del_init(&page
->list
);
458 spin_unlock(&node
->lock
);
459 atomic_long_dec(&sgx_nr_free_pages
);
465 * __sgx_alloc_epc_page() - Allocate an EPC page
467 * Iterate through NUMA nodes and reserve a free EPC page for the caller. Start
468 * from the NUMA node where the caller is executing.
471 * - an EPC page: A free EPC page was available.
472 * - ERR_PTR(-ENOMEM): Out of EPC pages.
474 struct sgx_epc_page
*__sgx_alloc_epc_page(void)
476 struct sgx_epc_page
*page
;
477 int nid_of_current
= numa_node_id();
481 * Try local node first. If it doesn't have an EPC section,
482 * fall back to the non-local NUMA nodes.
484 if (node_isset(nid_of_current
, sgx_numa_mask
))
485 nid_start
= nid_of_current
;
487 nid_start
= next_node_in(nid_of_current
, sgx_numa_mask
);
491 page
= __sgx_alloc_epc_page_from_node(nid
);
495 nid
= next_node_in(nid
, sgx_numa_mask
);
496 } while (nid
!= nid_start
);
498 return ERR_PTR(-ENOMEM
);
502 * sgx_mark_page_reclaimable() - Mark a page as reclaimable
505 * Mark a page as reclaimable and add it to the active page list. Pages
506 * are automatically removed from the active list when freed.
508 void sgx_mark_page_reclaimable(struct sgx_epc_page
*page
)
510 spin_lock(&sgx_reclaimer_lock
);
511 page
->flags
|= SGX_EPC_PAGE_RECLAIMER_TRACKED
;
512 list_add_tail(&page
->list
, &sgx_active_page_list
);
513 spin_unlock(&sgx_reclaimer_lock
);
517 * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
520 * Clear the reclaimable flag and remove the page from the active page list.
524 * -EBUSY if the page is in the process of being reclaimed
526 int sgx_unmark_page_reclaimable(struct sgx_epc_page
*page
)
528 spin_lock(&sgx_reclaimer_lock
);
529 if (page
->flags
& SGX_EPC_PAGE_RECLAIMER_TRACKED
) {
530 /* The page is being reclaimed. */
531 if (list_empty(&page
->list
)) {
532 spin_unlock(&sgx_reclaimer_lock
);
536 list_del(&page
->list
);
537 page
->flags
&= ~SGX_EPC_PAGE_RECLAIMER_TRACKED
;
539 spin_unlock(&sgx_reclaimer_lock
);
545 * sgx_alloc_epc_page() - Allocate an EPC page
546 * @owner: the owner of the EPC page
547 * @reclaim: reclaim pages if necessary
549 * Iterate through EPC sections and lend a free EPC page to the caller. When a
550 * page is no longer needed it must be released with sgx_free_epc_page(). If
551 * @reclaim is set to true, directly reclaim pages when we are out of pages. No
552 * mm's can be locked when @reclaim is set to true.
554 * Finally, wake up ksgxd when the number of pages goes below the watermark
555 * before returning back to the caller.
561 struct sgx_epc_page
*sgx_alloc_epc_page(void *owner
, bool reclaim
)
563 struct sgx_epc_page
*page
;
566 page
= __sgx_alloc_epc_page();
572 if (list_empty(&sgx_active_page_list
))
573 return ERR_PTR(-ENOMEM
);
576 page
= ERR_PTR(-EBUSY
);
580 if (signal_pending(current
)) {
581 page
= ERR_PTR(-ERESTARTSYS
);
589 if (sgx_should_reclaim(SGX_NR_LOW_PAGES
))
590 wake_up(&ksgxd_waitq
);
596 * sgx_free_epc_page() - Free an EPC page
599 * Put the EPC page back to the list of free pages. It's the caller's
600 * responsibility to make sure that the page is in uninitialized state. In other
601 * words, do EREMOVE, EWB or whatever operation is necessary before calling
604 void sgx_free_epc_page(struct sgx_epc_page
*page
)
606 struct sgx_epc_section
*section
= &sgx_epc_sections
[page
->section
];
607 struct sgx_numa_node
*node
= section
->node
;
609 spin_lock(&node
->lock
);
613 list_add(&page
->list
, &node
->sgx_poison_page_list
);
615 list_add_tail(&page
->list
, &node
->free_page_list
);
616 page
->flags
= SGX_EPC_PAGE_IS_FREE
;
618 spin_unlock(&node
->lock
);
619 atomic_long_inc(&sgx_nr_free_pages
);
622 static bool __init
sgx_setup_epc_section(u64 phys_addr
, u64 size
,
624 struct sgx_epc_section
*section
)
626 unsigned long nr_pages
= size
>> PAGE_SHIFT
;
629 section
->virt_addr
= memremap(phys_addr
, size
, MEMREMAP_WB
);
630 if (!section
->virt_addr
)
633 section
->pages
= vmalloc_array(nr_pages
, sizeof(struct sgx_epc_page
));
634 if (!section
->pages
) {
635 memunmap(section
->virt_addr
);
639 section
->phys_addr
= phys_addr
;
640 xa_store_range(&sgx_epc_address_space
, section
->phys_addr
,
641 phys_addr
+ size
- 1, section
, GFP_KERNEL
);
643 for (i
= 0; i
< nr_pages
; i
++) {
644 section
->pages
[i
].section
= index
;
645 section
->pages
[i
].flags
= 0;
646 section
->pages
[i
].owner
= NULL
;
647 section
->pages
[i
].poison
= 0;
648 list_add_tail(§ion
->pages
[i
].list
, &sgx_dirty_page_list
);
654 bool arch_is_platform_page(u64 paddr
)
656 return !!xa_load(&sgx_epc_address_space
, paddr
);
658 EXPORT_SYMBOL_GPL(arch_is_platform_page
);
660 static struct sgx_epc_page
*sgx_paddr_to_page(u64 paddr
)
662 struct sgx_epc_section
*section
;
664 section
= xa_load(&sgx_epc_address_space
, paddr
);
668 return §ion
->pages
[PFN_DOWN(paddr
- section
->phys_addr
)];
672 * Called in process context to handle a hardware reported
673 * error in an SGX EPC page.
674 * If the MF_ACTION_REQUIRED bit is set in flags, then the
675 * context is the task that consumed the poison data. Otherwise
676 * this is called from a kernel thread unrelated to the page.
678 int arch_memory_failure(unsigned long pfn
, int flags
)
680 struct sgx_epc_page
*page
= sgx_paddr_to_page(pfn
<< PAGE_SHIFT
);
681 struct sgx_epc_section
*section
;
682 struct sgx_numa_node
*node
;
685 * mm/memory-failure.c calls this routine for all errors
686 * where there isn't a "struct page" for the address. But that
687 * includes other address ranges besides SGX.
693 * If poison was consumed synchronously. Send a SIGBUS to
694 * the task. Hardware has already exited the SGX enclave and
695 * will not allow re-entry to an enclave that has a memory
696 * error. The signal may help the task understand why the
699 if (flags
& MF_ACTION_REQUIRED
)
702 section
= &sgx_epc_sections
[page
->section
];
703 node
= section
->node
;
705 spin_lock(&node
->lock
);
707 /* Already poisoned? Nothing more to do */
714 * If the page is on a free list, move it to the per-node
717 if (page
->flags
& SGX_EPC_PAGE_IS_FREE
) {
718 list_move(&page
->list
, &node
->sgx_poison_page_list
);
723 * TBD: Add additional plumbing to enable pre-emptive
724 * action for asynchronous poison notification. Until
725 * then just hope that the poison:
726 * a) is not accessed - sgx_free_epc_page() will deal with it
727 * when the user gives it back
728 * b) results in a recoverable machine check rather than
732 spin_unlock(&node
->lock
);
737 * A section metric is concatenated in a way that @low bits 12-31 define the
738 * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
741 static inline u64 __init
sgx_calc_section_metric(u64 low
, u64 high
)
743 return (low
& GENMASK_ULL(31, 12)) +
744 ((high
& GENMASK_ULL(19, 0)) << 32);
748 static ssize_t
sgx_total_bytes_show(struct device
*dev
, struct device_attribute
*attr
, char *buf
)
750 return sysfs_emit(buf
, "%lu\n", sgx_numa_nodes
[dev
->id
].size
);
752 static DEVICE_ATTR_RO(sgx_total_bytes
);
754 static umode_t
arch_node_attr_is_visible(struct kobject
*kobj
,
755 struct attribute
*attr
, int idx
)
757 /* Make all x86/ attributes invisible when SGX is not initialized: */
758 if (nodes_empty(sgx_numa_mask
))
764 static struct attribute
*arch_node_dev_attrs
[] = {
765 &dev_attr_sgx_total_bytes
.attr
,
769 const struct attribute_group arch_node_dev_group
= {
771 .attrs
= arch_node_dev_attrs
,
772 .is_visible
= arch_node_attr_is_visible
,
775 static void __init
arch_update_sysfs_visibility(int nid
)
777 struct node
*node
= node_devices
[nid
];
780 ret
= sysfs_update_group(&node
->dev
.kobj
, &arch_node_dev_group
);
783 pr_err("sysfs update failed (%d), files may be invisible", ret
);
785 #else /* !CONFIG_NUMA */
786 static void __init
arch_update_sysfs_visibility(int nid
) {}
789 static bool __init
sgx_page_cache_init(void)
791 u32 eax
, ebx
, ecx
, edx
, type
;
796 sgx_numa_nodes
= kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes
), GFP_KERNEL
);
800 for (i
= 0; i
< ARRAY_SIZE(sgx_epc_sections
); i
++) {
801 cpuid_count(SGX_CPUID
, i
+ SGX_CPUID_EPC
, &eax
, &ebx
, &ecx
, &edx
);
803 type
= eax
& SGX_CPUID_EPC_MASK
;
804 if (type
== SGX_CPUID_EPC_INVALID
)
807 if (type
!= SGX_CPUID_EPC_SECTION
) {
808 pr_err_once("Unknown EPC section type: %u\n", type
);
812 pa
= sgx_calc_section_metric(eax
, ebx
);
813 size
= sgx_calc_section_metric(ecx
, edx
);
815 pr_info("EPC section 0x%llx-0x%llx\n", pa
, pa
+ size
- 1);
817 if (!sgx_setup_epc_section(pa
, size
, i
, &sgx_epc_sections
[i
])) {
818 pr_err("No free memory for an EPC section\n");
822 nid
= numa_map_to_online_node(phys_to_target_node(pa
));
823 if (nid
== NUMA_NO_NODE
) {
824 /* The physical address is already printed above. */
825 pr_warn(FW_BUG
"Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
829 if (!node_isset(nid
, sgx_numa_mask
)) {
830 spin_lock_init(&sgx_numa_nodes
[nid
].lock
);
831 INIT_LIST_HEAD(&sgx_numa_nodes
[nid
].free_page_list
);
832 INIT_LIST_HEAD(&sgx_numa_nodes
[nid
].sgx_poison_page_list
);
833 node_set(nid
, sgx_numa_mask
);
834 sgx_numa_nodes
[nid
].size
= 0;
836 /* Make SGX-specific node sysfs files visible: */
837 arch_update_sysfs_visibility(nid
);
840 sgx_epc_sections
[i
].node
= &sgx_numa_nodes
[nid
];
841 sgx_numa_nodes
[nid
].size
+= size
;
843 sgx_nr_epc_sections
++;
846 if (!sgx_nr_epc_sections
) {
847 pr_err("There are zero EPC sections.\n");
851 for_each_online_node(nid
) {
852 if (!node_isset(nid
, sgx_numa_mask
) &&
853 node_state(nid
, N_MEMORY
) && node_state(nid
, N_CPU
))
854 pr_info("node%d has both CPUs and memory but doesn't have an EPC section\n",
862 * Update the SGX_LEPUBKEYHASH MSRs to the values specified by caller.
863 * Bare-metal driver requires to update them to hash of enclave's signer
864 * before EINIT. KVM needs to update them to guest's virtual MSR values
865 * before doing EINIT from guest.
867 void sgx_update_lepubkeyhash(u64
*lepubkeyhash
)
871 WARN_ON_ONCE(preemptible());
873 for (i
= 0; i
< 4; i
++)
874 wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0
+ i
, lepubkeyhash
[i
]);
877 const struct file_operations sgx_provision_fops
= {
878 .owner
= THIS_MODULE
,
881 static struct miscdevice sgx_dev_provision
= {
882 .minor
= MISC_DYNAMIC_MINOR
,
883 .name
= "sgx_provision",
884 .nodename
= "sgx_provision",
885 .fops
= &sgx_provision_fops
,
889 * sgx_set_attribute() - Update allowed attributes given file descriptor
890 * @allowed_attributes: Pointer to allowed enclave attributes
891 * @attribute_fd: File descriptor for specific attribute
893 * Append enclave attribute indicated by file descriptor to allowed
894 * attributes. Currently only SGX_ATTR_PROVISIONKEY indicated by
895 * /dev/sgx_provision is supported.
898 * 0: SGX_ATTR_PROVISIONKEY is appended to allowed_attributes
899 * -EINVAL: Invalid, or not supported file descriptor
901 int sgx_set_attribute(unsigned long *allowed_attributes
,
902 unsigned int attribute_fd
)
904 CLASS(fd
, f
)(attribute_fd
);
909 if (fd_file(f
)->f_op
!= &sgx_provision_fops
)
912 *allowed_attributes
|= SGX_ATTR_PROVISIONKEY
;
915 EXPORT_SYMBOL_GPL(sgx_set_attribute
);
917 static int __init
sgx_init(void)
922 if (!cpu_feature_enabled(X86_FEATURE_SGX
))
925 if (!sgx_page_cache_init())
928 if (!sgx_page_reclaimer_init()) {
933 ret
= misc_register(&sgx_dev_provision
);
938 * Always try to initialize the native *and* KVM drivers.
939 * The KVM driver is less picky than the native one and
940 * can function if the native one is not supported on the
941 * current system or fails to initialize.
943 * Error out only if both fail to initialize.
945 ret
= sgx_drv_init();
947 if (sgx_vepc_init() && ret
)
953 misc_deregister(&sgx_dev_provision
);
956 kthread_stop(ksgxd_tsk
);
959 for (i
= 0; i
< sgx_nr_epc_sections
; i
++) {
960 vfree(sgx_epc_sections
[i
].pages
);
961 memunmap(sgx_epc_sections
[i
].virt_addr
);
967 device_initcall(sgx_init
);