// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 */

/**
 * DOC: Sample flow of using the ioctl interface provided by the Nitro Enclaves (NE)
 * kernel driver.
 *
 * Load the nitro_enclaves module, setting also the enclave CPU pool. The
 * enclave CPUs need to be full cores from the same NUMA node. CPU 0 and its
 * siblings have to remain available for the primary / parent VM, so they
 * cannot be included in the enclave CPU pool.
 *
 * See the cpu list section from the kernel documentation.
 * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html#cpu-lists
 *
 *	insmod drivers/virt/nitro_enclaves/nitro_enclaves.ko
 *
 * The CPU pool can be set at runtime, after the kernel module is loaded.
 *
 *	echo <cpu-list> > /sys/module/nitro_enclaves/parameters/ne_cpus
 *
 * NUMA and CPU siblings information can be found using, for example, lscpu
 * or /proc/cpuinfo.
 *
 * Check the online / offline CPU list. The CPUs from the pool should be
 * offline.
 *
 * Check dmesg for any warnings / errors through the NE driver lifetime / usage.
 * The NE logs contain the "nitro_enclaves" or "pci 0000:00:02.0" pattern.
 *
 * Setup hugetlbfs huge pages. The memory needs to be from the same NUMA node as
 * the enclave CPUs.
 *
 * https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html
 *
 * By default, the allocation of hugetlb pages is distributed on all possible
 * NUMA nodes. Use the following configuration files to set the number of huge
 * pages from a NUMA node:
 *
 *	/sys/devices/system/node/node<X>/hugepages/hugepages-2048kB/nr_hugepages
 *	/sys/devices/system/node/node<X>/hugepages/hugepages-1048576kB/nr_hugepages
 *
 * or, if not on a system with multiple NUMA nodes, the number of 2 MiB / 1 GiB
 * huge pages can also be set using:
 *
 *	/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
 *	/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
 *
 * In this example 256 hugepages of 2 MiB are used.
 *
 * Build and run the NE sample.
 *
 *	make -C samples/nitro_enclaves clean
 *	make -C samples/nitro_enclaves
 *	./samples/nitro_enclaves/ne_ioctl_sample <path_to_enclave_image>
 *
 * Unload the nitro_enclaves module.
 *
 *	rmmod nitro_enclaves
 */
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <poll.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <linux/mman.h>
#include <linux/nitro_enclaves.h>
#include <linux/vm_sockets.h>
/**
 * NE_DEV_NAME - Nitro Enclaves (NE) misc device that provides the ioctl interface.
 */
#define NE_DEV_NAME			"/dev/nitro_enclaves"

/**
 * NE_POLL_WAIT_TIME - Timeout in seconds for each poll event.
 */
#define NE_POLL_WAIT_TIME		(60)
/**
 * NE_POLL_WAIT_TIME_MS - Timeout in milliseconds for each poll event.
 */
#define NE_POLL_WAIT_TIME_MS		(NE_POLL_WAIT_TIME * 1000)

/**
 * NE_SLEEP_TIME - Amount of time in seconds for the process to keep the enclave alive.
 */
#define NE_SLEEP_TIME			(300)

/**
 * NE_DEFAULT_NR_VCPUS - Default number of vCPUs set for an enclave.
 */
#define NE_DEFAULT_NR_VCPUS		(2)

/**
 * NE_MIN_MEM_REGION_SIZE - Minimum size of a memory region - 2 MiB.
 */
#define NE_MIN_MEM_REGION_SIZE		(2 * 1024 * 1024)

/**
 * NE_DEFAULT_NR_MEM_REGIONS - Default number of memory regions of 2 MiB set for
 *			       an enclave.
 */
#define NE_DEFAULT_NR_MEM_REGIONS	(256)

/**
 * NE_IMAGE_LOAD_HEARTBEAT_CID - Vsock CID for enclave image loading heartbeat logic.
 */
#define NE_IMAGE_LOAD_HEARTBEAT_CID	(3)
/**
 * NE_IMAGE_LOAD_HEARTBEAT_PORT - Vsock port for enclave image loading heartbeat logic.
 */
#define NE_IMAGE_LOAD_HEARTBEAT_PORT	(9000)
/**
 * NE_IMAGE_LOAD_HEARTBEAT_VALUE - Heartbeat value for enclave image loading.
 */
#define NE_IMAGE_LOAD_HEARTBEAT_VALUE	(0xb7)
/**
 * struct ne_user_mem_region - User space memory region set for an enclave.
 * @userspace_addr:	Address of the user space memory region.
 * @memory_size:	Size of the user space memory region.
 */
struct ne_user_mem_region {
	void	*userspace_addr;
	size_t	memory_size;
};
154 * ne_create_vm() - Create a slot for the enclave VM.
155 * @ne_dev_fd: The file descriptor of the NE misc device.
156 * @slot_uid: The generated slot uid for the enclave.
157 * @enclave_fd : The generated file descriptor for the enclave.
159 * Context: Process context.
162 * * Negative return value on failure.
164 static int ne_create_vm(int ne_dev_fd
, unsigned long *slot_uid
, int *enclave_fd
)
167 *enclave_fd
= ioctl(ne_dev_fd
, NE_CREATE_VM
, slot_uid
);
169 if (*enclave_fd
< 0) {
172 case NE_ERR_NO_CPUS_AVAIL_IN_POOL
: {
173 printf("Error in create VM, no CPUs available in the NE CPU pool\n");
179 printf("Error in create VM [%m]\n");
189 * ne_poll_enclave_fd() - Thread function for polling the enclave fd.
190 * @data: Argument provided for the polling function.
192 * Context: Process context.
194 * * NULL on success / failure.
196 void *ne_poll_enclave_fd(void *data
)
198 int enclave_fd
= *(int *)data
;
199 struct pollfd fds
[1] = {};
203 printf("Running from poll thread, enclave fd %d\n", enclave_fd
);
205 fds
[0].fd
= enclave_fd
;
206 fds
[0].events
= POLLIN
| POLLERR
| POLLHUP
;
208 /* Keep on polling until the current process is terminated. */
210 printf("[iter %d] Polling ...\n", i
);
212 rc
= poll(fds
, 1, NE_POLL_WAIT_TIME_MS
);
214 printf("Error in poll [%m]\n");
222 printf("Poll: %d seconds elapsed\n",
223 i
* NE_POLL_WAIT_TIME
);
228 printf("Poll received value 0x%x\n", fds
[0].revents
);
230 if (fds
[0].revents
& POLLHUP
) {
231 printf("Received POLLHUP\n");
236 if (fds
[0].revents
& POLLNVAL
) {
237 printf("Received POLLNVAL\n");
247 * ne_alloc_user_mem_region() - Allocate a user space memory region for an enclave.
248 * @ne_user_mem_region: User space memory region allocated using hugetlbfs.
250 * Context: Process context.
253 * * Negative return value on failure.
255 static int ne_alloc_user_mem_region(struct ne_user_mem_region
*ne_user_mem_region
)
258 * Check available hugetlb encodings for different huge page sizes in
259 * include/uapi/linux/mman.h.
261 ne_user_mem_region
->userspace_addr
= mmap(NULL
, ne_user_mem_region
->memory_size
,
262 PROT_READ
| PROT_WRITE
,
263 MAP_PRIVATE
| MAP_ANONYMOUS
|
264 MAP_HUGETLB
| MAP_HUGE_2MB
, -1, 0);
265 if (ne_user_mem_region
->userspace_addr
== MAP_FAILED
) {
266 printf("Error in mmap memory [%m]\n");
275 * ne_load_enclave_image() - Place the enclave image in the enclave memory.
276 * @enclave_fd : The file descriptor associated with the enclave.
277 * @ne_user_mem_regions: User space memory regions allocated for the enclave.
278 * @enclave_image_path : The file path of the enclave image.
280 * Context: Process context.
283 * * Negative return value on failure.
285 static int ne_load_enclave_image(int enclave_fd
, struct ne_user_mem_region ne_user_mem_regions
[],
286 char *enclave_image_path
)
288 unsigned char *enclave_image
= NULL
;
289 int enclave_image_fd
= -1;
290 size_t enclave_image_size
= 0;
291 size_t enclave_memory_size
= 0;
293 size_t image_written_bytes
= 0;
294 struct ne_image_load_info image_load_info
= {
295 .flags
= NE_EIF_IMAGE
,
297 struct stat image_stat_buf
= {};
299 size_t temp_image_offset
= 0;
301 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++)
302 enclave_memory_size
+= ne_user_mem_regions
[i
].memory_size
;
304 rc
= stat(enclave_image_path
, &image_stat_buf
);
306 printf("Error in get image stat info [%m]\n");
311 enclave_image_size
= image_stat_buf
.st_size
;
313 if (enclave_memory_size
< enclave_image_size
) {
314 printf("The enclave memory is smaller than the enclave image size\n");
319 rc
= ioctl(enclave_fd
, NE_GET_IMAGE_LOAD_INFO
, &image_load_info
);
322 case NE_ERR_NOT_IN_INIT_STATE
: {
323 printf("Error in get image load info, enclave not in init state\n");
328 case NE_ERR_INVALID_FLAG_VALUE
: {
329 printf("Error in get image load info, provided invalid flag\n");
335 printf("Error in get image load info [%m]\n");
341 printf("Enclave image offset in enclave memory is %lld\n",
342 image_load_info
.memory_offset
);
344 enclave_image_fd
= open(enclave_image_path
, O_RDONLY
);
345 if (enclave_image_fd
< 0) {
346 printf("Error in open enclave image file [%m]\n");
348 return enclave_image_fd
;
351 enclave_image
= mmap(NULL
, enclave_image_size
, PROT_READ
,
352 MAP_PRIVATE
, enclave_image_fd
, 0);
353 if (enclave_image
== MAP_FAILED
) {
354 printf("Error in mmap enclave image [%m]\n");
359 temp_image_offset
= image_load_info
.memory_offset
;
361 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++) {
362 size_t bytes_to_write
= 0;
363 size_t memory_offset
= 0;
364 size_t memory_size
= ne_user_mem_regions
[i
].memory_size
;
365 size_t remaining_bytes
= 0;
366 void *userspace_addr
= ne_user_mem_regions
[i
].userspace_addr
;
368 if (temp_image_offset
>= memory_size
) {
369 temp_image_offset
-= memory_size
;
372 } else if (temp_image_offset
!= 0) {
373 memory_offset
= temp_image_offset
;
374 memory_size
-= temp_image_offset
;
375 temp_image_offset
= 0;
378 remaining_bytes
= enclave_image_size
- image_written_bytes
;
379 bytes_to_write
= memory_size
< remaining_bytes
?
380 memory_size
: remaining_bytes
;
382 memcpy(userspace_addr
+ memory_offset
,
383 enclave_image
+ image_written_bytes
, bytes_to_write
);
385 image_written_bytes
+= bytes_to_write
;
387 if (image_written_bytes
== enclave_image_size
)
391 munmap(enclave_image
, enclave_image_size
);
393 close(enclave_image_fd
);
399 * ne_set_user_mem_region() - Set a user space memory region for the given enclave.
400 * @enclave_fd : The file descriptor associated with the enclave.
401 * @ne_user_mem_region : User space memory region to be set for the enclave.
403 * Context: Process context.
406 * * Negative return value on failure.
408 static int ne_set_user_mem_region(int enclave_fd
, struct ne_user_mem_region ne_user_mem_region
)
410 struct ne_user_memory_region mem_region
= {
411 .flags
= NE_DEFAULT_MEMORY_REGION
,
412 .memory_size
= ne_user_mem_region
.memory_size
,
413 .userspace_addr
= (__u64
)ne_user_mem_region
.userspace_addr
,
417 rc
= ioctl(enclave_fd
, NE_SET_USER_MEMORY_REGION
, &mem_region
);
420 case NE_ERR_NOT_IN_INIT_STATE
: {
421 printf("Error in set user memory region, enclave not in init state\n");
426 case NE_ERR_INVALID_MEM_REGION_SIZE
: {
427 printf("Error in set user memory region, mem size not multiple of 2 MiB\n");
432 case NE_ERR_INVALID_MEM_REGION_ADDR
: {
433 printf("Error in set user memory region, invalid user space address\n");
438 case NE_ERR_UNALIGNED_MEM_REGION_ADDR
: {
439 printf("Error in set user memory region, unaligned user space address\n");
444 case NE_ERR_MEM_REGION_ALREADY_USED
: {
445 printf("Error in set user memory region, memory region already used\n");
450 case NE_ERR_MEM_NOT_HUGE_PAGE
: {
451 printf("Error in set user memory region, not backed by huge pages\n");
456 case NE_ERR_MEM_DIFFERENT_NUMA_NODE
: {
457 printf("Error in set user memory region, different NUMA node than CPUs\n");
462 case NE_ERR_MEM_MAX_REGIONS
: {
463 printf("Error in set user memory region, max memory regions reached\n");
468 case NE_ERR_INVALID_PAGE_SIZE
: {
469 printf("Error in set user memory region, has page not multiple of 2 MiB\n");
474 case NE_ERR_INVALID_FLAG_VALUE
: {
475 printf("Error in set user memory region, provided invalid flag\n");
481 printf("Error in set user memory region [%m]\n");
491 * ne_free_mem_regions() - Unmap all the user space memory regions that were set
492 * aside for the enclave.
493 * @ne_user_mem_regions: The user space memory regions associated with an enclave.
495 * Context: Process context.
497 static void ne_free_mem_regions(struct ne_user_mem_region ne_user_mem_regions
[])
501 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++)
502 munmap(ne_user_mem_regions
[i
].userspace_addr
,
503 ne_user_mem_regions
[i
].memory_size
);
507 * ne_add_vcpu() - Add a vCPU to the given enclave.
508 * @enclave_fd : The file descriptor associated with the enclave.
509 * @vcpu_id: vCPU id to be set for the enclave, either provided or
510 * auto-generated (if provided vCPU id is 0).
512 * Context: Process context.
515 * * Negative return value on failure.
517 static int ne_add_vcpu(int enclave_fd
, unsigned int *vcpu_id
)
521 rc
= ioctl(enclave_fd
, NE_ADD_VCPU
, vcpu_id
);
524 case NE_ERR_NO_CPUS_AVAIL_IN_POOL
: {
525 printf("Error in add vcpu, no CPUs available in the NE CPU pool\n");
530 case NE_ERR_VCPU_ALREADY_USED
: {
531 printf("Error in add vcpu, the provided vCPU is already used\n");
536 case NE_ERR_VCPU_NOT_IN_CPU_POOL
: {
537 printf("Error in add vcpu, the provided vCPU is not in the NE CPU pool\n");
542 case NE_ERR_VCPU_INVALID_CPU_CORE
: {
543 printf("Error in add vcpu, the core id of the provided vCPU is invalid\n");
548 case NE_ERR_NOT_IN_INIT_STATE
: {
549 printf("Error in add vcpu, enclave not in init state\n");
554 case NE_ERR_INVALID_VCPU
: {
555 printf("Error in add vcpu, the provided vCPU is out of avail CPUs range\n");
561 printf("Error in add vcpu [%m]\n");
571 * ne_start_enclave() - Start the given enclave.
572 * @enclave_fd : The file descriptor associated with the enclave.
573 * @enclave_start_info : Enclave metadata used for starting e.g. vsock CID.
575 * Context: Process context.
578 * * Negative return value on failure.
580 static int ne_start_enclave(int enclave_fd
, struct ne_enclave_start_info
*enclave_start_info
)
584 rc
= ioctl(enclave_fd
, NE_START_ENCLAVE
, enclave_start_info
);
587 case NE_ERR_NOT_IN_INIT_STATE
: {
588 printf("Error in start enclave, enclave not in init state\n");
593 case NE_ERR_NO_MEM_REGIONS_ADDED
: {
594 printf("Error in start enclave, no memory regions have been added\n");
599 case NE_ERR_NO_VCPUS_ADDED
: {
600 printf("Error in start enclave, no vCPUs have been added\n");
605 case NE_ERR_FULL_CORES_NOT_USED
: {
606 printf("Error in start enclave, enclave has no full cores set\n");
611 case NE_ERR_ENCLAVE_MEM_MIN_SIZE
: {
612 printf("Error in start enclave, enclave memory is less than min size\n");
617 case NE_ERR_INVALID_FLAG_VALUE
: {
618 printf("Error in start enclave, provided invalid flag\n");
623 case NE_ERR_INVALID_ENCLAVE_CID
: {
624 printf("Error in start enclave, provided invalid enclave CID\n");
630 printf("Error in start enclave [%m]\n");
640 * ne_start_enclave_check_booted() - Start the enclave and wait for a heartbeat
641 * from it, on a newly created vsock channel,
642 * to check it has booted.
643 * @enclave_fd : The file descriptor associated with the enclave.
645 * Context: Process context.
648 * * Negative return value on failure.
650 static int ne_start_enclave_check_booted(int enclave_fd
)
652 struct sockaddr_vm client_vsock_addr
= {};
653 int client_vsock_fd
= -1;
654 socklen_t client_vsock_len
= sizeof(client_vsock_addr
);
655 struct ne_enclave_start_info enclave_start_info
= {};
656 struct pollfd fds
[1] = {};
658 unsigned char recv_buf
= 0;
659 struct sockaddr_vm server_vsock_addr
= {
660 .svm_family
= AF_VSOCK
,
661 .svm_cid
= NE_IMAGE_LOAD_HEARTBEAT_CID
,
662 .svm_port
= NE_IMAGE_LOAD_HEARTBEAT_PORT
,
664 int server_vsock_fd
= -1;
666 server_vsock_fd
= socket(AF_VSOCK
, SOCK_STREAM
, 0);
667 if (server_vsock_fd
< 0) {
668 rc
= server_vsock_fd
;
670 printf("Error in socket [%m]\n");
675 rc
= bind(server_vsock_fd
, (struct sockaddr
*)&server_vsock_addr
,
676 sizeof(server_vsock_addr
));
678 printf("Error in bind [%m]\n");
683 rc
= listen(server_vsock_fd
, 1);
685 printf("Error in listen [%m]\n");
690 rc
= ne_start_enclave(enclave_fd
, &enclave_start_info
);
694 printf("Enclave started, CID %llu\n", enclave_start_info
.enclave_cid
);
696 fds
[0].fd
= server_vsock_fd
;
697 fds
[0].events
= POLLIN
;
699 rc
= poll(fds
, 1, NE_POLL_WAIT_TIME_MS
);
701 printf("Error in poll [%m]\n");
707 printf("Poll timeout, %d seconds elapsed\n", NE_POLL_WAIT_TIME
);
714 if ((fds
[0].revents
& POLLIN
) == 0) {
715 printf("Poll received value %d\n", fds
[0].revents
);
722 rc
= accept(server_vsock_fd
, (struct sockaddr
*)&client_vsock_addr
,
725 printf("Error in accept [%m]\n");
730 client_vsock_fd
= rc
;
733 * Read the heartbeat value that the init process in the enclave sends
734 * after vsock connect.
736 rc
= read(client_vsock_fd
, &recv_buf
, sizeof(recv_buf
));
738 printf("Error in read [%m]\n");
743 if (rc
!= sizeof(recv_buf
) || recv_buf
!= NE_IMAGE_LOAD_HEARTBEAT_VALUE
) {
744 printf("Read %d instead of %d\n", recv_buf
,
745 NE_IMAGE_LOAD_HEARTBEAT_VALUE
);
750 /* Write the heartbeat value back. */
751 rc
= write(client_vsock_fd
, &recv_buf
, sizeof(recv_buf
));
753 printf("Error in write [%m]\n");
761 close(server_vsock_fd
);
766 int main(int argc
, char *argv
[])
771 struct ne_user_mem_region ne_user_mem_regions
[NE_DEFAULT_NR_MEM_REGIONS
] = {};
772 unsigned int ne_vcpus
[NE_DEFAULT_NR_VCPUS
] = {};
774 pthread_t thread_id
= 0;
775 unsigned long slot_uid
= 0;
778 printf("Usage: %s <path_to_enclave_image>\n", argv
[0]);
783 if (strlen(argv
[1]) >= PATH_MAX
) {
784 printf("The size of the path to enclave image is higher than max path\n");
789 ne_dev_fd
= open(NE_DEV_NAME
, O_RDWR
| O_CLOEXEC
);
791 printf("Error in open NE device [%m]\n");
796 printf("Creating enclave slot ...\n");
798 rc
= ne_create_vm(ne_dev_fd
, &slot_uid
, &enclave_fd
);
805 printf("Enclave fd %d\n", enclave_fd
);
807 rc
= pthread_create(&thread_id
, NULL
, ne_poll_enclave_fd
, (void *)&enclave_fd
);
809 printf("Error in thread create [%m]\n");
816 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++) {
817 ne_user_mem_regions
[i
].memory_size
= NE_MIN_MEM_REGION_SIZE
;
819 rc
= ne_alloc_user_mem_region(&ne_user_mem_regions
[i
]);
821 printf("Error in alloc userspace memory region, iter %d\n", i
);
823 goto release_enclave_fd
;
827 rc
= ne_load_enclave_image(enclave_fd
, ne_user_mem_regions
, argv
[1]);
829 goto release_enclave_fd
;
831 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++) {
832 rc
= ne_set_user_mem_region(enclave_fd
, ne_user_mem_regions
[i
]);
834 printf("Error in set memory region, iter %d\n", i
);
836 goto release_enclave_fd
;
840 printf("Enclave memory regions were added\n");
842 for (i
= 0; i
< NE_DEFAULT_NR_VCPUS
; i
++) {
844 * The vCPU is chosen from the enclave vCPU pool, if the value
845 * of the vcpu_id is 0.
848 rc
= ne_add_vcpu(enclave_fd
, &ne_vcpus
[i
]);
850 printf("Error in add vcpu, iter %d\n", i
);
852 goto release_enclave_fd
;
855 printf("Added vCPU %d to the enclave\n", ne_vcpus
[i
]);
858 printf("Enclave vCPUs were added\n");
860 rc
= ne_start_enclave_check_booted(enclave_fd
);
862 printf("Error in the enclave start / image loading heartbeat logic [rc=%d]\n", rc
);
864 goto release_enclave_fd
;
867 printf("Entering sleep for %d seconds ...\n", NE_SLEEP_TIME
);
869 sleep(NE_SLEEP_TIME
);
873 ne_free_mem_regions(ne_user_mem_regions
);
879 ne_free_mem_regions(ne_user_mem_regions
);