1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
7 * DOC: Sample flow of using the ioctl interface provided by the Nitro Enclaves (NE)
13 * Load the nitro_enclaves module, setting also the enclave CPU pool. The
14 * enclave CPUs need to be full cores from the same NUMA node. CPU 0 and its
15 * siblings have to remain available for the primary / parent VM, so they
16 * cannot be included in the enclave CPU pool.
18 * See the cpu list section from the kernel documentation.
19 * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html#cpu-lists
21 * insmod drivers/virt/nitro_enclaves/nitro_enclaves.ko
24 * The CPU pool can be set at runtime, after the kernel module is loaded.
26 * echo <cpu-list> > /sys/module/nitro_enclaves/parameters/ne_cpus
28 * NUMA and CPU siblings information can be found using:
33 * Check the online / offline CPU list. The CPUs from the pool should be
38 * Check dmesg for any warnings / errors through the NE driver lifetime / usage.
39 * The NE logs contain the "nitro_enclaves" or "pci 0000:00:02.0" pattern.
43 * Set up hugetlbfs huge pages. The memory needs to be from the same NUMA node as
46 * https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html
48 * By default, the allocation of hugetlb pages is distributed on all possible
49 * NUMA nodes. Use the following configuration files to set the number of huge
50 * pages from a NUMA node:
52 * /sys/devices/system/node/node<X>/hugepages/hugepages-2048kB/nr_hugepages
53 * /sys/devices/system/node/node<X>/hugepages/hugepages-1048576kB/nr_hugepages
55 * or, if not on a system with multiple NUMA nodes, you can also set the number
56 * of 2 MiB / 1 GiB huge pages using
58 * /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
59 * /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
61 * In this example 256 hugepages of 2 MiB are used.
63 * Build and run the NE sample.
65 * make -C samples/nitro_enclaves clean
66 * make -C samples/nitro_enclaves
67 * ./samples/nitro_enclaves/ne_ioctl_sample <path_to_enclave_image>
69 * Unload the nitro_enclaves module.
71 * rmmod nitro_enclaves
83 #include <sys/eventfd.h>
84 #include <sys/ioctl.h>
86 #include <sys/socket.h>
88 #include <sys/types.h>
91 #include <linux/mman.h>
92 #include <linux/nitro_enclaves.h>
93 #include <linux/vm_sockets.h>
96 * NE_DEV_NAME - Nitro Enclaves (NE) misc device that provides the ioctl interface.
98 #define NE_DEV_NAME "/dev/nitro_enclaves"
101 * NE_POLL_WAIT_TIME - Timeout in seconds for each poll event.
103 #define NE_POLL_WAIT_TIME (60)
105 * NE_POLL_WAIT_TIME_MS - Timeout in milliseconds for each poll event.
107 #define NE_POLL_WAIT_TIME_MS (NE_POLL_WAIT_TIME * 1000)
110 * NE_SLEEP_TIME - Amount of time in seconds for the process to keep the enclave alive.
112 #define NE_SLEEP_TIME (300)
115 * NE_DEFAULT_NR_VCPUS - Default number of vCPUs set for an enclave.
117 #define NE_DEFAULT_NR_VCPUS (2)
120 * NE_MIN_MEM_REGION_SIZE - Minimum size of a memory region - 2 MiB.
122 #define NE_MIN_MEM_REGION_SIZE (2 * 1024 * 1024)
125 * NE_DEFAULT_NR_MEM_REGIONS - Default number of memory regions of 2 MiB set for
128 #define NE_DEFAULT_NR_MEM_REGIONS (256)
131 * NE_IMAGE_LOAD_HEARTBEAT_CID - Vsock CID for enclave image loading heartbeat logic.
133 #define NE_IMAGE_LOAD_HEARTBEAT_CID (3)
135 * NE_IMAGE_LOAD_HEARTBEAT_PORT - Vsock port for enclave image loading heartbeat logic.
137 #define NE_IMAGE_LOAD_HEARTBEAT_PORT (9000)
139 * NE_IMAGE_LOAD_HEARTBEAT_VALUE - Heartbeat value for enclave image loading.
141 #define NE_IMAGE_LOAD_HEARTBEAT_VALUE (0xb7)
144 * struct ne_user_mem_region - User space memory region set for an enclave.
145 * @userspace_addr: Address of the user space memory region.
146 * @memory_size: Size of the user space memory region.
148 struct ne_user_mem_region
{
149 void *userspace_addr
;
154 * ne_create_vm() - Create a slot for the enclave VM.
155 * @ne_dev_fd: The file descriptor of the NE misc device.
156 * @slot_uid: The generated slot uid for the enclave.
157 * @enclave_fd : The generated file descriptor for the enclave.
159 * Context: Process context.
162 * * Negative return value on failure.
164 static int ne_create_vm(int ne_dev_fd
, unsigned long *slot_uid
, int *enclave_fd
)
167 *enclave_fd
= ioctl(ne_dev_fd
, NE_CREATE_VM
, slot_uid
);
169 if (*enclave_fd
< 0) {
172 case NE_ERR_NO_CPUS_AVAIL_IN_POOL
: {
173 printf("Error in create VM, no CPUs available in the NE CPU pool\n");
179 printf("Error in create VM [%m]\n");
190 * ne_poll_enclave_fd() - Thread function for polling the enclave fd.
191 * @data: Argument provided for the polling function.
193 * Context: Process context.
195 * * NULL on success / failure.
197 void *ne_poll_enclave_fd(void *data
)
199 int enclave_fd
= *(int *)data
;
200 struct pollfd fds
[1] = {};
204 printf("Running from poll thread, enclave fd %d\n", enclave_fd
);
206 fds
[0].fd
= enclave_fd
;
207 fds
[0].events
= POLLIN
| POLLERR
| POLLHUP
;
209 /* Keep on polling until the current process is terminated. */
211 printf("[iter %d] Polling ...\n", i
);
213 rc
= poll(fds
, 1, NE_POLL_WAIT_TIME_MS
);
215 printf("Error in poll [%m]\n");
223 printf("Poll: %d seconds elapsed\n",
224 i
* NE_POLL_WAIT_TIME
);
229 printf("Poll received value 0x%x\n", fds
[0].revents
);
231 if (fds
[0].revents
& POLLHUP
) {
232 printf("Received POLLHUP\n");
237 if (fds
[0].revents
& POLLNVAL
) {
238 printf("Received POLLNVAL\n");
248 * ne_alloc_user_mem_region() - Allocate a user space memory region for an enclave.
249 * @ne_user_mem_region: User space memory region allocated using hugetlbfs.
251 * Context: Process context.
254 * * Negative return value on failure.
256 static int ne_alloc_user_mem_region(struct ne_user_mem_region
*ne_user_mem_region
)
259 * Check available hugetlb encodings for different huge page sizes in
260 * include/uapi/linux/mman.h.
262 ne_user_mem_region
->userspace_addr
= mmap(NULL
, ne_user_mem_region
->memory_size
,
263 PROT_READ
| PROT_WRITE
,
264 MAP_PRIVATE
| MAP_ANONYMOUS
|
265 MAP_HUGETLB
| MAP_HUGE_2MB
, -1, 0);
266 if (ne_user_mem_region
->userspace_addr
== MAP_FAILED
) {
267 printf("Error in mmap memory [%m]\n");
276 * ne_load_enclave_image() - Place the enclave image in the enclave memory.
277 * @enclave_fd : The file descriptor associated with the enclave.
278 * @ne_user_mem_regions: User space memory regions allocated for the enclave.
279 * @enclave_image_path : The file path of the enclave image.
281 * Context: Process context.
284 * * Negative return value on failure.
286 static int ne_load_enclave_image(int enclave_fd
, struct ne_user_mem_region ne_user_mem_regions
[],
287 char *enclave_image_path
)
289 unsigned char *enclave_image
= NULL
;
290 int enclave_image_fd
= -1;
291 size_t enclave_image_size
= 0;
292 size_t enclave_memory_size
= 0;
294 size_t image_written_bytes
= 0;
295 struct ne_image_load_info image_load_info
= {
296 .flags
= NE_EIF_IMAGE
,
298 struct stat image_stat_buf
= {};
300 size_t temp_image_offset
= 0;
302 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++)
303 enclave_memory_size
+= ne_user_mem_regions
[i
].memory_size
;
305 rc
= stat(enclave_image_path
, &image_stat_buf
);
307 printf("Error in get image stat info [%m]\n");
312 enclave_image_size
= image_stat_buf
.st_size
;
314 if (enclave_memory_size
< enclave_image_size
) {
315 printf("The enclave memory is smaller than the enclave image size\n");
320 rc
= ioctl(enclave_fd
, NE_GET_IMAGE_LOAD_INFO
, &image_load_info
);
323 case NE_ERR_NOT_IN_INIT_STATE
: {
324 printf("Error in get image load info, enclave not in init state\n");
329 case NE_ERR_INVALID_FLAG_VALUE
: {
330 printf("Error in get image load info, provided invalid flag\n");
336 printf("Error in get image load info [%m]\n");
342 printf("Enclave image offset in enclave memory is %lld\n",
343 image_load_info
.memory_offset
);
345 enclave_image_fd
= open(enclave_image_path
, O_RDONLY
);
346 if (enclave_image_fd
< 0) {
347 printf("Error in open enclave image file [%m]\n");
349 return enclave_image_fd
;
352 enclave_image
= mmap(NULL
, enclave_image_size
, PROT_READ
,
353 MAP_PRIVATE
, enclave_image_fd
, 0);
354 if (enclave_image
== MAP_FAILED
) {
355 printf("Error in mmap enclave image [%m]\n");
360 temp_image_offset
= image_load_info
.memory_offset
;
362 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++) {
363 size_t bytes_to_write
= 0;
364 size_t memory_offset
= 0;
365 size_t memory_size
= ne_user_mem_regions
[i
].memory_size
;
366 size_t remaining_bytes
= 0;
367 void *userspace_addr
= ne_user_mem_regions
[i
].userspace_addr
;
369 if (temp_image_offset
>= memory_size
) {
370 temp_image_offset
-= memory_size
;
373 } else if (temp_image_offset
!= 0) {
374 memory_offset
= temp_image_offset
;
375 memory_size
-= temp_image_offset
;
376 temp_image_offset
= 0;
379 remaining_bytes
= enclave_image_size
- image_written_bytes
;
380 bytes_to_write
= memory_size
< remaining_bytes
?
381 memory_size
: remaining_bytes
;
383 memcpy(userspace_addr
+ memory_offset
,
384 enclave_image
+ image_written_bytes
, bytes_to_write
);
386 image_written_bytes
+= bytes_to_write
;
388 if (image_written_bytes
== enclave_image_size
)
392 munmap(enclave_image
, enclave_image_size
);
394 close(enclave_image_fd
);
400 * ne_set_user_mem_region() - Set a user space memory region for the given enclave.
401 * @enclave_fd : The file descriptor associated with the enclave.
402 * @ne_user_mem_region : User space memory region to be set for the enclave.
404 * Context: Process context.
407 * * Negative return value on failure.
409 static int ne_set_user_mem_region(int enclave_fd
, struct ne_user_mem_region ne_user_mem_region
)
411 struct ne_user_memory_region mem_region
= {
412 .flags
= NE_DEFAULT_MEMORY_REGION
,
413 .memory_size
= ne_user_mem_region
.memory_size
,
414 .userspace_addr
= (__u64
)ne_user_mem_region
.userspace_addr
,
418 rc
= ioctl(enclave_fd
, NE_SET_USER_MEMORY_REGION
, &mem_region
);
421 case NE_ERR_NOT_IN_INIT_STATE
: {
422 printf("Error in set user memory region, enclave not in init state\n");
427 case NE_ERR_INVALID_MEM_REGION_SIZE
: {
428 printf("Error in set user memory region, mem size not multiple of 2 MiB\n");
433 case NE_ERR_INVALID_MEM_REGION_ADDR
: {
434 printf("Error in set user memory region, invalid user space address\n");
439 case NE_ERR_UNALIGNED_MEM_REGION_ADDR
: {
440 printf("Error in set user memory region, unaligned user space address\n");
445 case NE_ERR_MEM_REGION_ALREADY_USED
: {
446 printf("Error in set user memory region, memory region already used\n");
451 case NE_ERR_MEM_NOT_HUGE_PAGE
: {
452 printf("Error in set user memory region, not backed by huge pages\n");
457 case NE_ERR_MEM_DIFFERENT_NUMA_NODE
: {
458 printf("Error in set user memory region, different NUMA node than CPUs\n");
463 case NE_ERR_MEM_MAX_REGIONS
: {
464 printf("Error in set user memory region, max memory regions reached\n");
469 case NE_ERR_INVALID_PAGE_SIZE
: {
470 printf("Error in set user memory region, has page not multiple of 2 MiB\n");
475 case NE_ERR_INVALID_FLAG_VALUE
: {
476 printf("Error in set user memory region, provided invalid flag\n");
482 printf("Error in set user memory region [%m]\n");
492 * ne_free_mem_regions() - Unmap all the user space memory regions that were set
493 * aside for the enclave.
494 * @ne_user_mem_regions: The user space memory regions associated with an enclave.
496 * Context: Process context.
498 static void ne_free_mem_regions(struct ne_user_mem_region ne_user_mem_regions
[])
502 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++)
503 munmap(ne_user_mem_regions
[i
].userspace_addr
,
504 ne_user_mem_regions
[i
].memory_size
);
508 * ne_add_vcpu() - Add a vCPU to the given enclave.
509 * @enclave_fd : The file descriptor associated with the enclave.
510 * @vcpu_id: vCPU id to be set for the enclave, either provided or
511 * auto-generated (if provided vCPU id is 0).
513 * Context: Process context.
516 * * Negative return value on failure.
518 static int ne_add_vcpu(int enclave_fd
, unsigned int *vcpu_id
)
522 rc
= ioctl(enclave_fd
, NE_ADD_VCPU
, vcpu_id
);
525 case NE_ERR_NO_CPUS_AVAIL_IN_POOL
: {
526 printf("Error in add vcpu, no CPUs available in the NE CPU pool\n");
531 case NE_ERR_VCPU_ALREADY_USED
: {
532 printf("Error in add vcpu, the provided vCPU is already used\n");
537 case NE_ERR_VCPU_NOT_IN_CPU_POOL
: {
538 printf("Error in add vcpu, the provided vCPU is not in the NE CPU pool\n");
543 case NE_ERR_VCPU_INVALID_CPU_CORE
: {
544 printf("Error in add vcpu, the core id of the provided vCPU is invalid\n");
549 case NE_ERR_NOT_IN_INIT_STATE
: {
550 printf("Error in add vcpu, enclave not in init state\n");
555 case NE_ERR_INVALID_VCPU
: {
556 printf("Error in add vcpu, the provided vCPU is out of avail CPUs range\n");
562 printf("Error in add vcpu [%m]\n");
572 * ne_start_enclave() - Start the given enclave.
573 * @enclave_fd : The file descriptor associated with the enclave.
574 * @enclave_start_info : Enclave metadata used for starting e.g. vsock CID.
576 * Context: Process context.
579 * * Negative return value on failure.
581 static int ne_start_enclave(int enclave_fd
, struct ne_enclave_start_info
*enclave_start_info
)
585 rc
= ioctl(enclave_fd
, NE_START_ENCLAVE
, enclave_start_info
);
588 case NE_ERR_NOT_IN_INIT_STATE
: {
589 printf("Error in start enclave, enclave not in init state\n");
594 case NE_ERR_NO_MEM_REGIONS_ADDED
: {
595 printf("Error in start enclave, no memory regions have been added\n");
600 case NE_ERR_NO_VCPUS_ADDED
: {
601 printf("Error in start enclave, no vCPUs have been added\n");
606 case NE_ERR_FULL_CORES_NOT_USED
: {
607 printf("Error in start enclave, enclave has no full cores set\n");
612 case NE_ERR_ENCLAVE_MEM_MIN_SIZE
: {
613 printf("Error in start enclave, enclave memory is less than min size\n");
618 case NE_ERR_INVALID_FLAG_VALUE
: {
619 printf("Error in start enclave, provided invalid flag\n");
624 case NE_ERR_INVALID_ENCLAVE_CID
: {
625 printf("Error in start enclave, provided invalid enclave CID\n");
631 printf("Error in start enclave [%m]\n");
641 * ne_start_enclave_check_booted() - Start the enclave and wait for a heartbeat
642 * from it, on a newly created vsock channel,
643 * to check it has booted.
644 * @enclave_fd : The file descriptor associated with the enclave.
646 * Context: Process context.
649 * * Negative return value on failure.
651 static int ne_start_enclave_check_booted(int enclave_fd
)
653 struct sockaddr_vm client_vsock_addr
= {};
654 int client_vsock_fd
= -1;
655 socklen_t client_vsock_len
= sizeof(client_vsock_addr
);
656 struct ne_enclave_start_info enclave_start_info
= {};
657 struct pollfd fds
[1] = {};
659 unsigned char recv_buf
= 0;
660 struct sockaddr_vm server_vsock_addr
= {
661 .svm_family
= AF_VSOCK
,
662 .svm_cid
= NE_IMAGE_LOAD_HEARTBEAT_CID
,
663 .svm_port
= NE_IMAGE_LOAD_HEARTBEAT_PORT
,
665 int server_vsock_fd
= -1;
667 server_vsock_fd
= socket(AF_VSOCK
, SOCK_STREAM
, 0);
668 if (server_vsock_fd
< 0) {
669 rc
= server_vsock_fd
;
671 printf("Error in socket [%m]\n");
676 rc
= bind(server_vsock_fd
, (struct sockaddr
*)&server_vsock_addr
,
677 sizeof(server_vsock_addr
));
679 printf("Error in bind [%m]\n");
684 rc
= listen(server_vsock_fd
, 1);
686 printf("Error in listen [%m]\n");
691 rc
= ne_start_enclave(enclave_fd
, &enclave_start_info
);
695 printf("Enclave started, CID %llu\n", enclave_start_info
.enclave_cid
);
697 fds
[0].fd
= server_vsock_fd
;
698 fds
[0].events
= POLLIN
;
700 rc
= poll(fds
, 1, NE_POLL_WAIT_TIME_MS
);
702 printf("Error in poll [%m]\n");
708 printf("Poll timeout, %d seconds elapsed\n", NE_POLL_WAIT_TIME
);
715 if ((fds
[0].revents
& POLLIN
) == 0) {
716 printf("Poll received value %d\n", fds
[0].revents
);
723 rc
= accept(server_vsock_fd
, (struct sockaddr
*)&client_vsock_addr
,
726 printf("Error in accept [%m]\n");
731 client_vsock_fd
= rc
;
734 * Read the heartbeat value that the init process in the enclave sends
735 * after vsock connect.
737 rc
= read(client_vsock_fd
, &recv_buf
, sizeof(recv_buf
));
739 printf("Error in read [%m]\n");
744 if (rc
!= sizeof(recv_buf
) || recv_buf
!= NE_IMAGE_LOAD_HEARTBEAT_VALUE
) {
745 printf("Read %d instead of %d\n", recv_buf
,
746 NE_IMAGE_LOAD_HEARTBEAT_VALUE
);
751 /* Write the heartbeat value back. */
752 rc
= write(client_vsock_fd
, &recv_buf
, sizeof(recv_buf
));
754 printf("Error in write [%m]\n");
762 close(server_vsock_fd
);
767 int main(int argc
, char *argv
[])
772 struct ne_user_mem_region ne_user_mem_regions
[NE_DEFAULT_NR_MEM_REGIONS
] = {};
773 unsigned int ne_vcpus
[NE_DEFAULT_NR_VCPUS
] = {};
775 pthread_t thread_id
= 0;
776 unsigned long slot_uid
= 0;
779 printf("Usage: %s <path_to_enclave_image>\n", argv
[0]);
784 if (strlen(argv
[1]) >= PATH_MAX
) {
785 printf("The size of the path to enclave image is higher than max path\n");
790 ne_dev_fd
= open(NE_DEV_NAME
, O_RDWR
| O_CLOEXEC
);
792 printf("Error in open NE device [%m]\n");
797 printf("Creating enclave slot ...\n");
799 rc
= ne_create_vm(ne_dev_fd
, &slot_uid
, &enclave_fd
);
806 printf("Enclave fd %d\n", enclave_fd
);
808 rc
= pthread_create(&thread_id
, NULL
, ne_poll_enclave_fd
, (void *)&enclave_fd
);
810 printf("Error in thread create [%m]\n");
817 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++) {
818 ne_user_mem_regions
[i
].memory_size
= NE_MIN_MEM_REGION_SIZE
;
820 rc
= ne_alloc_user_mem_region(&ne_user_mem_regions
[i
]);
822 printf("Error in alloc userspace memory region, iter %d\n", i
);
824 goto release_enclave_fd
;
828 rc
= ne_load_enclave_image(enclave_fd
, ne_user_mem_regions
, argv
[1]);
830 goto release_enclave_fd
;
832 for (i
= 0; i
< NE_DEFAULT_NR_MEM_REGIONS
; i
++) {
833 rc
= ne_set_user_mem_region(enclave_fd
, ne_user_mem_regions
[i
]);
835 printf("Error in set memory region, iter %d\n", i
);
837 goto release_enclave_fd
;
841 printf("Enclave memory regions were added\n");
843 for (i
= 0; i
< NE_DEFAULT_NR_VCPUS
; i
++) {
845 * The vCPU is chosen from the enclave vCPU pool, if the value
846 * of the vcpu_id is 0.
849 rc
= ne_add_vcpu(enclave_fd
, &ne_vcpus
[i
]);
851 printf("Error in add vcpu, iter %d\n", i
);
853 goto release_enclave_fd
;
856 printf("Added vCPU %d to the enclave\n", ne_vcpus
[i
]);
859 printf("Enclave vCPUs were added\n");
861 rc
= ne_start_enclave_check_booted(enclave_fd
);
863 printf("Error in the enclave start / image loading heartbeat logic [rc=%d]\n", rc
);
865 goto release_enclave_fd
;
868 printf("Entering sleep for %d seconds ...\n", NE_SLEEP_TIME
);
870 sleep(NE_SLEEP_TIME
);
874 ne_free_mem_regions(ne_user_mem_regions
);
880 ne_free_mem_regions(ne_user_mem_regions
);