// SPDX-License-Identifier: GPL-2.0
/*
 * KVM demand paging test
 * Adapted from dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2019, Google, Inc.
 */
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>

#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "ucall_common.h"
#include "userfaultfd_util.h"
24 #ifdef __NR_userfaultfd
26 static int nr_vcpus
= 1;
27 static uint64_t guest_percpu_mem_size
= DEFAULT_PER_VCPU_MEM_SIZE
;
29 static size_t demand_paging_size
;
30 static char *guest_data_prototype
;
32 static void vcpu_worker(struct memstress_vcpu_args
*vcpu_args
)
34 struct kvm_vcpu
*vcpu
= vcpu_args
->vcpu
;
35 int vcpu_idx
= vcpu_args
->vcpu_idx
;
36 struct kvm_run
*run
= vcpu
->run
;
37 struct timespec start
;
38 struct timespec ts_diff
;
41 clock_gettime(CLOCK_MONOTONIC
, &start
);
43 /* Let the guest access its memory */
44 ret
= _vcpu_run(vcpu
);
45 TEST_ASSERT(ret
== 0, "vcpu_run failed: %d", ret
);
46 if (get_ucall(vcpu
, NULL
) != UCALL_SYNC
) {
48 "Invalid guest sync status: exit_reason=%s",
49 exit_reason_str(run
->exit_reason
));
52 ts_diff
= timespec_elapsed(start
);
53 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx
,
54 ts_diff
.tv_sec
, ts_diff
.tv_nsec
);
57 static int handle_uffd_page_request(int uffd_mode
, int uffd
,
60 pid_t tid
= syscall(__NR_gettid
);
61 uint64_t addr
= msg
->arg
.pagefault
.address
;
62 struct timespec start
;
63 struct timespec ts_diff
;
66 clock_gettime(CLOCK_MONOTONIC
, &start
);
68 if (uffd_mode
== UFFDIO_REGISTER_MODE_MISSING
) {
69 struct uffdio_copy copy
;
71 copy
.src
= (uint64_t)guest_data_prototype
;
73 copy
.len
= demand_paging_size
;
76 r
= ioctl(uffd
, UFFDIO_COPY
, ©
);
78 * With multiple vCPU threads fault on a single page and there are
79 * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
80 * will fail with EEXIST: handle that case without signaling an
83 * Note that this also suppress any EEXISTs occurring from,
84 * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
85 * happens here, but a realistic VMM might potentially maintain
86 * some external state to correctly surface EEXISTs to userspace
87 * (or prevent duplicate COPY/CONTINUEs in the first place).
89 if (r
== -1 && errno
!= EEXIST
) {
90 pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n",
94 } else if (uffd_mode
== UFFDIO_REGISTER_MODE_MINOR
) {
95 struct uffdio_continue cont
= {0};
97 cont
.range
.start
= addr
;
98 cont
.range
.len
= demand_paging_size
;
100 r
= ioctl(uffd
, UFFDIO_CONTINUE
, &cont
);
102 * With multiple vCPU threads fault on a single page and there are
103 * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
104 * will fail with EEXIST: handle that case without signaling an
107 * Note that this also suppress any EEXISTs occurring from,
108 * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
109 * happens here, but a realistic VMM might potentially maintain
110 * some external state to correctly surface EEXISTs to userspace
111 * (or prevent duplicate COPY/CONTINUEs in the first place).
113 if (r
== -1 && errno
!= EEXIST
) {
114 pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n",
119 TEST_FAIL("Invalid uffd mode %d", uffd_mode
);
122 ts_diff
= timespec_elapsed(start
);
124 PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid
,
125 timespec_to_ns(ts_diff
));
126 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
127 demand_paging_size
, addr
, tid
);
135 useconds_t uffd_delay
;
136 int readers_per_uffd
;
137 enum vm_mem_backing_src_type src_type
;
138 bool partition_vcpu_memory_access
;
141 static void prefault_mem(void *alias
, uint64_t len
)
145 TEST_ASSERT(alias
!= NULL
, "Alias required for minor faults");
146 for (p
= 0; p
< (len
/ demand_paging_size
); ++p
) {
147 memcpy(alias
+ (p
* demand_paging_size
),
148 guest_data_prototype
, demand_paging_size
);
152 static void run_test(enum vm_guest_mode mode
, void *arg
)
154 struct memstress_vcpu_args
*vcpu_args
;
155 struct test_params
*p
= arg
;
156 struct uffd_desc
**uffd_descs
= NULL
;
157 uint64_t uffd_region_size
;
158 struct timespec start
;
159 struct timespec ts_diff
;
160 double vcpu_paging_rate
;
162 int i
, num_uffds
= 0;
164 vm
= memstress_create_vm(mode
, nr_vcpus
, guest_percpu_mem_size
, 1,
165 p
->src_type
, p
->partition_vcpu_memory_access
);
167 demand_paging_size
= get_backing_src_pagesz(p
->src_type
);
169 guest_data_prototype
= malloc(demand_paging_size
);
170 TEST_ASSERT(guest_data_prototype
,
171 "Failed to allocate buffer for guest data pattern");
172 memset(guest_data_prototype
, 0xAB, demand_paging_size
);
174 if (p
->uffd_mode
== UFFDIO_REGISTER_MODE_MINOR
) {
175 num_uffds
= p
->single_uffd
? 1 : nr_vcpus
;
176 for (i
= 0; i
< num_uffds
; i
++) {
177 vcpu_args
= &memstress_args
.vcpu_args
[i
];
178 prefault_mem(addr_gpa2alias(vm
, vcpu_args
->gpa
),
179 vcpu_args
->pages
* memstress_args
.guest_page_size
);
184 num_uffds
= p
->single_uffd
? 1 : nr_vcpus
;
185 uffd_region_size
= nr_vcpus
* guest_percpu_mem_size
/ num_uffds
;
187 uffd_descs
= malloc(num_uffds
* sizeof(struct uffd_desc
*));
188 TEST_ASSERT(uffd_descs
, "Memory allocation failed");
189 for (i
= 0; i
< num_uffds
; i
++) {
190 struct memstress_vcpu_args
*vcpu_args
;
193 vcpu_args
= &memstress_args
.vcpu_args
[i
];
195 /* Cache the host addresses of the region */
196 vcpu_hva
= addr_gpa2hva(vm
, vcpu_args
->gpa
);
198 * Set up user fault fd to handle demand paging
201 uffd_descs
[i
] = uffd_setup_demand_paging(
202 p
->uffd_mode
, p
->uffd_delay
, vcpu_hva
,
205 &handle_uffd_page_request
);
209 pr_info("Finished creating vCPUs and starting uffd threads\n");
211 clock_gettime(CLOCK_MONOTONIC
, &start
);
212 memstress_start_vcpu_threads(nr_vcpus
, vcpu_worker
);
213 pr_info("Started all vCPUs\n");
215 memstress_join_vcpu_threads(nr_vcpus
);
216 ts_diff
= timespec_elapsed(start
);
217 pr_info("All vCPU threads joined\n");
220 /* Tell the user fault fd handler threads to quit */
221 for (i
= 0; i
< num_uffds
; i
++)
222 uffd_stop_demand_paging(uffd_descs
[i
]);
225 pr_info("Total guest execution time:\t%ld.%.9lds\n",
226 ts_diff
.tv_sec
, ts_diff
.tv_nsec
);
228 vcpu_paging_rate
= memstress_args
.vcpu_args
[0].pages
/
229 ((double)ts_diff
.tv_sec
+ (double)ts_diff
.tv_nsec
/ NSEC_PER_SEC
);
230 pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n",
232 pr_info("Overall demand paging rate:\t%f pgs/sec\n",
233 vcpu_paging_rate
* nr_vcpus
);
235 memstress_destroy_vm(vm
);
237 free(guest_data_prototype
);
242 static void help(char *name
)
245 printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
246 " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
247 " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name
);
249 printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
250 " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
251 kvm_print_vcpu_pinning_help();
252 printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n"
253 " creating one for each region paged by a unique vCPU\n"
254 " Set implicitly with -o, and no effect without -u.\n");
255 printf(" -d: add a delay in usec to the User Fault\n"
256 " FD handler to simulate demand paging\n"
257 " overheads. Ignored without -u.\n");
258 printf(" -r: Set the number of reader threads per uffd.\n");
259 printf(" -b: specify the size of the memory region which should be\n"
260 " demand paged by each vCPU. e.g. 10M or 3G.\n"
262 backing_src_help("-s");
263 printf(" -v: specify the number of vCPUs to run.\n");
264 printf(" -o: Overlap guest memory accesses instead of partitioning\n"
265 " them into a separate region of memory for each vCPU.\n");
270 int main(int argc
, char *argv
[])
272 int max_vcpus
= kvm_check_cap(KVM_CAP_MAX_VCPUS
);
273 const char *cpulist
= NULL
;
274 struct test_params p
= {
275 .src_type
= DEFAULT_VM_MEM_SRC
,
276 .partition_vcpu_memory_access
= true,
277 .readers_per_uffd
= 1,
278 .single_uffd
= false,
282 guest_modes_append_default();
284 while ((opt
= getopt(argc
, argv
, "ahom:u:d:b:s:v:c:r:")) != -1) {
287 guest_modes_cmdline(optarg
);
290 if (!strcmp("MISSING", optarg
))
291 p
.uffd_mode
= UFFDIO_REGISTER_MODE_MISSING
;
292 else if (!strcmp("MINOR", optarg
))
293 p
.uffd_mode
= UFFDIO_REGISTER_MODE_MINOR
;
294 TEST_ASSERT(p
.uffd_mode
, "UFFD mode must be 'MISSING' or 'MINOR'.");
297 p
.single_uffd
= true;
300 p
.uffd_delay
= strtoul(optarg
, NULL
, 0);
301 TEST_ASSERT(p
.uffd_delay
>= 0, "A negative UFFD delay is not supported.");
304 guest_percpu_mem_size
= parse_size(optarg
);
307 p
.src_type
= parse_backing_src_type(optarg
);
310 nr_vcpus
= atoi_positive("Number of vCPUs", optarg
);
311 TEST_ASSERT(nr_vcpus
<= max_vcpus
,
312 "Invalid number of vcpus, must be between 1 and %d", max_vcpus
);
318 p
.partition_vcpu_memory_access
= false;
319 p
.single_uffd
= true;
322 p
.readers_per_uffd
= atoi(optarg
);
323 TEST_ASSERT(p
.readers_per_uffd
>= 1,
324 "Invalid number of readers per uffd %d: must be >=1",
334 if (p
.uffd_mode
== UFFDIO_REGISTER_MODE_MINOR
&&
335 !backing_src_is_shared(p
.src_type
)) {
336 TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
340 kvm_parse_vcpu_pinning(cpulist
, memstress_args
.vcpu_to_pcpu
,
342 memstress_args
.pin_vcpus
= true;
345 for_each_guest_mode(run_test
, &p
);
350 #else /* __NR_userfaultfd */
352 #warning "missing __NR_userfaultfd definition"
356 print_skip("__NR_userfaultfd must be present for userfaultfd test");
360 #endif /* __NR_userfaultfd */