// SPDX-License-Identifier: GPL-2.0
/*
 * KVM dirty page logging performance test
 *
 * Based on dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2020, Google, Inc.
 */
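
/*
 * Example invocation (illustrative only; see help() below for the full
 * option list):
 *
 *   ./dirty_log_perf_test -v 4 -b 512M -i 5 -s anonymous_hugetlb
 *
 * runs five dirty-memory iterations with four vCPUs, each dirtying its own
 * 512M region backed by anonymous hugetlb pages (which must be reserved on
 * the host beforehand).
 */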

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <linux/bitmap.h>

#include "kvm_util.h"
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "ucall_common.h"

#ifdef __aarch64__
#include "aarch64/vgic.h"

static int gic_fd;

static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
{
	/*
	 * The test can still run even if hardware does not support GICv3, as it
	 * is only an optimization to reduce guest exits.
	 */
	gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
}

static void arch_cleanup_vm(struct kvm_vm *vm)
{
	if (gic_fd > 0)
		close(gic_fd);
}

#else /* __aarch64__ */

static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
{
}

static void arch_cleanup_vm(struct kvm_vm *vm)
{
}

#endif

/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N		2UL

static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static bool run_vcpus_while_disabling_dirty_logging;

/* Host variables */
static u64 dirty_log_manual_caps;
static bool host_quit;
static int iteration;
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
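
/*
 * Iteration protocol (inferred from the code below): the main thread
 * advances "iteration" and then waits for every entry of
 * vcpu_last_completed_iteration[] to catch up; each vCPU worker runs the
 * guest once per iteration, records its progress, and spins until the
 * iteration counter changes. A negative iteration tells the workers to keep
 * dirtying memory while dirty logging is torn down, and host_quit ends the
 * test.
 */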

static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
	int vcpu_idx = vcpu_args->vcpu_idx;
	uint64_t pages_count = 0;
	struct kvm_run *run;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec total = (struct timespec){0};
	struct timespec avg;
	int ret;

	run = vcpu->run;

	while (!READ_ONCE(host_quit)) {
		int current_iteration = READ_ONCE(iteration);

		clock_gettime(CLOCK_MONOTONIC, &start);
		ret = _vcpu_run(vcpu);
		ts_diff = timespec_elapsed(start);

		TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
			    "Invalid guest sync status: exit_reason=%s",
			    exit_reason_str(run->exit_reason));

		pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
		vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
		pr_debug("vCPU %d updated last completed iteration to %d\n",
			 vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]);

		if (current_iteration) {
			pages_count += vcpu_args->pages;
			total = timespec_add(total, ts_diff);
			pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
				 vcpu_idx, current_iteration, ts_diff.tv_sec,
				 ts_diff.tv_nsec);
		} else {
			pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
				 vcpu_idx, current_iteration, ts_diff.tv_sec,
				 ts_diff.tv_nsec);
		}

		/*
		 * Keep running the guest while dirty logging is being disabled
		 * (iteration is negative) so that vCPUs are accessing memory
		 * for the entire duration of zapping collapsible SPTEs.
		 */
		while (current_iteration == READ_ONCE(iteration) &&
		       READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {}
	}

	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]);
	pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		 vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx],
		 total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
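
/*
 * Per-run test configuration. Each field is populated from a command-line
 * flag in main(); see help() for the flag meanings and defaults.
 */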
struct test_params {
	unsigned long iterations;
	uint64_t phys_offset;
	bool partition_vcpu_memory_access;
	enum vm_mem_backing_src_type backing_src;
	int slots;
	uint32_t write_percent;
	bool random_access;
};

static void run_test(enum vm_guest_mode mode, void *arg)
{
	struct test_params *p = arg;
	struct kvm_vm *vm;
	unsigned long **bitmaps;
	uint64_t guest_num_pages;
	uint64_t host_num_pages;
	uint64_t pages_per_slot;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec get_dirty_log_total = (struct timespec){0};
	struct timespec vcpu_dirty_total = (struct timespec){0};
	struct timespec avg;
	struct timespec clear_dirty_log_total = (struct timespec){0};
	int i;

	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
				 p->slots, p->backing_src,
				 p->partition_vcpu_memory_access);

	memstress_set_write_percent(vm, p->write_percent);

	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
	pages_per_slot = host_num_pages / p->slots;
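
	/*
	 * One dirty bitmap is allocated per memslot, each sized to cover that
	 * slot's share of the host pages, so the get/clear calls below can
	 * walk every slot in a single pass.
	 */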
	bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);
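
	/*
	 * With KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 enabled, KVM_GET_DIRTY_LOG
	 * no longer clears the dirty log; userspace must do that explicitly
	 * via KVM_CLEAR_DIRTY_LOG (timed separately below). Passing -g on
	 * the command line zeroes dirty_log_manual_caps to skip this.
	 */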
	if (dirty_log_manual_caps)
		vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
			      dirty_log_manual_caps);

	arch_setup_vm(vm, nr_vcpus);

	/* Start the iterations */
	iteration = 0;
	host_quit = false;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < nr_vcpus; i++)
		vcpu_last_completed_iteration[i] = -1;
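
	/*
	 * Note: the -1 above marks every vCPU as "not yet started"; the
	 * populate wait below only completes once each worker has finished
	 * iteration 0.
	 */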

	/*
	 * Use 100% writes during the population phase to ensure all
	 * memory is actually populated and not just mapped to the zero
	 * page. This prevents expensive copy-on-write faults from
	 * occurring during the dirty memory iterations below, which
	 * would pollute the performance results.
	 */
	memstress_set_write_percent(vm, 100);
	memstress_set_random_access(vm, false);
	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);

	/* Allow the vCPUs to populate memory */
	pr_debug("Starting iteration %d - Populating\n", iteration);
	for (i = 0; i < nr_vcpus; i++) {
		while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
		       iteration)
			;
	}

	ts_diff = timespec_elapsed(start);
	pr_info("Populate memory time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);
	/* Enable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_enable_dirty_logging(vm, p->slots);
	ts_diff = timespec_elapsed(start);
	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);
	memstress_set_write_percent(vm, p->write_percent);
	memstress_set_random_access(vm, p->random_access);

	while (iteration < p->iterations) {
		/*
		 * Incrementing the iteration number will start the vCPUs
		 * dirtying memory again.
		 */
		clock_gettime(CLOCK_MONOTONIC, &start);
		iteration++;

		pr_debug("Starting iteration %d\n", iteration);
		for (i = 0; i < nr_vcpus; i++) {
			while (READ_ONCE(vcpu_last_completed_iteration[i])
			       != iteration)
				;
		}

		ts_diff = timespec_elapsed(start);
		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
		pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		clock_gettime(CLOCK_MONOTONIC, &start);
		memstress_get_dirty_log(vm, bitmaps, p->slots);
		ts_diff = timespec_elapsed(start);
		get_dirty_log_total = timespec_add(get_dirty_log_total,
						   ts_diff);
		pr_info("Iteration %d get dirty log time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		if (dirty_log_manual_caps) {
			clock_gettime(CLOCK_MONOTONIC, &start);
			memstress_clear_dirty_log(vm, bitmaps, p->slots,
						  pages_per_slot);
			ts_diff = timespec_elapsed(start);
			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
							     ts_diff);
			pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n",
				iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
		}
	}

	/*
	 * Run vCPUs while dirty logging is being disabled to stress disabling
	 * in terms of both performance and correctness. Opt-in via command
	 * line as this significantly increases time to disable dirty logging.
	 */
	if (run_vcpus_while_disabling_dirty_logging)
		WRITE_ONCE(iteration, -1);
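
	/*
	 * A negative iteration makes the workers skip their spin-wait and
	 * keep dirtying memory (see the "iteration is negative" check in
	 * vcpu_worker()) until host_quit is set.
	 */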

	/* Disable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_disable_dirty_logging(vm, p->slots);
	ts_diff = timespec_elapsed(start);
	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	/*
	 * Tell the vCPU threads to quit. No need to manually check that vCPUs
	 * have stopped running after disabling dirty logging, the join will
	 * wait for them to exit.
	 */
	host_quit = true;
	memstress_join_vcpu_threads(nr_vcpus);

	avg = timespec_div(get_dirty_log_total, p->iterations);
	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		p->iterations, get_dirty_log_total.tv_sec,
		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);

	if (dirty_log_manual_caps) {
		avg = timespec_div(clear_dirty_log_total, p->iterations);
		pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
			p->iterations, clear_dirty_log_total.tv_sec,
			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
	}

	memstress_free_bitmaps(bitmaps, p->slots);
	arch_cleanup_vm(vm);
	memstress_destroy_vm(vm);
}

static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] "
	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed] [-s mem type]"
	       " [-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
	puts("");
	printf(" -a: access memory randomly rather than in order.\n");
	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
	       TEST_HOST_LOOP_N);
	printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
	       "     makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n"
	       "     KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n"
	       "     and writes will be tracked as soon as dirty logging is\n"
	       "     enabled on the memslot (i.e. KVM_DIRTY_LOG_INITIALLY_SET\n"
	       "     is not enabled).\n");
	printf(" -p: specify guest physical test memory offset\n"
	       "     Warning: a low offset can conflict with the loaded test code.\n");
	guest_modes_help();
	printf(" -n: Run the vCPUs in nested mode (L2)\n");
	printf(" -e: Run vCPUs while dirty logging is being disabled. This\n"
	       "     can significantly increase runtime, especially if there\n"
	       "     isn't a dedicated pCPU for the main thread.\n");
	printf(" -b: specify the size of the memory region which should be\n"
	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
	       "     (default: 1G)\n");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       "     them into a separate region of memory for each vCPU.\n");
	printf(" -r: specify the starting random seed.\n");
	backing_src_help("-s");
	printf(" -x: Split the memory region into this number of memslots.\n"
	       "     (default: 1)\n");
	printf(" -w: specify the percentage of pages which should be written to\n"
	       "     as an integer from 0-100 inclusive. This is probabilistic,\n"
	       "     so -w X means each page has an X%% chance of writing\n"
	       "     and a (100-X)%% chance of reading.\n"
	       "     (default: 100 i.e. all pages are written to.)\n");
	kvm_print_vcpu_pinning_help();
	puts("");
	exit(0);
}

int main(int argc, char *argv[])
{
	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
	const char *pcpu_list = NULL;
	struct test_params p = {
		.iterations = TEST_HOST_LOOP_N,
		.partition_vcpu_memory_access = true,
		.backing_src = DEFAULT_VM_MEM_SRC,
		.slots = 1,
		.write_percent = 100,
	};
	int opt;

	/* Override the seed to be deterministic by default. */
	guest_random_seed = 1;

	dirty_log_manual_caps =
		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
				  KVM_DIRTY_LOG_INITIALLY_SET);

	guest_modes_append_default();

	while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
		switch (opt) {
		case 'a':
			p.random_access = true;
			break;
		case 'b':
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 'c':
			pcpu_list = optarg;
			break;
		case 'e':
			/* 'e' is for evil. */
			run_vcpus_while_disabling_dirty_logging = true;
			break;
		case 'g':
			dirty_log_manual_caps = 0;
			break;
		case 'h':
			help(argv[0]);
			break;
		case 'i':
			p.iterations = atoi_positive("Number of iterations", optarg);
			break;
		case 'm':
			guest_modes_cmdline(optarg);
			break;
		case 'n':
			memstress_args.nested = true;
			break;
		case 'o':
			p.partition_vcpu_memory_access = false;
			break;
		case 'p':
			p.phys_offset = strtoull(optarg, NULL, 0);
			break;
		case 'r':
			guest_random_seed = atoi_positive("Random seed", optarg);
			break;
		case 's':
			p.backing_src = parse_backing_src_type(optarg);
			break;
		case 'v':
			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
			TEST_ASSERT(nr_vcpus <= max_vcpus,
				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
			break;
		case 'w':
			p.write_percent = atoi_non_negative("Write percentage", optarg);
			TEST_ASSERT(p.write_percent <= 100,
				    "Write percentage must be between 0 and 100");
			break;
		case 'x':
			p.slots = atoi_positive("Number of slots", optarg);
			break;
		default:
			help(argv[0]);
			break;
		}
	}

	if (pcpu_list) {
		kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu,
				       nr_vcpus);
		memstress_args.pin_vcpus = true;
	}

	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");

	pr_info("Test iterations: %"PRIu64"\n", p.iterations);

	for_each_guest_mode(run_test, &p);

	return 0;
}