tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2022, Google LLC.
   4  */
   5 #include <fcntl.h>
   6 #include <limits.h>
   7 #include <pthread.h>
   8 #include <sched.h>
   9 #include <signal.h>
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <string.h>
  13 #include <sys/ioctl.h>
  14
  15 #include <linux/compiler.h>
  16 #include <linux/kernel.h>
  17 #include <linux/kvm_para.h>
  18 #include <linux/memfd.h>
  19 #include <linux/sizes.h>
  20
  21 #include <test_util.h>
  22 #include <kvm_util.h>
  23 #include <processor.h>
  24
  25 #define BASE_DATA_SLOT          10
  26 #define BASE_DATA_GPA           ((uint64_t)(1ull << 32))
  27 #define PER_CPU_DATA_SIZE       ((uint64_t)(SZ_2M + PAGE_SIZE))
  28
  29 /* Horrific macro so that the line info is captured accurately :-( */
  30 #define memcmp_g(gpa, pattern,  size)                                                           \
  31 do {                                                                                            \
  32         uint8_t *mem = (uint8_t *)gpa;                                                          \
  33         size_t i;                                                                               \
  34                                                                                                 \
  35         for (i = 0; i < size; i++)                                                              \
  36                 __GUEST_ASSERT(mem[i] == pattern,                                               \
  37                                "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x",       \
  38                                pattern, i, gpa + i, mem[i]);                                    \
  39 } while (0)
  40
  41 static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
  42 {
  43         size_t i;
  44
  45         for (i = 0; i < size; i++)
  46                 TEST_ASSERT(mem[i] == pattern,
  47                             "Host expected 0x%x at gpa 0x%lx, got 0x%x",
  48                             pattern, gpa + i, mem[i]);
  49 }
  50
  51 /*
  52  * Run memory conversion tests with explicit conversion:
  53  * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit
  54  * to back/unback private memory. Subsequent accesses by guest to the gpa range
  55  * will not cause exit to userspace.
  56  *
  57  * Test memory conversion scenarios with following steps:
  58  * 1) Access private memory using private access and verify that memory contents
  59  *   are not visible to userspace.
  60  * 2) Convert memory to shared using explicit conversions and ensure that
  61  *   userspace is able to access the shared regions.
  62  * 3) Convert memory back to private using explicit conversions and ensure that
  63  *   userspace is again not able to access converted private regions.
  64  */
  65
  66 #define GUEST_STAGE(o, s) { .offset = o, .size = s }
  67
  68 enum ucall_syncs {
  69         SYNC_SHARED,
  70         SYNC_PRIVATE,
  71 };
  72
  73 static void guest_sync_shared(uint64_t gpa, uint64_t size,
  74                               uint8_t current_pattern, uint8_t new_pattern)
  75 {
  76         GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
  77 }
  78
  79 static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
  80 {
  81         GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
  82 }
  83
  84 /* Arbitrary values, KVM doesn't care about the attribute flags. */
  85 #define MAP_GPA_SET_ATTRIBUTES  BIT(0)
  86 #define MAP_GPA_SHARED          BIT(1)
  87 #define MAP_GPA_DO_FALLOCATE    BIT(2)
  88
  89 static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
  90                           bool do_fallocate)
  91 {
  92         uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
  93
  94         if (map_shared)
  95                 flags |= MAP_GPA_SHARED;
  96         if (do_fallocate)
  97                 flags |= MAP_GPA_DO_FALLOCATE;
  98         kvm_hypercall_map_gpa_range(gpa, size, flags);
  99 }
 100
 101 static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
 102 {
 103         guest_map_mem(gpa, size, true, do_fallocate);
 104 }
 105
 106 static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
 107 {
 108         guest_map_mem(gpa, size, false, do_fallocate);
 109 }
 110
 111 struct {
 112         uint64_t offset;
 113         uint64_t size;
 114 } static const test_ranges[] = {
 115         GUEST_STAGE(0, PAGE_SIZE),
 116         GUEST_STAGE(0, SZ_2M),
 117         GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
 118         GUEST_STAGE(PAGE_SIZE, SZ_2M),
 119         GUEST_STAGE(SZ_2M, PAGE_SIZE),
 120 };
 121
 122 static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
 123 {
 124         const uint8_t def_p = 0xaa;
 125         const uint8_t init_p = 0xcc;
 126         uint64_t j;
 127         int i;
 128
 129         /* Memory should be shared by default. */
 130         memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
 131         memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
 132         guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
 133
 134         memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
 135
 136         for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
 137                 uint64_t gpa = base_gpa + test_ranges[i].offset;
 138                 uint64_t size = test_ranges[i].size;
 139                 uint8_t p1 = 0x11;
 140                 uint8_t p2 = 0x22;
 141                 uint8_t p3 = 0x33;
 142                 uint8_t p4 = 0x44;
 143
 144                 /*
 145                  * Set the test region to pattern one to differentiate it from
 146                  * the data range as a whole (contains the initial pattern).
 147                  */
 148                 memset((void *)gpa, p1, size);
 149
 150                 /*
 151                  * Convert to private, set and verify the private data, and
 152                  * then verify that the rest of the data (map shared) still
 153                  * holds the initial pattern, and that the host always sees the
 154                  * shared memory (initial pattern).  Unlike shared memory,
 155                  * punching a hole in private memory is destructive, i.e.
 156                  * previous values aren't guaranteed to be preserved.
 157                  */
 158                 guest_map_private(gpa, size, do_fallocate);
 159
 160                 if (size > PAGE_SIZE) {
 161                         memset((void *)gpa, p2, PAGE_SIZE);
 162                         goto skip;
 163                 }
 164
 165                 memset((void *)gpa, p2, size);
 166                 guest_sync_private(gpa, size, p1);
 167
 168                 /*
 169                  * Verify that the private memory was set to pattern two, and
 170                  * that shared memory still holds the initial pattern.
 171                  */
 172                 memcmp_g(gpa, p2, size);
 173                 if (gpa > base_gpa)
 174                         memcmp_g(base_gpa, init_p, gpa - base_gpa);
 175                 if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
 176                         memcmp_g(gpa + size, init_p,
 177                                  (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
 178
 179                 /*
 180                  * Convert odd-number page frames back to shared to verify KVM
 181                  * also correctly handles holes in private ranges.
 182                  */
 183                 for (j = 0; j < size; j += PAGE_SIZE) {
 184                         if ((j >> PAGE_SHIFT) & 1) {
 185                                 guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
 186                                 guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
 187
 188                                 memcmp_g(gpa + j, p3, PAGE_SIZE);
 189                         } else {
 190                                 guest_sync_private(gpa + j, PAGE_SIZE, p1);
 191                         }
 192                 }
 193
 194 skip:
 195                 /*
 196                  * Convert the entire region back to shared, explicitly write
 197                  * pattern three to fill in the even-number frames before
 198                  * asking the host to verify (and write pattern four).
 199                  */
 200                 guest_map_shared(gpa, size, do_fallocate);
 201                 memset((void *)gpa, p3, size);
 202                 guest_sync_shared(gpa, size, p3, p4);
 203                 memcmp_g(gpa, p4, size);
 204
 205                 /* Reset the shared memory back to the initial pattern. */
 206                 memset((void *)gpa, init_p, size);
 207
 208                 /*
 209                  * Free (via PUNCH_HOLE) *all* private memory so that the next
 210                  * iteration starts from a clean slate, e.g. with respect to
 211                  * whether or not there are pages/folios in guest_mem.
 212                  */
 213                 guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
 214         }
 215 }
 216
 217 static void guest_punch_hole(uint64_t gpa, uint64_t size)
 218 {
 219         /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
 220         uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
 221
 222         kvm_hypercall_map_gpa_range(gpa, size, flags);
 223 }
 224
 225 /*
 226  * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
 227  * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
 228  * (subsequent fault) should zero memory.
 229  */
 230 static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
 231 {
 232         const uint8_t init_p = 0xcc;
 233         int i;
 234
 235         /*
 236          * Convert the entire range to private, this testcase is all about
 237          * punching holes in guest_memfd, i.e. shared mappings aren't needed.
 238          */
 239         guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
 240
 241         for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
 242                 uint64_t gpa = base_gpa + test_ranges[i].offset;
 243                 uint64_t size = test_ranges[i].size;
 244
 245                 /*
 246                  * Free all memory before each iteration, even for the !precise
 247                  * case where the memory will be faulted back in.  Freeing and
 248                  * reallocating should obviously work, and freeing all memory
 249                  * minimizes the probability of cross-testcase influence.
 250                  */
 251                 guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
 252
 253                 /* Fault-in and initialize memory, and verify the pattern. */
 254                 if (precise) {
 255                         memset((void *)gpa, init_p, size);
 256                         memcmp_g(gpa, init_p, size);
 257                 } else {
 258                         memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
 259                         memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
 260                 }
 261
 262                 /*
 263                  * Punch a hole at the target range and verify that reads from
 264                  * the guest succeed and return zeroes.
 265                  */
 266                 guest_punch_hole(gpa, size);
 267                 memcmp_g(gpa, 0, size);
 268         }
 269 }
 270
 271 static void guest_code(uint64_t base_gpa)
 272 {
 273         /*
 274          * Run the conversion test twice, with and without doing fallocate() on
 275          * the guest_memfd backing when converting between shared and private.
 276          */
 277         guest_test_explicit_conversion(base_gpa, false);
 278         guest_test_explicit_conversion(base_gpa, true);
 279
 280         /*
 281          * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
 282          * faulted in, once with only the target range faulted in.
 283          */
 284         guest_test_punch_hole(base_gpa, false);
 285         guest_test_punch_hole(base_gpa, true);
 286         GUEST_DONE();
 287 }
 288
 289 static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
 290 {
 291         struct kvm_run *run = vcpu->run;
 292         uint64_t gpa = run->hypercall.args[0];
 293         uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
 294         bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
 295         bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
 296         bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
 297         struct kvm_vm *vm = vcpu->vm;
 298
 299         TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
 300                     "Wanted MAP_GPA_RANGE (%u), got '%llu'",
 301                     KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
 302
 303         if (do_fallocate)
 304                 vm_guest_mem_fallocate(vm, gpa, size, map_shared);
 305
 306         if (set_attributes)
 307                 vm_set_memory_attributes(vm, gpa, size,
 308                                          map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
 309         run->hypercall.ret = 0;
 310 }
 311
 312 static bool run_vcpus;
 313
 314 static void *__test_mem_conversions(void *__vcpu)
 315 {
 316         struct kvm_vcpu *vcpu = __vcpu;
 317         struct kvm_run *run = vcpu->run;
 318         struct kvm_vm *vm = vcpu->vm;
 319         struct ucall uc;
 320
 321         while (!READ_ONCE(run_vcpus))
 322                 ;
 323
 324         for ( ;; ) {
 325                 vcpu_run(vcpu);
 326
 327                 if (run->exit_reason == KVM_EXIT_HYPERCALL) {
 328                         handle_exit_hypercall(vcpu);
 329                         continue;
 330                 }
 331
 332                 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 333                             "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
 334                             run->exit_reason, exit_reason_str(run->exit_reason));
 335
 336                 switch (get_ucall(vcpu, &uc)) {
 337                 case UCALL_ABORT:
 338                         REPORT_GUEST_ASSERT(uc);
 339                 case UCALL_SYNC: {
 340                         uint64_t gpa  = uc.args[1];
 341                         size_t size = uc.args[2];
 342                         size_t i;
 343
 344                         TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
 345                                     uc.args[0] == SYNC_PRIVATE,
 346                                     "Unknown sync command '%ld'", uc.args[0]);
 347
 348                         for (i = 0; i < size; i += vm->page_size) {
 349                                 size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
 350                                 uint8_t *hva = addr_gpa2hva(vm, gpa + i);
 351
 352                                 /* In all cases, the host should observe the shared data. */
 353                                 memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
 354
 355                                 /* For shared, write the new pattern to guest memory. */
 356                                 if (uc.args[0] == SYNC_SHARED)
 357                                         memset(hva, uc.args[4], nr_bytes);
 358                         }
 359                         break;
 360                 }
 361                 case UCALL_DONE:
 362                         return NULL;
 363                 default:
 364                         TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
 365                 }
 366         }
 367 }
 368
 369 static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
 370                                  uint32_t nr_memslots)
 371 {
 372         /*
 373          * Allocate enough memory so that each vCPU's chunk of memory can be
 374          * naturally aligned with respect to the size of the backing store.
 375          */
 376         const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
 377         const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
 378         const size_t memfd_size = per_cpu_size * nr_vcpus;
 379         const size_t slot_size = memfd_size / nr_memslots;
 380         struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 381         pthread_t threads[KVM_MAX_VCPUS];
 382         struct kvm_vm *vm;
 383         int memfd, i, r;
 384
 385         const struct vm_shape shape = {
 386                 .mode = VM_MODE_DEFAULT,
 387                 .type = KVM_X86_SW_PROTECTED_VM,
 388         };
 389
 390         TEST_ASSERT(slot_size * nr_memslots == memfd_size,
 391                     "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
 392                     memfd_size, nr_memslots);
 393         vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
 394
 395         vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
 396
 397         memfd = vm_create_guest_memfd(vm, memfd_size, 0);
 398
 399         for (i = 0; i < nr_memslots; i++)
 400                 vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
 401                            BASE_DATA_SLOT + i, slot_size / vm->page_size,
 402                            KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
 403
 404         for (i = 0; i < nr_vcpus; i++) {
 405                 uint64_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
 406
 407                 vcpu_args_set(vcpus[i], 1, gpa);
 408
 409                 /*
 410                  * Map only what is needed so that an out-of-bounds access
 411                  * results #PF => SHUTDOWN instead of data corruption.
 412                  */
 413                 virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
 414
 415                 pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
 416         }
 417
 418         WRITE_ONCE(run_vcpus, true);
 419
 420         for (i = 0; i < nr_vcpus; i++)
 421                 pthread_join(threads[i], NULL);
 422
 423         kvm_vm_free(vm);
 424
 425         /*
 426          * Allocate and free memory from the guest_memfd after closing the VM
 427          * fd.  The guest_memfd is gifted a reference to its owning VM, i.e.
 428          * should prevent the VM from being fully destroyed until the last
 429          * reference to the guest_memfd is also put.
 430          */
 431         r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
 432         TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
 433
 434         r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
 435         TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
 436
 437         close(memfd);
 438 }
 439
 440 static void usage(const char *cmd)
 441 {
 442         puts("");
 443         printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
 444         puts("");
 445         backing_src_help("-s");
 446         puts("");
 447         puts(" -n: specify the number of vcpus (default: 1)");
 448         puts("");
 449         puts(" -m: specify the number of memslots (default: 1)");
 450         puts("");
 451 }
 452
 453 int main(int argc, char *argv[])
 454 {
 455         enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
 456         uint32_t nr_memslots = 1;
 457         uint32_t nr_vcpus = 1;
 458         int opt;
 459
 460         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
 461
 462         while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
 463                 switch (opt) {
 464                 case 's':
 465                         src_type = parse_backing_src_type(optarg);
 466                         break;
 467                 case 'n':
 468                         nr_vcpus = atoi_positive("nr_vcpus", optarg);
 469                         break;
 470                 case 'm':
 471                         nr_memslots = atoi_positive("nr_memslots", optarg);
 472                         break;
 473                 case 'h':
 474                 default:
 475                         usage(argv[0]);
 476                         exit(0);
 477                 }
 478         }
 479
 480         test_mem_conversions(src_type, nr_vcpus, nr_memslots);
 481
 482         return 0;
 483 }