// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Xen shared_info / pvclock testing
 */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <time.h>
#include <pthread.h>
#include <sys/mman.h>

#include <sys/eventfd.h>
#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (3 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11

#define DUMMY_REGION_GPA_2	(SHINFO_REGION_GPA + (4 * PAGE_SIZE))
#define DUMMY_REGION_SLOT_2	12

#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)

#define EVTCHN_VECTOR	0x10

#define EVTCHN_TEST1 15
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER	13
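
/*
 * EVTCHN_TEST1/EVTCHN_TEST2 are arbitrary ports wired up to the test's GSI
 * routing and irqfds below; EVTCHN_TIMER also serves as the port for the
 * guest's Xen timer (KVM_XEN_VCPU_ATTR_TYPE_TIMER).
 */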

enum {
	TEST_INJECT_VECTOR = 0,
	TEST_RUNSTATE_runnable,
	TEST_RUNSTATE_blocked,
	TEST_RUNSTATE_offline,
	TEST_RUNSTATE_ADJUST,
	TEST_RUNSTATE_DATA,
	TEST_STEAL_TIME,
	TEST_EVTCHN_MASKED,
	TEST_EVTCHN_UNMASKED,
	TEST_EVTCHN_SLOWPATH,
	TEST_EVTCHN_SEND_IOCTL,
	TEST_EVTCHN_HCALL,
	TEST_EVTCHN_HCALL_SLOWPATH,
	TEST_EVTCHN_HCALL_EVENTFD,
	TEST_TIMER_SETUP,
	TEST_TIMER_WAIT,
	TEST_TIMER_RESTORE,
	TEST_POLL_READY,
	TEST_POLL_TIMEOUT,
	TEST_POLL_MASKED,
	TEST_POLL_WAKE,
	SET_VCPU_INFO,
	TEST_TIMER_PAST,
	TEST_LOCKING_SEND_RACE,
	TEST_LOCKING_POLL_RACE,
	TEST_LOCKING_POLL_TIMEOUT,
	TEST_GUEST_SAW_IRQ,
	TEST_DONE,
};

#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME		50000

#define SHINFO_RACE_TIMEOUT	2	/* seconds */

#define __HYPERVISOR_set_timer_op	15
#define __HYPERVISOR_sched_op		29
#define __HYPERVISOR_event_channel_op	32

#define SCHEDOP_poll		3

#define EVTCHNOP_send		4

#define EVTCHNSTAT_interdomain	2
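
/*
 * The hypercall and event-channel op numbers above mirror Xen's public ABI
 * (xen/include/public/xen.h and event_channel.h); the test hardcodes them
 * rather than pulling in the Xen headers.
 */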

/* Guest-visible argument structures for the hypercalls used below. */
struct evtchn_send {
	u32 port;
};

struct sched_poll {
	u32 *ports;
	unsigned int nr_ports;
	u64 timeout;
};

struct pvclock_vcpu_time_info {
	u32   version;
	u32   pad0;
	u64   tsc_timestamp;
	u64   system_time;
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    flags;
	u8    pad[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
	u32   version;
	u32   sec;
	u32   nsec;
} __attribute__((__packed__));

struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[5]; /* Extra field for overrun check */
};

struct compat_vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[5];
} __attribute__((__packed__));

struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */

struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};

#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3
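
/* Runstate values as used in struct vcpu_runstate_info::state, per Xen's ABI. */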
static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};

static struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;

static volatile bool guest_saw_irq;
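
/*
 * Guest-side upcall vector handler: acknowledge the event by clearing the
 * per-vCPU pending flags, then report back to the host.
 */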
static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;

	vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
	vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
	guest_saw_irq = true;

	GUEST_SYNC(TEST_GUEST_SAW_IRQ);
}

static void guest_wait_for_irq(void)
{
	while (!guest_saw_irq)
		__asm__ __volatile__ ("rep nop" : : : "memory");
	guest_saw_irq = false;
}
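
/*
 * The guest body: each GUEST_SYNC() hands control to the host side of the
 * test (the ucall switch in main()), which performs the named stage and
 * resumes the vCPU.
 */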
static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
	int i;

	/* Enable interrupts so the upcall vector can be delivered. */
	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(TEST_INJECT_VECTOR);

	guest_wait_for_irq();

	/* Test having the host set runstates manually */
	GUEST_SYNC(TEST_RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(TEST_RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(TEST_RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(TEST_RUNSTATE_ADJUST);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(TEST_RUNSTATE_DATA);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(TEST_STEAL_TIME);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(TEST_EVTCHN_MASKED);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(TEST_EVTCHN_UNMASKED);

	guest_wait_for_irq();

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(TEST_EVTCHN_SLOWPATH);

	guest_wait_for_irq();

	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
	GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL);

	/* Our turn. Deliver event channel (to ourselves) with
	 * EVTCHNOP_send hypercall. */
	struct evtchn_send s = { .port = 127 };
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);

	/*
	 * Same again, but this time the host has messed with memslots so it
	 * should take the slow path in kvm_xen_set_evtchn().
	 */
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);

	/* Deliver "outbound" event channel to an eventfd which
	 * happens to be one of our own irqfds. */
	s.port = 197;
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_SETUP);

	/* Set a timer 100ms in the future. */
	xen_hypercall(__HYPERVISOR_set_timer_op,
		      rs->state_entry_time + 100000000, NULL);

	GUEST_SYNC(TEST_TIMER_WAIT);

	/* Now wait for the timer */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_RESTORE);

	/* The host has 'restored' the timer. Just wait for it. */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_POLL_READY);

	/* Poll for an event channel port which is already set */
	u32 ports[1] = { EVTCHN_TIMER };
	struct sched_poll p = {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0,
	};

	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_TIMEOUT);

	/* Poll for an unset port and wait for the timeout. */
	p.timeout = 100000000;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_MASKED);

	/* A timer will wake the masked port we're waiting on, while we poll */
	p.timeout = 0;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_WAKE);

	/* Set the vcpu_info to point at exactly the place it already is to
	 * make sure the attribute is functional. */
	GUEST_SYNC(SET_VCPU_INFO);

	/* A timer will wake an *unmasked* port, which should wake us with an
	 * actual interrupt, while we're polling on a different port. */
	ports[0]++;
	p.timeout = 0;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_PAST);

	/* Timer should have fired already */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_LOCKING_SEND_RACE);
	/* Racing host ioctls */

	guest_wait_for_irq();

	GUEST_SYNC(TEST_LOCKING_POLL_RACE);
	/* Racing vmcall against host ioctl */

	guest_wait_for_irq();

	p = (struct sched_poll) {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0
	};

	/*
	 * Poll for a timer wake event while the worker thread is mucking with
	 * the shared info. KVM XEN drops timer IRQs if the shared info is
	 * invalid when the timer expires. Arbitrarily poll 100 times before
	 * giving up and asking the VMM to re-arm the timer. 100 polls should
	 * consume enough time to beat on KVM without taking too long if the
	 * timer IRQ is dropped due to an invalid event channel.
	 */
	for (i = 0; i < 100 && !guest_saw_irq; i++)
		__xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	/*
	 * Re-send the timer IRQ if it was (likely) dropped due to the timer
	 * expiring while the event channel was invalid.
	 */
	if (!guest_saw_irq) {
		GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
		guest_wait_for_irq();
	}

	guest_saw_irq = false;

	GUEST_SYNC(TEST_DONE);
}

static struct shared_info *shinfo;
static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;
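
/* SIGALRM watchdog: if an expected IRQ never arrives, dump state and fail. */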
static void handle_alrm(int sig)
{
	if (vinfo)
		printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
	vcpu_dump(stdout, vcpu, 0);
	TEST_FAIL("IRQ delivery timed out");
}
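
/*
 * Worker thread for the locking tests: repeatedly flip the shared_info
 * cache between active and inactive (by GFN, and by HVA where supported)
 * to race against event delivery on the main thread.
 */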
static void *juggle_shinfo_state(void *arg)
{
	struct kvm_vm *vm = (struct kvm_vm *)arg;

	struct kvm_xen_hvm_attr cache_activate_gfn = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
	};

	struct kvm_xen_hvm_attr cache_deactivate_gfn = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = KVM_XEN_INVALID_GFN
	};

	struct kvm_xen_hvm_attr cache_activate_hva = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
		.u.shared_info.hva = (unsigned long)shinfo
	};

	struct kvm_xen_hvm_attr cache_deactivate_hva = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
		.u.shared_info.hva = 0
	};

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);

	for (;;) {
		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
		pthread_testcancel();
		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);

		if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
			__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
			pthread_testcancel();
			__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
		}
	}

	return NULL;
}

int main(int argc, char *argv[])
{
	struct kvm_xen_hvm_attr evt_reset;
	struct kvm_vm *vm;
	pthread_t thread;
	bool verbose;
	int ret;

	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
	bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);

	shinfo = addr_gpa2hva(vm, SHINFO_VADDR);

	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
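
	/*
	 * Configure the Xen hypercall MSR (an arbitrary choice, 0x40000000
	 * here) and ask KVM to intercept the hypercalls itself.
	 */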
	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};

	/* Let the kernel know that we *will* use it for sending all
	 * event channels, which lets it intercept SCHEDOP_poll */
	if (do_evtchn_tests)
		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;

	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);

	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

	if (do_runstate_flag) {
		struct kvm_xen_hvm_attr ruf = {
			.type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
			.u.runstate_update_flag = 1,
		};
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);

		ruf.u.runstate_update_flag = 0;
		vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
		TEST_ASSERT(ruf.u.runstate_update_flag == 1,
			    "Failed to read back RUNSTATE_UPDATE_FLAG attr");
	}

	struct kvm_xen_hvm_attr ha = {};

	if (has_shinfo_hva) {
		ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
		ha.u.shared_info.hva = (unsigned long)shinfo;
	} else {
		ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
		ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
	}

	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);

	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;

	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);

	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);

	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
	}

	int irq_fd[2] = { -1, -1 };

	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_evtchn_tests = do_eventfd_tests = false;
	}
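
	/*
	 * Route GSIs 32 and 33 to the two test event channel ports, then
	 * attach the eventfds as irqfds so that a write to an eventfd
	 * injects the corresponding event channel.
	 */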
	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
		irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
		irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);

		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);
	}

	struct sigaction sa = { };
	sa.sa_handler = handle_alrm;
	sigaction(SIGALRM, &sa, NULL);

	struct kvm_xen_vcpu_attr tmr = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
		.u.timer.port = EVTCHN_TIMER,
		.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		.u.timer.expires_ns = 0
	};

	if (do_evtchn_tests) {
		struct kvm_xen_hvm_attr inj = {
			.type = KVM_XEN_ATTR_TYPE_EVTCHN,
			.u.evtchn.send_port = 127,
			.u.evtchn.type = EVTCHNSTAT_interdomain,
			.u.evtchn.flags = 0,
			.u.evtchn.deliver.port.port = EVTCHN_TEST1,
			.u.evtchn.deliver.port.vcpu = vcpu->id + 1,
			.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		};
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		/* Test migration to a different vCPU */
		inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
		inj.u.evtchn.deliver.port.vcpu = vcpu->id;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		inj.u.evtchn.send_port = 197;
		inj.u.evtchn.deliver.eventfd.port = 0;
		inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
		inj.u.evtchn.flags = 0;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
	}

	vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
	vinfo->evtchn_upcall_pending = 0;

	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;
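
	/*
	 * Host-side dispatch loop: run the vCPU until its next GUEST_SYNC,
	 * perform the requested test stage, and resume, until TEST_DONE.
	 */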
	for (;;) {
		struct ucall uc;

		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");

			switch (uc.args[1]) {
			case TEST_INJECT_VECTOR:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;

			case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
					TEST_RUNSTATE_runnable;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case TEST_RUNSTATE_ADJUST:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case TEST_RUNSTATE_DATA:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case TEST_STEAL_TIME:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;

			case TEST_EVTCHN_MASKED:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;

			case TEST_EVTCHN_UNMASKED:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_SLOWPATH:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_SEND_IOCTL:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				if (!do_evtchn_tests)
					goto done;

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");

				struct kvm_irq_routing_xen_evtchn e;
				e.port = EVTCHN_TEST2;
				e.vcpu = vcpu->id;
				e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

				vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_HCALL:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send direct to evtchn\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_HCALL_SLOWPATH:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_HCALL_EVENTFD:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send to eventfd\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_TIMER_SETUP:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest oneshot timer\n");
				break;

			case TEST_TIMER_WAIT:
				memset(&tmr, 0, sizeof(tmr));
				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
					    "Timer port not returned");
				TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
					    "Timer priority not returned");
				TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
					    "Timer expiry not returned");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_TIMER_RESTORE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing restored oneshot timer\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_POLL_READY:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				if (verbose)
					printf("Testing SCHEDOP_poll with already pending event\n");
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
				alarm(1);
				break;

			case TEST_POLL_TIMEOUT:
				if (verbose)
					printf("Testing SCHEDOP_poll timeout\n");
				shinfo->evtchn_pending[0] = 0;
				alarm(1);
				break;

			case TEST_POLL_MASKED:
				if (verbose)
					printf("Testing SCHEDOP_poll wake on masked event\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case TEST_POLL_WAKE:
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
				if (verbose)
					printf("Testing SCHEDOP_poll wake on unmasked event\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);

				/* Read it back and check the pending time is reported correctly */
				tmr.u.timer.expires_ns = 0;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
					    "Timer not reported pending");
				alarm(1);
				break;

			case SET_VCPU_INFO:
				if (has_shinfo_hva) {
					struct kvm_xen_vcpu_attr vih = {
						.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
						.u.hva = (unsigned long)vinfo
					};
					vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
				}
				break;

			case TEST_TIMER_PAST:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				/* Read timer and check it is no longer pending */
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");

				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing timer in the past\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case TEST_LOCKING_SEND_RACE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				alarm(0);

				if (verbose)
					printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");

				ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
				TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));

				struct kvm_irq_routing_xen_evtchn uxe = {
					.port = 1,
					.vcpu = vcpu->id,
					.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
				};

				evtchn_irq_expected = true;
				for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
					__vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
				break;

			case TEST_LOCKING_POLL_RACE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				if (verbose)
					printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");

				shinfo->evtchn_pending[0] = 1;

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time +
							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				break;

			case TEST_LOCKING_POLL_TIMEOUT:
				/*
				 * Optional and possibly repeated sync point.
				 * Injecting the timer IRQ may fail if the
				 * shinfo is invalid when the timer expires.
				 * If the timer has expired but the IRQ hasn't
				 * been delivered, rearm the timer and retry.
				 */
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);

				/* Resume the guest if the timer is still pending. */
				if (tmr.u.timer.expires_ns)
					break;

				/* All done if the IRQ was delivered. */
				if (!evtchn_irq_expected)
					break;

				tmr.u.timer.expires_ns = rs->state_entry_time +
							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				break;

			case TEST_DONE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				ret = pthread_cancel(thread);
				TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));

				ret = pthread_join(thread, 0);
				TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
				goto done;

			case TEST_GUEST_SAW_IRQ:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				break;
			}
			break;
		}
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
	evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);

	alarm(0);

	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;
	struct kvm_clock_data kcdata;
	int64_t delta;

	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
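
	/*
	 * The offsets above follow from the ABI structs: the wallclock lives
	 * at offset 0xc00 in the shared_info page (after vcpu_info[32] and
	 * the two event channel bitmaps), and each vcpu_info holds its
	 * pvclock time info at offset 0x20, here within the vcpu_info we
	 * placed at SHINFO_REGION_GPA + 0x40.
	 */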

	if (verbose) {
		printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}

	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);

	vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);

	if (kcdata.flags & KVM_CLOCK_REALTIME) {
		if (verbose) {
			printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
			       kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
			printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
			       kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
		}

		delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);

		/*
		 * KVM_GET_CLOCK gives CLOCK_REALTIME, which jumps on leap second
		 * updates, and unfortunately KVM doesn't currently offer a
		 * CLOCK_TAI alternative. Accept 1s of delta, as testing clock
		 * accuracy is not the goal here. The test just needs to check
		 * that the value in shinfo is somewhat sane.
		 */
		TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
			    "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld",
			    wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
			    (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
	} else {
		pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
	}

	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
		    "Bad time_info version %x", ti2->version);

	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);

		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}

		/*
		 * Exercise runstate info at all points across the page boundary, in
		 * 32-bit and 64-bit mode. In particular, test the case where it is
		 * configured in 32-bit mode and then switched to 64-bit mode while
		 * active, which takes it onto the second page.
		 */
		unsigned long runstate_addr;
		struct compat_vcpu_runstate_info *crs;
		for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
		     runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
			rs = addr_gpa2hva(vm, runstate_addr);
			crs = (void *)rs;

			memset(rs, 0xa5, sizeof(*rs));

			/* Set to compatibility mode */
			lm.u.long_mode = 0;
			vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

			/* Set runstate to new address (kernel will write it) */
			struct kvm_xen_vcpu_attr st = {
				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
				.u.gpa = runstate_addr,
			};
			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);

			if (verbose)
				printf("Compatibility runstate at %08lx\n", runstate_addr);

			TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
			TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
				    "State entry time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
				    "Running time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
				    "Runnable time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
				    "Blocked time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
				    "Offline time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
				    "Structure overrun");
			TEST_ASSERT(crs->state_entry_time == crs->time[0] +
				    crs->time[1] + crs->time[2] + crs->time[3],
				    "runstate times don't add up");

			/* Now switch to 64-bit mode */
			lm.u.long_mode = 1;
			vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

			memset(rs, 0xa5, sizeof(*rs));

			/* Don't change the address, just trigger a write */
			struct kvm_xen_vcpu_attr adj = {
				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
				.u.runstate.state = (uint64_t)-1
			};
			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);

			if (verbose)
				printf("64-bit runstate at %08lx\n", runstate_addr);

			TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
			TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
				    "State entry time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
				    "Running time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
				    "Runnable time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
				    "Blocked time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
				    "Offline time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
				    "Structure overrun");
			TEST_ASSERT(rs->state_entry_time == rs->time[0] +
				    rs->time[1] + rs->time[2] + rs->time[3],
				    "runstate times don't add up");
		}
	}

	kvm_vm_free(vm);
	return 0;
}