/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm_int.h"
#include "sysemu/kvm_xen.h"
#include "kvm/kvm_i386.h"
#include "exec/address-spaces.h"
#include "trace.h"
#include "sysemu/runstate.h"

#include "hw/pci/msi.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/kvm/xen_overlay.h"
#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/kvm/xen_gnttab.h"
#include "hw/i386/kvm/xen_xenstore.h"

#include "hw/xen/interface/version.h"
#include "hw/xen/interface/sched.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/hvm_op.h"
#include "hw/xen/interface/hvm/params.h"
#include "hw/xen/interface/vcpu.h"
#include "hw/xen/interface/event_channel.h"
#include "hw/xen/interface/grant_table.h"

#include "xen-compat.h"

static void xen_vcpu_singleshot_timer_event(void *opaque);
static void xen_vcpu_periodic_timer_event(void *opaque);

#ifdef TARGET_X86_64
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif

static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
                           size_t *len, bool is_write)
{
    struct kvm_translation tr = {
        .linear_address = gva,
    };

    if (len) {
        *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
    }

    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
        (is_write && !tr.writeable)) {
        return false;
    }
    *gpa = tr.physical_address;
    return true;
}

static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
                      bool is_write)
{
    uint8_t *buf = (uint8_t *)_buf;
    uint64_t gpa;
    size_t len;

    while (sz) {
        if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
            return -EFAULT;
        }
        if (len > sz) {
            len = sz;
        }

        cpu_physical_memory_rw(gpa, buf, len, is_write);

        buf += len;
        sz -= len;
        gva += len;
    }

    return 0;
}

static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}

static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}
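
/*
 * The hypercall handlers below use kvm_copy_from_gva()/kvm_copy_to_gva()
 * to copy argument structures in from, and results back out to, guest
 * virtual addresses, since Xen hypercall arguments are passed by guest
 * virtual address. A failed copy is reported back to the guest as -EFAULT.
 */
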
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    /* If called a second time, don't repeat the rest of the setup. */
    if (s->xen_caps) {
        return 0;
    }

    /*
     * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
     * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
     *
     * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
     * such things to be polled at precisely the right time. We *could* do
     * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
     * the moment the IRQ is acked, and see if it should be reasserted.
     *
     * But the in-kernel irqchip is deprecated, so we're unlikely to add
     * that support in the kernel. Insist on using the split irqchip mode
     * instead.
     *
     * This leaves us polling for the level going low in QEMU, which lacks
     * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
     * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
     * the device (for which it has to unmap the device and trap access, for
     * some period after an IRQ!!). In the Xen case, we do it on exit from
     * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
     * Which is kind of icky, but less so than the VFIO one. I may fix them
     * both later.
     */
    if (!kvm_kernel_irqchip_split()) {
        error_report("kvm: Xen support requires kernel-irqchip=split");
        return -EINVAL;
    }

    s->xen_caps = xen_caps;

    /* Tell fw_cfg to notify the BIOS to reserve the range. */
    ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE,
                         E820_RESERVED);
    if (ret < 0) {
        fprintf(stderr, "e820_add_entry() table is full\n");
        return ret;
    }

    /* The page couldn't be overlaid until KVM was initialized */
    xen_xenstore_reset();

    return 0;
}
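
/*
 * In practice this init path is reached when the KVM accelerator is
 * configured with Xen emulation enabled, along the lines of
 * "-accel kvm,xen-version=0x4000a,kernel-irqchip=split". The xen-version
 * value above is only an example; kernel-irqchip=split is mandatory, as
 * enforced by the check in kvm_xen_init().
 */
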
int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;

    qemu_mutex_init(&env->xen_timers_lock);
    env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                             xen_vcpu_singleshot_timer_event,
                                             cs);
    if (!env->xen_singleshot_timer) {
        return -ENOMEM;
    }
    env->xen_singleshot_timer->opaque = cs;

    env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           xen_vcpu_periodic_timer_event,
                                           cs);
    if (!env->xen_periodic_timer) {
        return -ENOMEM;
    }
    env->xen_periodic_timer->opaque = cs;

    return 0;
}

uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}
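
/*
 * The xen_caps mask saved by kvm_xen_init() is what the kvm_xen_has_cap()
 * checks used throughout this file (e.g. for EVTCHN_SEND) test against.
 */
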
static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                                      int cmd, uint64_t arg)
{
    int err = 0;

    switch (cmd) {
    case XENVER_get_features: {
        struct xen_feature_info fi;

        /* No need for 32/64 compat handling */
        qemu_build_assert(sizeof(fi) == 8);

        err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
        if (err) {
            break;
        }

        fi.submap = 0;
        if (fi.submap_idx == 0) {
            fi.submap |= 1 << XENFEAT_writable_page_tables |
                         1 << XENFEAT_writable_descriptor_tables |
                         1 << XENFEAT_auto_translated_physmap |
                         1 << XENFEAT_supervisor_mode_kernel |
                         1 << XENFEAT_hvm_callback_vector |
                         1 << XENFEAT_hvm_safe_pvclock |
                         1 << XENFEAT_hvm_pirqs;
        }

        err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
        break;
    }

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
{
    struct kvm_xen_vcpu_attr xhsi;

    xhsi.type = type;
    xhsi.u.gpa = gpa;

    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
}

static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
{
    uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    struct kvm_xen_vcpu_attr xva;

    xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
    xva.u.vector = vector;

    trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);

    return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
}

static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_callback_vector = data.host_int;

    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);
    }
}

static int set_vcpu_info(CPUState *cs, uint64_t gpa)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    MemoryRegionSection mrs = { .mr = NULL };
    void *vcpu_info_hva = NULL;
    int ret;

    ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
    if (ret || gpa == INVALID_GPA) {
        goto out;
    }

    mrs = memory_region_find(get_system_memory(), gpa,
                             sizeof(struct vcpu_info));
    if (mrs.mr && mrs.mr->ram_block &&
        !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
        vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
                                         mrs.offset_within_region);
    }
    if (!vcpu_info_hva) {
        if (mrs.mr) {
            memory_region_unref(mrs.mr);
            mrs.mr = NULL;
        }
        ret = -EINVAL;
    }

 out:
    if (env->xen_vcpu_info_mr) {
        memory_region_unref(env->xen_vcpu_info_mr);
    }
    env->xen_vcpu_info_hva = vcpu_info_hva;
    env->xen_vcpu_info_mr = mrs.mr;
    return ret;
}
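
/*
 * Keeping a host virtual address mapping (xen_vcpu_info_hva) of the guest's
 * vcpu_info lets the fast path in kvm_xen_maybe_deassert_callback() peek at
 * evtchn_upcall_pending on every return from KVM_RUN without having to do a
 * memory region lookup each time.
 */
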
static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_default_gpa = data.host_ulong;

    /* Changing the default does nothing if a vcpu_info was explicitly set. */
    if (env->xen_vcpu_info_gpa == INVALID_GPA) {
        set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
    }
}

static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = data.host_ulong;

    set_vcpu_info(cs, env->xen_vcpu_info_gpa);
}

void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    if (!cs) {
        return NULL;
    }

    return X86_CPU(cs)->env.xen_vcpu_info_hva;
}

void kvm_xen_maybe_deassert_callback(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    struct vcpu_info *vi = env->xen_vcpu_info_hva;
    if (!vi) {
        return;
    }

    /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
    if (!vi->evtchn_upcall_pending) {
        qemu_mutex_lock_iothread();
        /*
         * Check again now we have the lock, because it may have been
         * asserted in the interim. And we don't want to take the lock
         * every time because this is a fast path.
         */
        if (!vi->evtchn_upcall_pending) {
            X86_CPU(cs)->env.xen_callback_asserted = false;
            xen_evtchn_set_callback_level(0);
        }
        qemu_mutex_unlock_iothread();
    }
}

void kvm_xen_set_callback_asserted(void)
{
    CPUState *cs = qemu_get_cpu(0);

    if (cs) {
        X86_CPU(cs)->env.xen_callback_asserted = true;
    }
}

void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    uint8_t vector;

    if (!cs) {
        return;
    }

    vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    if (vector) {
        /*
         * The per-vCPU callback vector injected via lapic. Just
         * deliver it as an MSI.
         */
        MSIMessage msg = {
            .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
            .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
        };
        kvm_irqchip_send_msi(kvm_state, msg);
        return;
    }

    switch (type) {
    case HVM_PARAM_CALLBACK_TYPE_VECTOR:
        /*
         * If the evtchn_upcall_pending field in the vcpu_info is set, then
         * KVM will automatically deliver the vector on entering the vCPU
         * so all we have to do is kick it out.
         */
        qemu_cpu_kick(cs);
        break;

    case HVM_PARAM_CALLBACK_TYPE_GSI:
    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
        if (vcpu_id == 0) {
            xen_evtchn_set_callback_level(1);
        }
        break;
    }
}

static int kvm_xen_set_vcpu_timer(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    struct kvm_xen_vcpu_attr va = {
        .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        .u.timer.port = env->xen_virq[VIRQ_TIMER],
        .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
        .u.timer.expires_ns = env->xen_singleshot_timer_ns,
    };

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
}

static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
{
    kvm_xen_set_vcpu_timer(cs);
}

int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);

    if (!cs) {
        return -ENOENT;
    }

    /* cpu.h doesn't include the actual Xen header. */
    qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);

    if (virq >= NR_VIRQS) {
        return -EINVAL;
    }

    if (port && X86_CPU(cs)->env.xen_virq[virq]) {
        return -EEXIST;
    }

    X86_CPU(cs)->env.xen_virq[virq] = port;
    if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
        async_run_on_cpu(cs, do_set_vcpu_timer_virq,
                         RUN_ON_CPU_HOST_INT(port));
    }

    return 0;
}

static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_time_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          env->xen_vcpu_time_info_gpa);
}

static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_runstate_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          env->xen_vcpu_runstate_gpa);
}

static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;
    env->xen_vcpu_callback_vector = 0;
    env->xen_singleshot_timer_ns = 0;
    memset(env->xen_virq, 0, sizeof(env->xen_virq));

    set_vcpu_info(cs, INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          INVALID_GPA);
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);
        kvm_xen_set_vcpu_timer(cs);
    }
}

static int xen_set_shared_info(uint64_t gfn)
{
    uint64_t gpa = gfn << TARGET_PAGE_BITS;
    int i, err;

    QEMU_IOTHREAD_LOCK_GUARD();

    /*
     * The xen_overlay device tells KVM about it too, since it had to
     * do that on migration load anyway (unless we're going to jump
     * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific).
     */
    err = xen_overlay_map_shinfo_page(gpa);
    if (err) {
        return err;
    }

    trace_kvm_xen_set_shared_info(gfn);

    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
        CPUState *cpu = qemu_get_cpu(i);
        if (cpu) {
            async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
                             RUN_ON_CPU_HOST_ULONG(gpa));
        }
        gpa += sizeof(vcpu_info_t);
    }

    return err;
}
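
/*
 * The loop above mirrors the layout of the shared_info page, which starts
 * with an array of vcpu_info structures for the first XEN_LEGACY_MAX_VCPUS
 * vCPUs. Those embedded structures act as each vCPU's default vcpu_info
 * until the guest explicitly registers one elsewhere with
 * VCPUOP_register_vcpu_info.
 */
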
static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
{
    switch (space) {
    case XENMAPSPACE_shared_info:
        if (idx > 0) {
            return -EINVAL;
        }
        return xen_set_shared_info(gfn);

    case XENMAPSPACE_grant_table:
        return xen_gnttab_map_page(idx, gfn);

    case XENMAPSPACE_gmfn:
    case XENMAPSPACE_gmfn_range:
        return -ENOTSUP;

    case XENMAPSPACE_gmfn_foreign:
    case XENMAPSPACE_dev_mmio:
        return -EPERM;

    default:
        return -EINVAL;
    }
}

static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    struct xen_add_to_physmap xatp;
    CPUState *cs = CPU(cpu);

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap xatp32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
        if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
            return -EFAULT;
        }
        xatp.domid = xatp32.domid;
        xatp.size = xatp32.size;
        xatp.space = xatp32.space;
        xatp.idx = xatp32.idx;
        xatp.gpfn = xatp32.gpfn;
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
            return -EFAULT;
        }
    }

    if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
        return -ESRCH;
    }

    return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
}
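
/*
 * For reference, a guest typically reaches this path with something like the
 * following (a guest-side sketch, not QEMU code; "chosen_gfn" is whatever
 * guest frame the kernel picked for the mapping):
 *
 *     struct xen_add_to_physmap xatp = {
 *         .domid = DOMID_SELF,
 *         .space = XENMAPSPACE_shared_info,
 *         .idx   = 0,
 *         .gpfn  = chosen_gfn,
 *     };
 *     HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
 */
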
static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   uint64_t arg)
{
    struct xen_add_to_physmap_batch xatpb;
    unsigned long idxs_gva, gpfns_gva, errs_gva;
    CPUState *cs = CPU(cpu);
    size_t op_sz;

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap_batch xatpb32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
            return -EFAULT;
        }
        xatpb.domid = xatpb32.domid;
        xatpb.space = xatpb32.space;
        xatpb.size = xatpb32.size;

        idxs_gva = xatpb32.idxs.c;
        gpfns_gva = xatpb32.gpfns.c;
        errs_gva = xatpb32.errs.c;
        op_sz = sizeof(uint32_t);
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
            return -EFAULT;
        }
        op_sz = sizeof(unsigned long);
        idxs_gva = (unsigned long)xatpb.idxs.p;
        gpfns_gva = (unsigned long)xatpb.gpfns.p;
        errs_gva = (unsigned long)xatpb.errs.p;
    }

    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
        return -ESRCH;
    }

    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
    if (xatpb.space == XENMAPSPACE_gmfn_range) {
        return -EINVAL;
    }

    while (xatpb.size--) {
        unsigned long idx = 0;
        unsigned long gpfn = 0;
        int err;

        /* For 32-bit compat this only copies the low 32 bits of each */
        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
            return -EFAULT;
        }
        idxs_gva += op_sz;
        gpfns_gva += op_sz;

        err = add_to_physmap_one(xatpb.space, idx, gpfn);

        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
            return -EFAULT;
        }
        errs_gva += sizeof(err);
    }
    return 0;
}

static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    int err;

    switch (cmd) {
    case XENMEM_add_to_physmap:
        err = do_add_to_physmap(exit, cpu, arg);
        break;

    case XENMEM_add_to_physmap_batch:
        err = do_add_to_physmap_batch(exit, cpu, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    struct xen_hvm_param hp;
    int err = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(hp) == 16);

    if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
        goto out;
    }

    if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
        err = -ESRCH;
        goto out;
    }

    switch (hp.index) {
    case HVM_PARAM_CALLBACK_IRQ:
        qemu_mutex_lock_iothread();
        err = xen_evtchn_set_callback_param(hp.value);
        qemu_mutex_unlock_iothread();
        xen_set_long_mode(exit->u.hcall.longmode);
        break;
    default:
        return false;
    }

out:
    exit->u.hcall.result = err;
    return true;
}

static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    struct xen_hvm_param hp;
    int err = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(hp) == 16);

    if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
        goto out;
    }

    if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
        err = -ESRCH;
        goto out;
    }

    switch (hp.index) {
    case HVM_PARAM_STORE_PFN:
        hp.value = XEN_SPECIAL_PFN(XENSTORE);
        break;
    case HVM_PARAM_STORE_EVTCHN:
        hp.value = xen_xenstore_get_port();
        break;
    default:
        return false;
    }

    if (kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
    }
out:
    exit->u.hcall.result = err;
    return true;
}

static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
                                              X86CPU *cpu, uint64_t arg)
{
    struct xen_hvm_evtchn_upcall_vector up;
    CPUState *target_cs;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(up) == 8);

    if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
        return -EFAULT;
    }

    if (up.vector < 0x10) {
        return -EINVAL;
    }

    target_cs = qemu_get_cpu(up.vcpu);
    if (!target_cs) {
        return -EINVAL;
    }

    async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
                     RUN_ON_CPU_HOST_INT(up.vector));
    return 0;
}

static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                 int cmd, uint64_t arg)
{
    int ret = -ENOSYS;

    switch (cmd) {
    case HVMOP_set_evtchn_upcall_vector:
        ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
                                                 exit->u.hcall.params[0]);
        break;

    case HVMOP_pagetable_dying:
        ret = -ENOSYS;
        break;

    case HVMOP_set_param:
        return handle_set_param(exit, cpu, arg);

    case HVMOP_get_param:
        return handle_get_param(exit, cpu, arg);

    default:
        return false;
    }

    exit->u.hcall.result = ret;
    return true;
}

static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_register_vcpu_info rvi;
    uint64_t gpa;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rvi) == 16);
    qemu_build_assert(sizeof(struct vcpu_info) == 64);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
        return -EFAULT;
    }

    if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
        return -EINVAL;
    }

    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
                                          uint64_t arg)
{
    struct vcpu_register_time_memory_area tma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(tma) == 8);
    qemu_build_assert(sizeof(struct vcpu_time_info) == 32);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
        return -EFAULT;
    }

    /*
     * Xen actually uses the GVA and does the translation through the guest
     * page tables each time. But Linux/KVM uses the GPA, on the assumption
     * that guests only ever use *global* addresses (kernel virtual addresses)
     * for it. If Linux is changed to redo the GVA→GPA translation each time,
     * it will offer a new vCPU attribute for that, and we'll use it instead.
     */
    if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
        len < sizeof(struct vcpu_time_info)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
                                         uint64_t arg)
{
    struct vcpu_register_runstate_memory_area rma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rma) == 8);
    /* The runstate area actually does change size, but Linux copes. */

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
        return -EFAULT;
    }

    /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
    if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static uint64_t kvm_get_current_ns(void)
{
    struct kvm_clock_data data;
    int ret;

    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
    if (ret < 0) {
        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
        abort();
    }

    return data.clock;
}

static void xen_vcpu_singleshot_timer_event(void *opaque)
{
    CPUState *cpu = opaque;
    CPUX86State *env = &X86_CPU(cpu)->env;
    uint16_t port = env->xen_virq[VIRQ_TIMER];

    if (likely(port)) {
        xen_evtchn_set_port(port);
    }

    qemu_mutex_lock(&env->xen_timers_lock);
    env->xen_singleshot_timer_ns = 0;
    qemu_mutex_unlock(&env->xen_timers_lock);
}

static void xen_vcpu_periodic_timer_event(void *opaque)
{
    CPUState *cpu = opaque;
    CPUX86State *env = &X86_CPU(cpu)->env;
    uint16_t port = env->xen_virq[VIRQ_TIMER];
    int64_t qemu_now;

    if (likely(port)) {
        xen_evtchn_set_port(port);
    }

    qemu_mutex_lock(&env->xen_timers_lock);

    qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    timer_mod_ns(env->xen_periodic_timer,
                 qemu_now + env->xen_periodic_timer_period);

    qemu_mutex_unlock(&env->xen_timers_lock);
}

static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
{
    CPUX86State *tenv = &X86_CPU(target)->env;
    int64_t qemu_now;

    timer_del(tenv->xen_periodic_timer);

    qemu_mutex_lock(&tenv->xen_timers_lock);

    qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
    tenv->xen_periodic_timer_period = period_ns;

    qemu_mutex_unlock(&tenv->xen_timers_lock);
    return 0;
}

#define MILLISECS(_ms)  ((int64_t)((_ms) * 1000000ULL))
#define MICROSECS(_us)  ((int64_t)((_us) * 1000ULL))
#define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
#define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
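
/*
 * For scale: STIME_DELTA_MAX is 2^62 - 1 ns, i.e. roughly 4.6e9 seconds or
 * about 146 years, so NOW() + delta stays comfortably within the signed
 * 64-bit STIME_MAX range for any realistic uptime.
 */
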
static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_set_periodic_timer spt;

    qemu_build_assert(sizeof(spt) == 8);
    if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
        return -EFAULT;
    }

    if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
        return -EINVAL;
    }

    return do_set_periodic_timer(target, spt.period_ns);
}

static int vcpuop_stop_periodic_timer(CPUState *target)
{
    CPUX86State *tenv = &X86_CPU(target)->env;

    qemu_mutex_lock(&tenv->xen_timers_lock);

    timer_del(tenv->xen_periodic_timer);
    tenv->xen_periodic_timer_period = 0;

    qemu_mutex_unlock(&tenv->xen_timers_lock);
    return 0;
}

static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
                                   bool future, bool linux_wa)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    int64_t now = kvm_get_current_ns();
    int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    int64_t delta = timeout_abs - now;

    if (future && timeout_abs < now) {
        return -ETIME;
    }

    if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
                             (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
        /*
         * Xen has a 'Linux workaround' in do_set_timer_op() which checks
         * for negative absolute timeout values (caused by integer
         * overflow), and for values about 13 days in the future (2^50ns)
         * which would be caused by jiffies overflow. For those cases, it
         * sets the timeout 100ms in the future (not *too* soon, since if
         * a guest really did set a long timeout on purpose we don't want
         * to keep churning CPU time by waking it up).
         */
        delta = (100 * SCALE_MS);
        timeout_abs = now + delta;
    }

    qemu_mutex_lock(&env->xen_timers_lock);

    timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
    env->xen_singleshot_timer_ns = now + delta;

    qemu_mutex_unlock(&env->xen_timers_lock);
    return 0;
}
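
/*
 * Sanity check on the workaround above: 2^50 ns is about 1.13e15 ns, i.e.
 * roughly 1.13e6 seconds or a little over 13 days, matching the "about 13
 * days in the future" threshold described in the comment.
 */
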
static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
{
    struct vcpu_set_singleshot_timer sst = { 0 };

    /*
     * The struct is a uint64_t followed by a uint32_t. On 32-bit that
     * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
     * that get used are identical, and there's four bytes of padding
     * unused at the end. For true Xen compatibility we should attempt
     * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
     * if we can't get the padding too. But that's daft. Just copy what
     * we need.
     */
    qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
    qemu_build_assert(sizeof(sst) >= 12);

    if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
        return -EFAULT;
    }

    return do_set_singleshot_timer(cs, sst.timeout_abs_ns,
                                   !!(sst.flags & VCPU_SSHOTTMR_future),
                                   false);
}

static int vcpuop_stop_singleshot_timer(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;

    qemu_mutex_lock(&env->xen_timers_lock);

    timer_del(env->xen_singleshot_timer);
    env->xen_singleshot_timer_ns = 0;

    qemu_mutex_unlock(&env->xen_timers_lock);
    return 0;
}

static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                       uint64_t timeout)
{
    int err;

    if (unlikely(timeout == 0)) {
        err = vcpuop_stop_singleshot_timer(CPU(cpu));
    } else {
        err = do_set_singleshot_timer(CPU(cpu), timeout, false, true);
    }
    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                  int cmd, int vcpu_id, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
    int err;

    if (!dest) {
        err = -ENOENT;
        goto out;
    }

    switch (cmd) {
    case VCPUOP_register_runstate_memory_area:
        err = vcpuop_register_runstate_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_time_memory_area:
        err = vcpuop_register_vcpu_time_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_info:
        err = vcpuop_register_vcpu_info(cs, dest, arg);
        break;
    case VCPUOP_set_singleshot_timer: {
        if (cs->cpu_index == vcpu_id) {
            err = vcpuop_set_singleshot_timer(dest, arg);
        } else {
            err = -EINVAL;
        }
        break;
    }
    case VCPUOP_stop_singleshot_timer:
        if (cs->cpu_index == vcpu_id) {
            err = vcpuop_stop_singleshot_timer(dest);
        } else {
            err = -EINVAL;
        }
        break;
    case VCPUOP_set_periodic_timer: {
        err = vcpuop_set_periodic_timer(cs, dest, arg);
        break;
    }
    case VCPUOP_stop_periodic_timer:
        err = vcpuop_stop_periodic_timer(dest);
        break;

    default:
        return false;
    }

out:
    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case EVTCHNOP_init_control:
    case EVTCHNOP_expand_array:
    case EVTCHNOP_set_priority:
        /* We do not support FIFO channels at this point */
        err = -ENOSYS;
        break;

    case EVTCHNOP_status: {
        struct evtchn_status status;

        qemu_build_assert(sizeof(status) == 24);
        if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_status_op(&status);
        if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_close: {
        struct evtchn_close close;

        qemu_build_assert(sizeof(close) == 4);
        if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_close_op(&close);
        break;
    }
    case EVTCHNOP_unmask: {
        struct evtchn_unmask unmask;

        qemu_build_assert(sizeof(unmask) == 4);
        if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_unmask_op(&unmask);
        break;
    }
    case EVTCHNOP_bind_virq: {
        struct evtchn_bind_virq virq;

        qemu_build_assert(sizeof(virq) == 12);
        if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_virq_op(&virq);
        if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_pirq: {
        struct evtchn_bind_pirq pirq;

        qemu_build_assert(sizeof(pirq) == 12);
        if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_pirq_op(&pirq);
        if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_ipi: {
        struct evtchn_bind_ipi ipi;

        qemu_build_assert(sizeof(ipi) == 8);
        if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_ipi_op(&ipi);
        if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_send: {
        struct evtchn_send send;

        qemu_build_assert(sizeof(send) == 4);
        if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_send_op(&send);
        break;
    }
    case EVTCHNOP_alloc_unbound: {
        struct evtchn_alloc_unbound alloc;

        qemu_build_assert(sizeof(alloc) == 8);
        if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_alloc_unbound_op(&alloc);
        if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_interdomain: {
        struct evtchn_bind_interdomain interdomain;

        qemu_build_assert(sizeof(interdomain) == 12);
        if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_interdomain_op(&interdomain);
        if (!err &&
            kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_vcpu: {
        struct evtchn_bind_vcpu vcpu;

        qemu_build_assert(sizeof(vcpu) == 8);
        if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_vcpu_op(&vcpu);
        break;
    }
    case EVTCHNOP_reset: {
        struct evtchn_reset reset;

        qemu_build_assert(sizeof(reset) == 2);
        if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_reset_op(&reset);
        break;
    }
    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

int kvm_xen_soft_reset(void)
{
    CPUState *cpu;
    int err;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    err = xen_evtchn_soft_reset();
    if (err) {
        return err;
    }

    /*
     * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
     * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
     * deliver to the timer interrupt and treats that as 'disabled'.
     */
    err = xen_evtchn_set_callback_param(0);
    if (err) {
        return err;
    }

    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
    }

    err = xen_overlay_map_shinfo_page(INVALID_GFN);
    if (err) {
        return err;
    }

    err = xen_gnttab_reset();
    if (err) {
        return err;
    }

    err = xen_xenstore_reset();
    if (err) {
        return err;
    }

    return 0;
}

static int schedop_shutdown(CPUState *cs, uint64_t arg)
{
    struct sched_shutdown shutdown;
    int ret = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(shutdown) == 4);

    if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
        return -EFAULT;
    }

    switch (shutdown.reason) {
    case SHUTDOWN_crash:
        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
        qemu_system_guest_panicked(NULL);
        break;

    case SHUTDOWN_reboot:
        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        break;

    case SHUTDOWN_poweroff:
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;

    case SHUTDOWN_soft_reset:
        qemu_mutex_lock_iothread();
        ret = kvm_xen_soft_reset();
        qemu_mutex_unlock_iothread();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case SCHEDOP_shutdown:
        err = schedop_shutdown(cs, arg);
        break;

    case SCHEDOP_poll:
        /*
         * Linux will panic if this doesn't work. Just yield; it's not
         * worth overthinking it because with event channel handling
         * in KVM, the kernel will intercept this and it will never
         * reach QEMU anyway. The semantics of the hypercall explicitly
         * permit spurious wakeups.
         */
    case SCHEDOP_yield:
        sched_yield();
        err = 0;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg, int count)
{
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case GNTTABOP_set_version: {
        struct gnttab_set_version set;

        qemu_build_assert(sizeof(set) == 4);
        if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
            err = -EFAULT;
            break;
        }

        err = xen_gnttab_set_version_op(&set);
        if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
            err = -EFAULT;
        }
        break;
    }
    case GNTTABOP_get_version: {
        struct gnttab_get_version get;

        qemu_build_assert(sizeof(get) == 8);
        if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
            break;
        }

        err = xen_gnttab_get_version_op(&get);
        if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
        }
        break;
    }
    case GNTTABOP_query_size: {
        struct gnttab_query_size size;

        qemu_build_assert(sizeof(size) == 16);
        if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
            err = -EFAULT;
            break;
        }

        err = xen_gnttab_query_size_op(&size);
        if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
            err = -EFAULT;
        }
        break;
    }
    case GNTTABOP_setup_table:
    case GNTTABOP_map_grant_ref:
    case GNTTABOP_unmap_grant_ref:
    case GNTTABOP_swap_grant_ref:
        return false;

    default:
        /* Xen explicitly returns -ENOSYS to HVM guests for all others */
        err = -ENOSYS;
        break;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                     int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case PHYSDEVOP_map_pirq: {
        struct physdev_map_pirq map;

        if (hypercall_compat32(exit->u.hcall.longmode)) {
            struct compat_physdev_map_pirq *map32 = (void *)&map;

            if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
                err = -EFAULT;
                break;
            }

            /*
             * The only thing that's different is the alignment of the
             * uint64_t table_base at the end, which gets padding to make
             * it 64-bit aligned in the 64-bit version.
             */
            qemu_build_assert(sizeof(*map32) == 36);
            qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
                              offsetof(struct compat_physdev_map_pirq, entry_nr));
            memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
        } else {
            if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
                err = -EFAULT;
                break;
            }
        }
        err = xen_physdev_map_pirq(&map);
        /*
         * Since table_base is an IN parameter and won't be changed, just
         * copy the size of the compat structure back to the guest.
         */
        if (!err && kvm_copy_to_gva(cs, arg, &map,
                                    sizeof(struct compat_physdev_map_pirq))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_unmap_pirq: {
        struct physdev_unmap_pirq unmap;

        qemu_build_assert(sizeof(unmap) == 8);
        if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_unmap_pirq(&unmap);
        if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_eoi: {
        struct physdev_eoi eoi;

        qemu_build_assert(sizeof(eoi) == 4);
        if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_eoi_pirq(&eoi);
        if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_irq_status_query: {
        struct physdev_irq_status_query query;

        qemu_build_assert(sizeof(query) == 8);
        if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_query_pirq(&query);
        if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_get_free_pirq: {
        struct physdev_get_free_pirq get;

        qemu_build_assert(sizeof(get) == 8);
        if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_get_free_pirq(&get);
        if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
        err = -ENOSYS;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    uint16_t code = exit->u.hcall.input;

    if (exit->u.hcall.cpl > 0) {
        exit->u.hcall.result = -EPERM;
        return true;
    }

    switch (code) {
    case __HYPERVISOR_set_timer_op:
        if (exit->u.hcall.longmode) {
            return kvm_xen_hcall_set_timer_op(exit, cpu,
                                              exit->u.hcall.params[0]);
        } else {
            /* In 32-bit mode, the 64-bit timer value is in two args. */
            uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
                           (uint32_t)exit->u.hcall.params[0];
            return kvm_xen_hcall_set_timer_op(exit, cpu, val);
        }
    case __HYPERVISOR_grant_table_op:
        return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1],
                                       exit->u.hcall.params[2]);
    case __HYPERVISOR_sched_op:
        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
                                      exit->u.hcall.params[1]);
    case __HYPERVISOR_event_channel_op:
        return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_vcpu_op:
        return kvm_xen_hcall_vcpu_op(exit, cpu,
                                     exit->u.hcall.params[0],
                                     exit->u.hcall.params[1],
                                     exit->u.hcall.params[2]);
    case __HYPERVISOR_hvm_op:
        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
                                    exit->u.hcall.params[1]);
    case __HYPERVISOR_memory_op:
        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_physdev_op:
        return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
                                        exit->u.hcall.params[1]);
    case __HYPERVISOR_xen_version:
        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                         exit->u.hcall.params[1]);
    default:
        return false;
    }
}
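
/*
 * Guest hypercalls arrive here because kvm_xen_init() set
 * KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, which makes KVM forward them to
 * userspace as KVM_EXIT_XEN exits of type KVM_EXIT_XEN_HCALL, checked by
 * kvm_xen_handle_exit() below.
 */
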
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    if (exit->type != KVM_EXIT_XEN_HCALL) {
        return -1;
    }

    /*
     * The kernel latches the guest 32/64 mode when the MSR is used to fill
     * the hypercall page. So if we see a hypercall in a mode that doesn't
     * match our own idea of the guest mode, fetch the kernel's idea of the
     * "long mode" to remain in sync.
     */
    if (exit->u.hcall.longmode != xen_is_long_mode()) {
        xen_sync_long_mode();
    }

    if (!do_kvm_xen_handle_exit(cpu, exit)) {
        /*
         * Some hypercalls will be deliberately "implemented" by returning
         * -ENOSYS. This case is for hypercalls which are unexpected.
         */
        exit->u.hcall.result = -ENOSYS;
        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
                      PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
                      (uint64_t)exit->u.hcall.input,
                      (uint64_t)exit->u.hcall.params[0],
                      (uint64_t)exit->u.hcall.params[1],
                      (uint64_t)exit->u.hcall.params[2]);
    }

    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
                            exit->u.hcall.input, exit->u.hcall.params[0],
                            exit->u.hcall.params[1], exit->u.hcall.params[2],
                            exit->u.hcall.result);
    return 0;
}

uint16_t kvm_xen_get_gnttab_max_frames(void)
{
    KVMState *s = KVM_STATE(current_accel());
    return s->xen_gnttab_max_frames;
}

uint16_t kvm_xen_get_evtchn_max_pirq(void)
{
    KVMState *s = KVM_STATE(current_accel());
    return s->xen_evtchn_max_pirq;
}

int kvm_put_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }

    if (gpa != INVALID_GPA) {
        ret = set_vcpu_info(cs, gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_time_info_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_runstate_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    if (env->xen_periodic_timer_period) {
        ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
        if (ret < 0) {
            return ret;
        }
    }

    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
        /*
         * If the kernel has EVTCHN_SEND support then it handles timers too,
         * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
         */
        if (env->xen_singleshot_timer_ns) {
            ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
                                          false, false);
            if (ret < 0) {
                return ret;
            }
        }
        return 0;
    }

    if (env->xen_vcpu_callback_vector) {
        ret = kvm_xen_set_vcpu_callback_vector(cs);
        if (ret < 0) {
            return ret;
        }
    }

    if (env->xen_virq[VIRQ_TIMER]) {
        ret = kvm_xen_set_vcpu_timer(cs);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}

int kvm_get_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    /*
     * The kernel does not mark vcpu_info as dirty when it delivers interrupts
     * to it. It's up to userspace to *assume* that any page shared thus is
     * always considered dirty. The shared_info page is different since it's
     * an overlay and migrated separately anyway.
     */
    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }
    if (gpa != INVALID_GPA) {
        MemoryRegionSection mrs = memory_region_find(get_system_memory(),
                                                     gpa,
                                                     sizeof(struct vcpu_info));
        if (mrs.mr &&
            !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
            memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
                                    sizeof(struct vcpu_info));
        }
    }

    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
        return 0;
    }

    /*
     * If the kernel is accelerating timers, read out the current value of the
     * singleshot timer deadline.
     */
    if (env->xen_virq[VIRQ_TIMER]) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        };
        ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
        if (ret < 0) {
            return ret;
        }
        env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
    }

    return 0;
}