/*
 * QEMU Windows Hypervisor Platform accelerator (WHPX)
 *
 * Copyright Microsoft Corp. 2017
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
#include "gdbstub/helpers.h"
#include "qemu/accel.h"
#include "sysemu/whpx.h"
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "hw/boards.h"
#include "hw/intc/ioapic.h"
#include "hw/i386/apic_internal.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-common.h"
#include "qapi/qapi-visit-common.h"
#include "migration/blocker.h"

#include "whpx-internal.h"
#include "whpx-accel-ops.h"

#include <WinHvPlatform.h>
#include <WinHvEmulation.h>

#define HYPERV_APIC_BUS_FREQUENCY      (200000000ULL)
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers - Skipped */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
    WHvX64RegisterKernelGsBase,
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,

    /* Interrupt / Event Registers */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1,
     * WHvX64RegisterDeliverabilityNotifications,
     */
};
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
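/*
 * Note: the element order of whpx_register_names above is load-bearing.
 * whpx_set_registers() and whpx_get_registers() below walk the array with a
 * running index and assert() the expected register name at every step.
 */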
/*
 * The current implementation of instruction stepping sets the TF flag
 * in RFLAGS, causing the CPU to raise an INT1 after each instruction.
 * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception.
 *
 * This approach has a few limitations:
 *     1. Stepping over a PUSHF/SAHF instruction will save the TF flag
 *        along with the other flags, possibly restoring it later. It would
 *        result in another INT1 when the flags are restored, triggering
 *        a stop in gdb that could be cleared by doing another step.
 *
 *        Stepping over a POPF/LAHF instruction will let it overwrite the
 *        TF flag, ending the stepping mode.
 *
 *     2. Stepping over an instruction raising an exception (e.g. INT, DIV,
 *        or anything that could result in a page fault) will save the flags
 *        to the stack, clear the TF flag, and let the guest execute the
 *        handler. Normally, the guest will restore the original flags,
 *        which will resume single-stepping.
 *
 *     3. Debuggers running in the guest may wish to set TF to do instruction
 *        stepping. INT1 events generated by them would be intercepted by us,
 *        as long as gdb is connected to QEMU.
 *
 * In practice this means that:
 *     1. Stepping through flags-modifying instructions may cause gdb to
 *        continue or stop in unexpected places. This will be fully recoverable
 *        and will not crash the target.
 *
 *     2. Stepping over an instruction that triggers an exception will step
 *        over the exception handler, not into it.
 *
 *     3. Debugging the guest via gdb while running a debugger in the guest
 *        at the same time may lead to unexpected effects. Removing all
 *        breakpoints set via QEMU will prevent any further interference
 *        with the guest-level debuggers.
 *
 * The limitations can be addressed as shown below:
 *     1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of
 *        stepping through them. The exact semantics of the instructions are
 *        defined in the "Combined Volume Set of Intel 64 and IA-32
 *        Architectures Software Developer's Manuals"; however, it involves a
 *        fair amount of corner cases due to compatibility with real mode,
 *        virtual 8086 mode, and differences between 64-bit and 32-bit modes.
 *
 *     2. We could step into the guest's exception handlers using the following
 *        sequence:
 *          a. Temporarily enable catching of all exception types via
 *             whpx_set_exception_exit_bitmap().
 *          b. Once an exception is intercepted, read the IDT/GDT and locate
 *             the original handler.
 *          c. Patch the original handler, injecting an INT3 at the beginning.
 *          d. Update the exception exit bitmap to only catch the
 *             WHvX64ExceptionTypeBreakpointTrap exception.
 *          e. Let the affected CPU run in the exclusive mode.
 *          f. Restore the original handler and the exception exit bitmap.
 *        Note that handling all corner cases related to IDT/GDT is harder
 *        than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a
 *        rough idea.
 *
 *     3. In order to properly support guest-level debugging in parallel with
 *        the QEMU-level debugging, we would need to be able to pass some INT1
 *        events to the guest. This could be done via the following methods:
 *          a. Using the WHvRegisterPendingEvent register. As of Windows 21H1,
 *             it seems to only work for interrupts and not software
 *             exceptions.
 *          b. Locating and patching the original handler by parsing IDT/GDT.
 *             This involves relatively complex logic outlined in the previous
 *             paragraph.
 *          c. Emulating the exception invocation (i.e. manually updating RIP,
 *             RFLAGS, and pushing the old values to stack). This is even more
 *             complicated than the previous option, since it involves checking
 *             CPL, gate attributes, and doing various adjustments depending
 *             on the current CPU mode, whether the CPL is changing, etc.
 */
typedef enum WhpxStepMode {
    WHPX_STEP_NONE = 0,
    /* Halt other VCPUs */
    WHPX_STEP_EXCLUSIVE,
} WhpxStepMode;
struct whpx_vcpu {
    WHV_EMULATOR_HANDLE emulator;
    bool window_registered;
    bool interruptable;
    bool ready_for_pic_interrupt;
    uint64_t tpr;
    uint64_t apic_base;
    bool interruption_pending;

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
static bool whpx_allowed;
static bool whp_dispatch_initialized;
static HMODULE hWinHvPlatform, hWinHvEmulation;
static uint32_t max_vcpu_index;
static WHV_PROCESSOR_XSAVE_FEATURES whpx_xsave_cap;

struct whpx_state whpx_global;
struct WHPDispatch whp_dispatch;
static bool whpx_has_xsave(void)
{
    return whpx_xsave_cap.XsaveSupport;
}
static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
{
    return (struct whpx_vcpu *)cpu->hax_vcpu;
}
static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
                                             int r86)
{
    WHV_X64_SEGMENT_REGISTER hs;
    unsigned flags = qs->flags;

    hs.Base = qs->base;
    hs.Limit = qs->limit;
    hs.Selector = qs->selector;

    if (v86) {
        hs.Attributes = 0;
        hs.SegmentType = 3;
        hs.Present = 1;
        hs.DescriptorPrivilegeLevel = 3;
        hs.NonSystemSegment = 1;
    } else {
        hs.Attributes = (flags >> DESC_TYPE_SHIFT);

        if (r86) {
            /* hs.Base &= 0xfffff; */
        }
    }

    return hs;
}
static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
{
    SegmentCache qs;

    qs.base = hs->Base;
    qs.limit = hs->Limit;
    qs.selector = hs->Selector;

    qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;

    return qs;
}
/* X64 Extended Control Registers */
static void whpx_set_xcrs(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    WHV_REGISTER_VALUE xcr0;
    WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;

    if (!whpx_has_xsave()) {
        return;
    }

    /* Only xcr0 is supported by the hypervisor currently */
    xcr0.Reg64 = env->xcr0;
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set register xcr0, hr=%08lx", hr);
    }
}
static int whpx_set_tsc(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE tsc_val;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;

    /*
     * Suspend the partition prior to setting the TSC to reduce the variance
     * in TSC across vCPUs. When the first vCPU runs post suspend, the
     * partition is automatically resumed.
     */
    if (whp_dispatch.WHvSuspendPartitionTime) {
        /*
         * Failing to suspend the partition before setting the TSC is not a
         * fatal error. It just increases the likelihood of TSC variance
         * between vCPUs, and some guest OSes are able to handle that just
         * fine.
         */
        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
        if (FAILED(hr)) {
            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
        }
    }

    tsc_val.Reg64 = env->tsc;
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
        return -1;
    }

    return 0;
}
/*
 * The CR8 register in the CPU is mapped to the TPR register of the APIC;
 * however, they use a slightly different encoding. Specifically:
 *
 *     APIC.TPR[bits 7:4] = CR8[bits 3:0]
 *
 * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
 * and IA-32 Architectures Software Developer's Manual.
 *
 * The functions below translate the value of CR8 to TPR and vice versa.
 */

static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
{
    return tpr >> 4;
}

static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8)
{
    return cr8 << 4;
}
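/*
 * Worked example of the mapping above: if the guest's APIC TPR is 0x30,
 * whpx_apic_tpr_to_cr8(0x30) yields a CR8 value of 0x3, and
 * whpx_cr8_to_apic_tpr(0x3) maps it back to 0x30.
 */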
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int v86, r86;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * The following MSRs have side effects on the guest or are too heavy
     * for runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers need to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_set_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
               env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;

    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;

    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;

    /* Interrupt / Event Registers - Skipped */

    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }
}
static int whpx_get_tsc(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE tsc_val;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
        return -1;
    }

    env->tsc = tsc_val.Reg64;
    return 0;
}
/* X64 Extended Control Registers */
static void whpx_get_xcrs(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    WHV_REGISTER_VALUE xcr0;
    WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;

    if (!whpx_has_xsave()) {
        return;
    }

    /* Only xcr0 is supported by the hypervisor currently */
    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get register xcr0, hr=%08lx", hr);
        return;
    }

    env->xcr0 = xcr0.Reg64;
}
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
    }

    if (whpx_apic_in_platform()) {
        /*
         * Fetch the TPR value from the emulated APIC. It may get overwritten
         * below with the value from CR8 returned by
         * WHvGetVirtualProcessorRegisters().
         */
        whpx_apic_get(x86_cpu->apic_state);
        vcpu->tpr = whpx_apic_tpr_to_cr8(
            cpu_get_apic_tpr(x86_cpu->apic_state));
    }

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    tpr = vcxt.values[idx++].Reg64;
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(tpr));
    }

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers need to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_get_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
               vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;

    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    apic_base = vcxt.values[idx++].Reg64;
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;

    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;

    /* Interrupt / Event Registers - Skipped */

    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    if (whpx_apic_in_platform()) {
        whpx_apic_get(x86_cpu->apic_state);
    }

    x86_update_hflags(env);
}
static HRESULT CALLBACK whpx_emu_ioport_callback(
    void *ctx,
    WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
{
    MemTxAttrs attrs = { 0 };
    address_space_rw(&address_space_io, IoAccess->Port, attrs,
                     &IoAccess->Data, IoAccess->AccessSize,
                     IoAccess->Direction);
    return S_OK;
}
static HRESULT CALLBACK whpx_emu_mmio_callback(
    void *ctx,
    WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
{
    cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
                           ma->Direction);
    return S_OK;
}
static HRESULT CALLBACK whpx_emu_getreg_callback(
    void *ctx,
    const WHV_REGISTER_NAME *RegisterNames,
    UINT32 RegisterCount,
    WHV_REGISTER_VALUE *RegisterValues)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        RegisterNames, RegisterCount,
        RegisterValues);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor registers,"
                     " hr=%08lx", hr);
    }

    return hr;
}
static HRESULT CALLBACK whpx_emu_setreg_callback(
    void *ctx,
    const WHV_REGISTER_NAME *RegisterNames,
    UINT32 RegisterCount,
    const WHV_REGISTER_VALUE *RegisterValues)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        RegisterNames, RegisterCount,
        RegisterValues);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor registers,"
                     " hr=%08lx", hr);
    }

    /*
     * The emulator just successfully wrote the register state. We clear the
     * dirty state so we avoid the double write on resume of the VP.
     */
    cpu->vcpu_dirty = false;

    return hr;
}
static HRESULT CALLBACK whpx_emu_translate_callback(
    void *ctx,
    WHV_GUEST_VIRTUAL_ADDRESS Gva,
    WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
    WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
    WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;
    WHV_TRANSLATE_GVA_RESULT res;

    hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
                                      Gva, TranslateFlags, &res, Gpa);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
    } else {
        *TranslationResult = res.ResultCode;
    }

    return hr;
}
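/*
 * Callback table handed to WHvEmulatorCreateEmulator() in whpx_init_vcpu()
 * below. The emulation library calls back into QEMU whenever decoding an
 * intercepted instruction requires port I/O, guest memory, register state,
 * or a GVA translation.
 */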
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
{
    HRESULT hr;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS emu_status;

    hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
        vcpu->emulator, cpu,
        &vcpu->exit_ctx.VpContext, ctx,
        &emu_status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
        return -1;
    }

    if (!emu_status.EmulationSuccessful) {
        error_report("WHPX: Failed to emulate MMIO access with"
                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
        return -1;
    }

    return 0;
}
static int whpx_handle_portio(CPUState *cpu,
                              WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
{
    HRESULT hr;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS emu_status;

    hr = whp_dispatch.WHvEmulatorTryIoEmulation(
        vcpu->emulator, cpu,
        &vcpu->exit_ctx.VpContext, ctx,
        &emu_status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
        return -1;
    }

    if (!emu_status.EmulationSuccessful) {
        error_report("WHPX: Failed to emulate PortIO access with"
                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
        return -1;
    }

    return 0;
}
/*
 * Controls whether we should intercept various exceptions on the guest,
 * namely breakpoint/single-step events.
 *
 * The 'exceptions' argument accepts a bitmask, e.g:
 * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
 */
static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
{
    struct whpx_state *whpx = &whpx_global;
    WHV_PARTITION_PROPERTY prop = { 0, };
    HRESULT hr;

    if (exceptions == whpx->exception_exit_bitmap) {
        return S_OK;
    }

    prop.ExceptionExitBitmap = exceptions;

    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeExceptionExitBitmap,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    if (SUCCEEDED(hr)) {
        whpx->exception_exit_bitmap = exceptions;
    }

    return hr;
}
/*
 * This function is called before/after stepping over a single instruction.
 * It will update the CPU registers to arm/disarm the instruction stepping
 * accordingly.
 */
static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
    bool set,
    uint64_t *exit_context_rflags)
{
    WHV_REGISTER_NAME reg_name;
    WHV_REGISTER_VALUE reg_value;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;

    /*
     * If we are trying to step over a single instruction, we need to set the
     * TF bit in rflags. Otherwise, clear it.
     */
    reg_name = WHvX64RegisterRflags;
    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition,
        cpu->cpu_index,
        &reg_name,
        1,
        &reg_value);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
        return hr;
    }

    if (exit_context_rflags) {
        assert(*exit_context_rflags == reg_value.Reg64);
    }

    if (set) {
        /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */
        reg_value.Reg64 |= TF_MASK;
    } else {
        reg_value.Reg64 &= ~TF_MASK;
    }

    if (exit_context_rflags) {
        *exit_context_rflags = reg_value.Reg64;
    }

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition,
        cpu->cpu_index,
        &reg_name,
        1,
        &reg_value);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set rflags, hr=%08lx", hr);
        return hr;
    }

    reg_name = WHvRegisterInterruptState;
    reg_value.Reg64 = 0;

    /* Suspend delivery of hardware interrupts during single-stepping. */
    reg_value.InterruptState.InterruptShadow = set != 0;

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition,
        cpu->cpu_index,
        &reg_name,
        1,
        &reg_value);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set InterruptState, hr=%08lx", hr);
        return hr;
    }

    if (!set) {
        /*
         * We have just finished stepping over a single instruction,
         * and intercepted the INT1 generated by it.
         * We need to now hide the INT1 from the guest,
         * as it would not be expecting it.
         */

        reg_name = WHvX64RegisterPendingDebugException;
        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
            whpx->partition,
            cpu->cpu_index,
            &reg_name,
            1,
            &reg_value);

        if (FAILED(hr)) {
            error_report("WHPX: Failed to get pending debug exceptions,"
                         " hr=%08lx", hr);
            return hr;
        }

        if (reg_value.PendingDebugException.SingleStep) {
            reg_value.PendingDebugException.SingleStep = 0;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition,
                cpu->cpu_index,
                &reg_name,
                1,
                &reg_value);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to clear pending debug exceptions,"
                             " hr=%08lx", hr);
                return hr;
            }
        }
    }

    return S_OK;
}
/* Tries to find a breakpoint at the specified address. */
static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address)
{
    struct whpx_state *whpx = &whpx_global;
    int i;

    if (whpx->breakpoints.breakpoints) {
        for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
            if (address == whpx->breakpoints.breakpoints->data[i].address) {
                return &whpx->breakpoints.breakpoints->data[i];
            }
        }
    }

    return NULL;
}
/*
 * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
 * debugging user-mode applications. Since the WHPX API does not offer
 * an easy way to pass the intercepted exception back to the guest, we
 * resort to using INT1 instead, and let the guest always handle INT3.
 */
static const uint8_t whpx_breakpoint_instruction = 0xF1;
/*
 * The WHPX QEMU backend implements breakpoints by writing the INT1
 * instruction into memory (ignoring the DRx registers). This raises a few
 * issues that need to be carefully handled:
 *
 * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
 *    at the same location, and later remove them in arbitrary order.
 *    This should not cause memory corruption, and should only remove the
 *    physical breakpoint instruction when the last QEMU breakpoint is gone.
 *
 * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
 *    physical location. Hence, physically adding/removing a breakpoint can
 *    theoretically fail at any time. We need to keep track of it.
 *
 * The function below rebuilds a list of low-level breakpoints (one per
 * address, tracking the original instruction and any errors) from the list of
 * high-level breakpoints (set via cpu_breakpoint_insert()).
 *
 * In order to optimize performance, this function stores the list of
 * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
 * low-level ones, so that it won't be re-invoked until these breakpoints
 * change.
 *
 * Note that this function decides which breakpoints should be inserted into
 * memory, but doesn't actually do it. The memory accessing is done in
 * whpx_apply_breakpoints().
 */
static void whpx_translate_cpu_breakpoints(
    struct whpx_breakpoints *breakpoints,
    CPUState *cpu,
    int cpu_breakpoint_count)
{
    CPUBreakpoint *bp;
    int cpu_bp_index = 0;

    breakpoints->original_addresses =
        g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);

    breakpoints->original_address_count = cpu_breakpoint_count;

    int max_breakpoints = cpu_breakpoint_count +
        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);

    struct whpx_breakpoint_collection *new_breakpoints =
        g_malloc0(sizeof(struct whpx_breakpoint_collection)
                  + max_breakpoints * sizeof(struct whpx_breakpoint));

    new_breakpoints->allocated = max_breakpoints;
    new_breakpoints->used = 0;

    /*
     * 1. Preserve all old breakpoints that could not be automatically
     * cleared when the CPU got stopped.
     */
    if (breakpoints->breakpoints) {
        int i;
        for (i = 0; i < breakpoints->breakpoints->used; i++) {
            if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
                new_breakpoints->data[new_breakpoints->used++] =
                    breakpoints->breakpoints->data[i];
            }
        }
    }

    /* 2. Map all CPU breakpoints to WHPX breakpoints */
    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        int i;
        bool found = false;

        /* This will be used to detect changed CPU breakpoints later. */
        breakpoints->original_addresses[cpu_bp_index++] = bp->pc;

        for (i = 0; i < new_breakpoints->used; i++) {
            /*
             * WARNING: This loop has O(N^2) complexity, where N is the
             * number of breakpoints. It should not be a bottleneck in
             * real-world scenarios, since it only needs to run once after
             * the breakpoints have been modified.
             * If this ever becomes a concern, it can be optimized by storing
             * high-level breakpoint objects in a tree or hash map.
             */

            if (new_breakpoints->data[i].address == bp->pc) {
                /* There was already a breakpoint at this address. */
                if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
                    new_breakpoints->data[i].state = WHPX_BP_SET;
                } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
                    new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
                }

                found = true;
                break;
            }
        }

        if (!found && new_breakpoints->used < new_breakpoints->allocated) {
            /* No WHPX breakpoint at this address. Create one. */
            new_breakpoints->data[new_breakpoints->used].address = bp->pc;
            new_breakpoints->data[new_breakpoints->used].state =
                WHPX_BP_SET_PENDING;
            new_breakpoints->used++;
        }
    }

    /*
     * Free the previous breakpoint list. This can be optimized by keeping
     * it as a shadow buffer for the next computation instead of freeing
     * it immediately.
     */
    g_free(breakpoints->breakpoints);

    breakpoints->breakpoints = new_breakpoints;
}
/*
 * Physically inserts/removes the breakpoints by reading and writing the
 * physical memory, keeping a track of the failed attempts.
 *
 * Passing resuming=true  will try to set all previously unset breakpoints.
 * Passing resuming=false will remove all inserted ones.
 */
static void whpx_apply_breakpoints(
    struct whpx_breakpoint_collection *breakpoints,
    CPUState *cpu,
    bool resuming)
{
    int i, rc;
    if (!breakpoints) {
        return;
    }

    for (i = 0; i < breakpoints->used; i++) {
        /* Decide what to do right now based on the last known state. */
        WhpxBreakpointState state = breakpoints->data[i].state;
        switch (state) {
        case WHPX_BP_CLEARED:
            if (resuming) {
                state = WHPX_BP_SET_PENDING;
            }
            break;
        case WHPX_BP_SET_PENDING:
            if (!resuming) {
                state = WHPX_BP_CLEARED;
            }
            break;
        case WHPX_BP_SET:
            if (!resuming) {
                state = WHPX_BP_CLEAR_PENDING;
            }
            break;
        case WHPX_BP_CLEAR_PENDING:
            if (resuming) {
                state = WHPX_BP_SET;
            }
            break;
        }

        if (state == WHPX_BP_SET_PENDING) {
            /* Remember the original instruction. */
            rc = cpu_memory_rw_debug(cpu,
                breakpoints->data[i].address,
                &breakpoints->data[i].original_instruction,
                1,
                false);

            if (!rc) {
                /* Write the breakpoint instruction. */
                rc = cpu_memory_rw_debug(cpu,
                    breakpoints->data[i].address,
                    (void *)&whpx_breakpoint_instruction,
                    1,
                    true);
            }

            if (!rc) {
                state = WHPX_BP_SET;
            }
        }

        if (state == WHPX_BP_CLEAR_PENDING) {
            /* Restore the original instruction. */
            rc = cpu_memory_rw_debug(cpu,
                breakpoints->data[i].address,
                &breakpoints->data[i].original_instruction,
                1,
                true);

            if (!rc) {
                state = WHPX_BP_CLEARED;
            }
        }

        breakpoints->data[i].state = state;
    }
}
/*
 * This function is called when a VCPU is about to start and no other
 * VCPUs have been started so far. Since the VCPU start order could be
 * arbitrary, it doesn't have to be VCPU#0.
 *
 * It is used to commit the breakpoints into memory, and configure WHPX
 * to intercept debug exceptions.
 *
 * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
 * more VCPUs are already running, so this is the best place to do it.
 */
static int whpx_first_vcpu_starting(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    HRESULT hr;

    g_assert(qemu_mutex_iothread_locked());

    if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
            (whpx->breakpoints.breakpoints &&
             whpx->breakpoints.breakpoints->used)) {
        CPUBreakpoint *bp;
        int i = 0;
        bool update_pending = false;

        QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
            if (i >= whpx->breakpoints.original_address_count ||
                bp->pc != whpx->breakpoints.original_addresses[i]) {
                update_pending = true;
            }

            i++;
        }

        if (i != whpx->breakpoints.original_address_count) {
            update_pending = true;
        }

        if (update_pending) {
            /*
             * The CPU breakpoints have changed since the last call to
             * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
             * now be recomputed.
             */
            whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
        }

        /* Actually insert the breakpoints into the memory. */
        whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
    }

    uint64_t exception_mask;
    if (whpx->step_pending ||
        (whpx->breakpoints.breakpoints &&
         whpx->breakpoints.breakpoints->used)) {
        /*
         * We are either attempting to single-step one or more CPUs, or
         * have one or more breakpoints enabled. Both require intercepting
         * the WHvX64ExceptionTypeBreakpointTrap exception.
         */
        exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
    } else {
        /* Let the guest handle all exceptions. */
        exception_mask = 0;
    }

    hr = whpx_set_exception_exit_bitmap(exception_mask);
    if (!SUCCEEDED(hr)) {
        error_report("WHPX: Failed to update exception exit mask,"
                     " hr=%08lx.", hr);
        return 1;
    }

    return 0;
}
/*
 * This function is called when the last VCPU has finished running.
 * It is used to remove any previously set breakpoints from memory.
 */
static int whpx_last_vcpu_stopping(CPUState *cpu)
{
    whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
    return 0;
}
/* Returns the address of the next instruction that is about to be executed. */
static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
{
    if (cpu->vcpu_dirty) {
        /* The CPU registers have been modified by other parts of QEMU. */
        CPUArchState *env = (CPUArchState *)(cpu->env_ptr);
        return env->eip;
    } else if (exit_context_valid) {
        /*
         * The CPU registers have been modified neither by other parts of
         * QEMU, nor by this port calling WHvSetVirtualProcessorRegisters().
         * This is the most common case.
         */
        struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
        return vcpu->exit_ctx.VpContext.Rip;
    } else {
        /*
         * The CPU registers have been modified by a call to
         * WHvSetVirtualProcessorRegisters() and must be re-queried from
         * the target.
         */
        WHV_REGISTER_VALUE reg_value;
        WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
        HRESULT hr;
        struct whpx_state *whpx = &whpx_global;

        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
            whpx->partition,
            cpu->cpu_index,
            &reg_name,
            1,
            &reg_value);

        if (FAILED(hr)) {
            error_report("WHPX: Failed to get PC, hr=%08lx", hr);
            return 0;
        }

        return reg_value.Reg64;
    }
}
static int whpx_handle_halt(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    int ret = 0;

    qemu_mutex_lock_iothread();
    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }
    qemu_mutex_unlock_iothread();

    return ret;
}
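/*
 * Called right before WHvRunVirtualProcessor(); takes the iothread lock
 * internally to inject a pending NMI/SMI or PIC interrupt, sync the APIC
 * TPR into CR8, and register for an interrupt-window exit when an interrupt
 * cannot be injected yet.
 */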
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    UINT32 reg_count = 0;
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!whpx_apic_in_platform()) {
        if (!vcpu->interruption_pending &&
            vcpu->interruptable && (env->eflags & IF_MASK)) {
            assert(!new_int.InterruptionPending);
            if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
                cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
                irq = cpu_get_pic_interrupt(env);
                if (irq >= 0) {
                    new_int.InterruptionType = WHvX64PendingInterrupt;
                    new_int.InterruptionPending = 1;
                    new_int.InterruptionVector = irq;
                }
            }
        }

        /* Setup interrupt state if new one was prepared */
        if (new_int.InterruptionPending) {
            reg_values[reg_count].PendingInterruption = new_int;
            reg_names[reg_count] = WHvRegisterPendingInterruption;
            reg_count += 1;
        }
    } else if (vcpu->ready_for_pic_interrupt &&
               (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            reg_names[reg_count] = WHvRegisterPendingEvent;
            reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
            {
                .EventPending = 1,
                .EventType = WHvX64PendingEventExtInt,
                .Vector = irq,
            };
            reg_count += 1;
        }
    }

    /* Sync the TPR to the CR8 if it was modified during the intercept */
    tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications =
            (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
                .InterruptNotification = 1
            };
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();
    vcpu->ready_for_pic_interrupt = false;

    if (reg_count) {
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }
}
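/*
 * Called right after WHvRunVirtualProcessor() returns: pulls RFLAGS and CR8
 * out of the exit context and records the interruption-pending and
 * interrupt-shadow state for the next whpx_vcpu_pre_run().
 */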
static void whpx_vcpu_post_run(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);

    env->eflags = vcpu->exit_ctx.VpContext.Rflags;

    uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
    if (vcpu->tpr != tpr) {
        vcpu->tpr = tpr;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr));
        qemu_mutex_unlock_iothread();
    }

    vcpu->interruption_pending =
        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;

    vcpu->interruptable =
        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
}
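/*
 * Handles the interrupt_request bits that must be processed outside of the
 * hypervisor run loop: INIT, SIPI, APIC polling, and reported TPR accesses.
 */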
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        vcpu->interruptable = true;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        whpx_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }
}
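/*
 * The per-VCPU run loop: commits dirty register state, arms single-stepping
 * when requested, calls WHvRunVirtualProcessor(), and dispatches on the
 * resulting exit reason until an event must be handled by the main loop.
 */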
static int whpx_vcpu_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct whpx_breakpoint *stepped_over_bp = NULL;
    WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
    int ret = 0;

    g_assert(qemu_mutex_iothread_locked());

    if (whpx->running_cpus++ == 0) {
        /* Insert breakpoints into memory, update exception exit bitmap. */
        ret = whpx_first_vcpu_starting(cpu);
        if (ret != 0) {
            return ret;
        }
    }

    if (whpx->breakpoints.breakpoints &&
        whpx->breakpoints.breakpoints->used > 0) {
        uint64_t pc = whpx_vcpu_get_pc(cpu, true);
        stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
        if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
            stepped_over_bp = NULL;
        }

        if (stepped_over_bp) {
            /*
             * We are trying to run the instruction overwritten by an active
             * breakpoint. We will temporarily disable the breakpoint, suspend
             * other CPUs, and step over the instruction.
             */
            exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
        }
    }

    if (exclusive_step_mode == WHPX_STEP_NONE) {
        whpx_vcpu_process_async_events(cpu);
        if (cpu->halted && !whpx_apic_in_platform()) {
            cpu->exception_index = EXCP_HLT;
            qatomic_set(&cpu->exit_request, false);
            return 0;
        }
    }

    qemu_mutex_unlock_iothread();

    if (exclusive_step_mode != WHPX_STEP_NONE) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        hr = whpx_set_exception_exit_bitmap(
            1UL << WHvX64ExceptionTypeDebugTrapOrFault);
        if (!SUCCEEDED(hr)) {
            error_report("WHPX: Failed to update exception exit mask, "
                         "hr=%08lx.", hr);
            return 1;
        }

        if (stepped_over_bp) {
            /* Temporarily disable the triggered breakpoint. */
            cpu_memory_rw_debug(cpu,
                                stepped_over_bp->address,
                                &stepped_over_bp->original_instruction,
                                1,
                                true);
        }
    } else {
        cpu_exec_start(cpu);
    }

    do {
        if (cpu->vcpu_dirty) {
            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
            cpu->vcpu_dirty = false;
        }

        if (exclusive_step_mode == WHPX_STEP_NONE) {
            whpx_vcpu_pre_run(cpu);

            if (qatomic_read(&cpu->exit_request)) {
                whpx_vcpu_kick(cpu);
            }
        }

        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
            whpx_vcpu_configure_single_stepping(cpu, true, NULL);
        }

        hr = whp_dispatch.WHvRunVirtualProcessor(
            whpx->partition, cpu->cpu_index,
            &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to exec a virtual processor,"
                         " hr=%08lx", hr);
            ret = -1;
            break;
        }

        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
            whpx_vcpu_configure_single_stepping(cpu,
                false,
                &vcpu->exit_ctx.VpContext.Rflags);
        }

        whpx_vcpu_post_run(cpu);

        switch (vcpu->exit_ctx.ExitReason) {
        case WHvRunVpExitReasonMemoryAccess:
            ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
            break;

        case WHvRunVpExitReasonX64IoPortAccess:
            ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
            break;

        case WHvRunVpExitReasonX64InterruptWindow:
            vcpu->ready_for_pic_interrupt = 1;
            vcpu->window_registered = 0;
            ret = 0;
            break;

        case WHvRunVpExitReasonX64ApicEoi:
            assert(whpx_apic_in_platform());
            ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
            break;

        case WHvRunVpExitReasonX64Halt:
            /*
             * WARNING: as of build 19043.1526 (21H1), this exit reason is no
             * longer used.
             */
            ret = whpx_handle_halt(cpu);
            break;

        case WHvRunVpExitReasonX64ApicInitSipiTrap: {
            WHV_INTERRUPT_CONTROL ipi = {0};
            uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
            uint32_t delivery_mode =
                (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
            int dest_shorthand =
                (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
            bool broadcast = false;
            bool include_self = false;
            uint32_t i;

            /* We only registered for INIT and SIPI exits. */
            if ((delivery_mode != APIC_DM_INIT) &&
                (delivery_mode != APIC_DM_SIPI)) {
                error_report(
                    "WHPX: Unexpected APIC exit that is not an INIT or SIPI");
                break;
            }

            if (delivery_mode == APIC_DM_INIT) {
                ipi.Type = WHvX64InterruptTypeInit;
            } else {
                ipi.Type = WHvX64InterruptTypeSipi;
            }

            ipi.DestinationMode =
                ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
                    WHvX64InterruptDestinationModeLogical :
                    WHvX64InterruptDestinationModePhysical;

            ipi.TriggerMode =
                ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
                    WHvX64InterruptTriggerModeLevel :
                    WHvX64InterruptTriggerModeEdge;

            ipi.Vector = icr & APIC_VECTOR_MASK;
            switch (dest_shorthand) {
            /* no shorthand. Bits 56-63 contain the destination. */
            case 0:
                ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
                hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
                        &ipi, sizeof(ipi));
                if (FAILED(hr)) {
                    error_report("WHPX: Failed to request interrupt hr=%08lx",
                                 hr);
                }
                break;

            /* self */
            case 1:
                include_self = true;
                break;

            /* broadcast, including self */
            case 2:
                broadcast = true;
                include_self = true;
                break;

            /* broadcast, excluding self */
            case 3:
                broadcast = true;
                break;
            }

            if (!broadcast && !include_self) {
                break;
            }

            for (i = 0; i <= max_vcpu_index; i++) {
                if (i == cpu->cpu_index && !include_self) {
                    continue;
                }

                /*
                 * Assuming that APIC Ids are identity mapped since
                 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
                 * are not handled yet and the hypervisor doesn't allow the
                 * guest to modify the APIC ID.
                 */
                ipi.Destination = i;
                hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
                        &ipi, sizeof(ipi));
                if (FAILED(hr)) {
                    error_report(
                        "WHPX: Failed to request SIPI for %d, hr=%08lx",
                        i, hr);
                }
            }

            break;
        }

        case WHvRunVpExitReasonCanceled:
            if (exclusive_step_mode != WHPX_STEP_NONE) {
                /*
                 * We are trying to step over a single instruction, and
                 * likely got a request to stop from another thread.
                 * Delay it until we are done stepping.
                 */
                ret = 0;
            } else {
                cpu->exception_index = EXCP_INTERRUPT;
                ret = 1;
            }
            break;

        case WHvRunVpExitReasonX64MsrAccess: {
            WHV_REGISTER_VALUE reg_values[3] = {0};
            WHV_REGISTER_NAME reg_names[3];
            UINT32 reg_count;

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRdx;

            reg_values[0].Reg64 =
                vcpu->exit_ctx.VpContext.Rip +
                vcpu->exit_ctx.VpContext.InstructionLength;

            /*
             * For all unsupported MSR access we:
             *     ignore writes
             *     return 0 on read.
             */
            reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
                        1 : 3;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition,
                cpu->cpu_index,
                reg_names, reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set MsrAccess state "
                             " registers, hr=%08lx", hr);
            }
            ret = 0;
            break;
        }

        case WHvRunVpExitReasonX64Cpuid: {
            WHV_REGISTER_VALUE reg_values[5];
            WHV_REGISTER_NAME reg_names[5];
            UINT32 reg_count = 5;
            UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;

            memset(reg_values, 0, sizeof(reg_values));

            rip = vcpu->exit_ctx.VpContext.Rip +
                  vcpu->exit_ctx.VpContext.InstructionLength;
            cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;

            /*
             * Ideally, these should be supplied to the hypervisor during VCPU
             * initialization and it should be able to satisfy this request.
             * But, currently, WHPX doesn't support setting CPUID values in the
             * hypervisor once the partition has been setup, which is too late
             * since VCPUs are realized later. For now, use the values from
             * QEMU to satisfy these requests, until WHPX adds support for
             * being able to set these values in the hypervisor at runtime.
             */
            cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
                (UINT32 *)&rcx, (UINT32 *)&rdx);
            switch (cpuid_fn) {
            case 0x40000000:
                /* Expose the vmware cpu frequency cpuid leaf */
                rax = 0x40000010;
                rbx = rcx = rdx = 0;
                break;

            case 0x40000010:
                rax = env->tsc_khz;
                rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
                rcx = rdx = 0;
                break;

            case 0x80000001:
                /* Remove any support of OSVW */
                rcx &= ~CPUID_EXT3_OSVW;
                break;
            }

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRcx;
            reg_names[3] = WHvX64RegisterRdx;
            reg_names[4] = WHvX64RegisterRbx;

            reg_values[0].Reg64 = rip;
            reg_values[1].Reg64 = rax;
            reg_values[2].Reg64 = rcx;
            reg_values[3].Reg64 = rdx;
            reg_values[4].Reg64 = rbx;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition, cpu->cpu_index,
                reg_names,
                reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set CpuidAccess state registers,"
                             " hr=%08lx", hr);
            }
            ret = 0;
            break;
        }

        case WHvRunVpExitReasonException:
            whpx_get_registers(cpu);

            if ((vcpu->exit_ctx.VpException.ExceptionType ==
                 WHvX64ExceptionTypeDebugTrapOrFault) &&
                (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
                (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
                 whpx_breakpoint_instruction)) {
                /* Stopped at a software breakpoint. */
                cpu->exception_index = EXCP_DEBUG;
            } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
                        WHvX64ExceptionTypeDebugTrapOrFault) &&
                       !cpu->singlestep_enabled) {
                /*
                 * Just finished stepping over a breakpoint, but the
                 * gdb does not expect us to do single-stepping.
                 * Don't do anything special.
                 */
                cpu->exception_index = EXCP_INTERRUPT;
            } else {
                /* Another exception or debug event. Report it to GDB. */
                cpu->exception_index = EXCP_DEBUG;
            }

            ret = 1;
            break;

        case WHvRunVpExitReasonNone:
        case WHvRunVpExitReasonUnrecoverableException:
        case WHvRunVpExitReasonInvalidVpRegisterValue:
        case WHvRunVpExitReasonUnsupportedFeature:
        default:
            error_report("WHPX: Unexpected VP exit code %d",
                         vcpu->exit_ctx.ExitReason);
            whpx_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            break;
        }

    } while (!ret);

    if (stepped_over_bp) {
        /* Restore the breakpoint we stepped over */
        cpu_memory_rw_debug(cpu,
                            stepped_over_bp->address,
                            (void *)&whpx_breakpoint_instruction,
                            1,
                            true);
    }

    if (exclusive_step_mode != WHPX_STEP_NONE) {
        g_assert(cpu_in_exclusive_context(cpu));
        cpu->running = false;
        end_exclusive();

        exclusive_step_mode = WHPX_STEP_NONE;
    } else {
        cpu_exec_end(cpu);
    }

    qemu_mutex_lock_iothread();
    current_cpu = cpu;

    if (--whpx->running_cpus == 0) {
        whpx_last_vcpu_stopping(cpu);
    }

    qatomic_set(&cpu->exit_request, false);

    return ret < 0;
}
static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (!cpu->vcpu_dirty) {
        whpx_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
    cpu->vcpu_dirty = false;
}

static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
    cpu->vcpu_dirty = false;
}

static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void whpx_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

void whpx_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void whpx_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

void whpx_cpu_synchronize_pre_resume(bool step_pending)
{
    whpx_global.step_pending = step_pending;
}
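/*
 * Note: whpx_cpu_synchronize_pre_resume() runs before the VM resumes; the
 * recorded step_pending flag is consulted by whpx_first_vcpu_starting()
 * above when deciding whether debug traps must be intercepted.
 */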
static Error *whpx_migration_blocker;

static void whpx_cpu_update_state(void *opaque, bool running, RunState state)
{
    CPUX86State *env = opaque;

    if (running) {
        env->tsc_valid = false;
    }
}
int whpx_init_vcpu(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = NULL;
    Error *local_error = NULL;
    CPUX86State *env = cpu->env_ptr;
    X86CPU *x86_cpu = X86_CPU(cpu);
    UINT64 freq = 0;
    int ret;

    /*
     * Add migration blockers for all unsupported features of the
     * Windows Hypervisor Platform.
     */
    if (whpx_migration_blocker == NULL) {
        error_setg(&whpx_migration_blocker,
                   "State blocked due to non-migratable CPUID feature support, "
                   "dirty memory tracking support, and XSAVE/XRSTOR support");

        if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) {
            error_report_err(local_error);
            error_free(whpx_migration_blocker);
            ret = -EINVAL;
            goto error;
        }
    }

    vcpu = g_new0(struct whpx_vcpu, 1);
    if (!vcpu) {
        error_report("WHPX: Failed to allocate VCPU context.");
        ret = -ENOMEM;
        goto error;
    }

    hr = whp_dispatch.WHvEmulatorCreateEmulator(
        &whpx_emu_callbacks,
        &vcpu->emulator);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup instruction completion support,"
                     " hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvCreateVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create a virtual processor,"
                     " hr=%08lx", hr);
        whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
        ret = -EINVAL;
        goto error;
    }

    /*
     * The vCPU's TSC frequency is either specified by the user, or, if
     * absent, taken from Hyper-V. In the latter case, we query it from
     * Hyper-V and record it in env->tsc_khz, so that the vCPU's TSC
     * frequency can be migrated later via this field.
     */
    if (!env->tsc_khz) {
        hr = whp_dispatch.WHvGetCapability(
            WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
            NULL);
        if (hr != WHV_E_UNKNOWN_CAPABILITY) {
            if (FAILED(hr)) {
                printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
            } else {
                env->tsc_khz = freq / 1000; /* Hz to KHz */
            }
        }
    }

    env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
    if (hr != WHV_E_UNKNOWN_CAPABILITY) {
        if (FAILED(hr)) {
            printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
        } else {
            env->apic_bus_freq = freq;
        }
    }

    /*
     * If the VMware CPUID frequency leaf option is set, and we have a valid
     * TSC value, trap the corresponding CPUID leaves.
     */
    if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
        UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};

        hr = whp_dispatch.WHvSetPartitionProperty(
            whpx->partition,
            WHvPartitionPropertyCodeCpuidExitList,
            cpuidExitList,
            RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                         hr);
            ret = -EINVAL;
            goto error;
        }
    }

    vcpu->interruptable = true;
    cpu->vcpu_dirty = true;
    cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
    max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);

    return 0;

error:
    g_free(vcpu);

    return ret;
}

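/*
 * Illustrative note: the VMware CPUID frequency leaves trapped above are
 * enabled from the command line through the x86 CPU property
 * "vmware-cpuid-freq", e.g. (hypothetical invocation):
 *
 *     qemu-system-x86_64 -accel whpx -cpu max,vmware-cpuid-freq=on ...
 *
 * With a valid env->tsc_khz, QEMU can then service leaves 0x40000000 and
 * 0x40000010 itself on the CPUID exit path.
 */
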
int whpx_vcpu_exec(CPUState *cpu)
{
    int ret;
    int fatal;

    for (;;) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = whpx_vcpu_run(cpu);

        if (fatal) {
            error_report("WHPX: Failed to exec a virtual processor");
            abort();
        }
    }

    return ret;
}

void whpx_destroy_vcpu(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
    whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
    g_free(cpu->hax_vcpu);
}

void whpx_vcpu_kick(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;

    whp_dispatch.WHvCancelRunVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
}

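/*
 * Per the documented WHP semantics, WHvCancelRunVirtualProcessor() makes
 * a pending or in-progress WHvRunVirtualProcessor() call on that
 * processor return to its caller, which is how the vCPU thread gets back
 * to the main loop to notice cpu->exit_request.
 */
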
static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
                                void *host_va, int add, int rom,
                                const char *name)
{
    struct whpx_state *whpx = &whpx_global;
    HRESULT hr;

    /*
    if (add) {
        printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
               (void*)start_pa, (void*)size, host_va,
               (rom ? "ROM" : "RAM"), name);
    } else {
        printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
               (void*)start_pa, (void*)size, host_va, name);
    }
    */

    if (add) {
        hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
                                         host_va,
                                         start_pa,
                                         size,
                                         (WHvMapGpaRangeFlagRead |
                                          WHvMapGpaRangeFlagExecute |
                                          (rom ? 0 : WHvMapGpaRangeFlagWrite)));
    } else {
        hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
                                           start_pa,
                                           size);
    }

    if (FAILED(hr)) {
        error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
                     " Host:%p, hr=%08lx",
                     (add ? "MAP" : "UNMAP"), name,
                     (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
    }
}

static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Round the start up to the next host page boundary. */
    delta = qemu_real_host_page_size()
          - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;

    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
                        memory_region_is_rom(mr), mr->name);
}

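/*
 * Worked example for the alignment arithmetic above, assuming 4 KiB host
 * pages (qemu_real_host_page_mask() == ~0xfffULL):
 *
 *     start_pa = 0x1234
 *     delta    = 0x1000 - (0x1234 & 0xfff)  ->  0xdcc
 *     delta   &= 0xfff                      ->  0xdcc
 *     start_pa += delta                     ->  0x2000 (next page boundary)
 *
 * The mapping is thus clipped to whole host pages: the start is rounded
 * up, the size is rounded down, and a section smaller than one aligned
 * page is skipped entirely.
 */
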
static void whpx_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    whpx_process_section(section, 1);
}

static void whpx_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    whpx_process_section(section, 0);
    memory_region_unref(section->mr);
}

static void whpx_transaction_begin(MemoryListener *listener)
{
}

static void whpx_transaction_commit(MemoryListener *listener)
{
}

static void whpx_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}

static MemoryListener whpx_memory_listener = {
    .name = "whpx",
    .begin = whpx_transaction_begin,
    .commit = whpx_transaction_commit,
    .region_add = whpx_region_add,
    .region_del = whpx_region_del,
    .log_sync = whpx_log_sync,
};

static void whpx_memory_init(void)
{
    memory_listener_register(&whpx_memory_listener, &address_space_memory);
}

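/*
 * Registering the listener replays the current memory topology:
 * region_add() is called for each existing RAM section of
 * address_space_memory, which establishes the initial GPA mappings of
 * the partition before any vCPU runs.
 */
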
/*
 * Load the functions from the given library, using the given handle. If a
 * handle is provided, it is used, otherwise the library is opened. The
 * handle will be updated on return with the opened one.
 */
static bool load_whp_dispatch_fns(HMODULE *handle,
                                  WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

    #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
    #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \

    #define WHP_LOAD_FIELD(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \
        if (!whp_dispatch.function_name) { \
            error_report("Could not load function %s", #function_name); \
            goto error; \
        } \

    #define WHP_LOAD_LIB(lib_name, handle_lib) \
        if (!handle_lib) { \
            handle_lib = LoadLibrary(lib_name); \
            if (!handle_lib) { \
                error_report("Could not load library %s.", lib_name); \
                goto error; \
            } \
        } \

    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    *handle = hLib;
    return true;

error:
    if (hLib && !*handle) {
        FreeLibrary(hLib);
    }

    return false;
}

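/*
 * For reference, a sketch of what one list entry expands to; the function
 * lists and the whp_dispatch table are declared in "whpx-internal.h".
 * Using WHvCreatePartition as an example:
 *
 *     WHP_LOAD_FIELD(HRESULT, WHvCreatePartition, (...))
 *
 * expands to roughly:
 *
 *     whp_dispatch.WHvCreatePartition =
 *         (WHvCreatePartition_t)GetProcAddress(hLib, "WHvCreatePartition");
 *     if (!whp_dispatch.WHvCreatePartition) {
 *         error_report("Could not load function %s", "WHvCreatePartition");
 *         goto error;
 *     }
 */
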
static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
                                    const char *name, void *opaque,
                                    Error **errp)
{
    struct whpx_state *whpx = &whpx_global;
    OnOffSplit mode;

    if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
        return;
    }

    switch (mode) {
    case ON_OFF_SPLIT_ON:
        whpx->kernel_irqchip_allowed = true;
        whpx->kernel_irqchip_required = true;
        break;

    case ON_OFF_SPLIT_OFF:
        whpx->kernel_irqchip_allowed = false;
        whpx->kernel_irqchip_required = false;
        break;

    case ON_OFF_SPLIT_SPLIT:
        error_setg(errp, "WHPX: split irqchip currently not supported");
        error_append_hint(errp,
            "Try without kernel-irqchip or with kernel-irqchip=on|off\n");
        break;

    default:
        /*
         * The value was checked in visit_type_OnOffSplit() above. If
         * we get here, then something is wrong in QEMU.
         */
        abort();
    }
}

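/*
 * Example (illustrative): the property registered in
 * whpx_accel_class_init() below is consumed from the command line as
 *
 *     qemu-system-x86_64 -accel whpx,kernel-irqchip=off ...
 *
 * "on" makes in-hypervisor APIC emulation mandatory (whpx_accel_init()
 * fails if WHP lacks it), "off" forces QEMU's userspace APIC, and
 * "split" is rejected above. By default the in-hypervisor irqchip is
 * allowed but not required.
 */
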
static int whpx_accel_init(MachineState *ms)
{
    struct whpx_state *whpx;
    int ret;
    HRESULT hr;
    WHV_CAPABILITY whpx_cap;
    UINT32 whpx_cap_size;
    WHV_PARTITION_PROPERTY prop;
    UINT32 cpuidExitList[] = {1, 0x80000001};
    WHV_CAPABILITY_FEATURES features = {0};

    whpx = &whpx_global;

    if (!init_whp_dispatch()) {
        ret = -ENOSYS;
        goto error;
    }

    whpx->mem_quota = ms->ram_size;

    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeHypervisorPresent, &whpx_cap,
        sizeof(whpx_cap), &whpx_cap_size);
    if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
        error_report("WHPX: No accelerator found, hr=%08lx", hr);
        ret = -ENOSPC;
        goto error;
    }

    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Query the XSAVE capability of the partition. Any error here is not
     * considered fatal.
     */
    hr = whp_dispatch.WHvGetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeProcessorXsaveFeatures,
        &whpx_xsave_cap,
        sizeof(whpx_xsave_cap),
        &whpx_cap_size);

    /*
     * Windows versions which don't support this property will return with
     * this specific error code.
     */
    if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) {
        error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr);
    }

    if (!whpx_has_xsave()) {
        printf("WHPX: Partition is not XSAVE capable\n");
    }

    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ProcessorCount = ms->smp.cpus;
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeProcessorCount,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set partition processor count to %u,"
                     " hr=%08lx", ms->smp.cpus, hr);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Error out if WHP doesn't support APIC emulation and the user
     * requires it.
     */
    if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
            !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
        error_report("WHPX: kernel irqchip requested, but unavailable. "
                     "Try without kernel-irqchip or with kernel-irqchip=off");
        ret = -EINVAL;
        goto error;
    }

    if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
        whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
        WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
            WHvX64LocalApicEmulationModeXApic;
        printf("WHPX: setting APIC emulation mode in the hypervisor\n");
        hr = whp_dispatch.WHvSetPartitionProperty(
            whpx->partition,
            WHvPartitionPropertyCodeLocalApicEmulationMode,
            &mode,
            sizeof(mode));
        if (FAILED(hr)) {
            error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
            if (whpx->kernel_irqchip_required) {
                error_report("WHPX: kernel irqchip requested, but unavailable");
                ret = -EINVAL;
                goto error;
            }
        } else {
            whpx->apic_in_platform = true;
        }
    }

    /* Register for MSR and CPUID exits */
    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ExtendedVmExits.X64MsrExit = 1;
    prop.ExtendedVmExits.X64CpuidExit = 1;
    prop.ExtendedVmExits.ExceptionExit = 1;
    if (whpx_apic_in_platform()) {
        prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
    }

    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeExtendedVmExits,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));
    if (FAILED(hr)) {
        error_report("WHPX: Failed to enable MSR & CPUID exits, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeCpuidExitList,
        cpuidExitList,
        RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                     hr);
        ret = -EINVAL;
        goto error;
    }

    /*
     * We do not want to intercept any exceptions from the guest,
     * until we actually start debugging with gdb.
     */
    whpx->exception_exit_bitmap = -1;
    hr = whpx_set_exception_exit_bitmap(0);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetupPartition(whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    whpx_memory_init();

    printf("Windows Hypervisor Platform accelerator is operational\n");
    return 0;

error:
    if (NULL != whpx->partition) {
        whp_dispatch.WHvDeletePartition(whpx->partition);
        whpx->partition = NULL;
    }

    return ret;
}

int whpx_enabled(void)
{
    return whpx_allowed;
}

bool whpx_apic_in_platform(void) {
    return whpx_global.apic_in_platform;
}

static void whpx_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);

    ac->name = "WHPX";
    ac->init_machine = whpx_accel_init;
    ac->allowed = &whpx_allowed;

    object_class_property_add(oc, "kernel-irqchip", "on|off|split",
        NULL, whpx_set_kernel_irqchip,
        NULL, NULL);
    object_class_property_set_description(oc, "kernel-irqchip",
        "Configure WHPX in-kernel irqchip");
}

static void whpx_accel_instance_init(Object *obj)
{
    struct whpx_state *whpx = &whpx_global;

    memset(whpx, 0, sizeof(struct whpx_state));
    /* Turn on kernel-irqchip, by default */
    whpx->kernel_irqchip_allowed = true;
}

static const TypeInfo whpx_accel_type = {
    .name = ACCEL_CLASS_NAME("whpx"),
    .parent = TYPE_ACCEL,
    .instance_init = whpx_accel_instance_init,
    .class_init = whpx_accel_class_init,
};

static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
}

bool init_whp_dispatch(void)
{
    if (whp_dispatch_initialized) {
        return true;
    }

    if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
        goto error;
    }

    if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
        goto error;
    }

    assert(load_whp_dispatch_fns(&hWinHvPlatform,
                                 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
    whp_dispatch_initialized = true;

    return true;

error:
    if (hWinHvPlatform) {
        FreeLibrary(hWinHvPlatform);
    }

    if (hWinHvEmulation) {
        FreeLibrary(hWinHvEmulation);
    }

    return false;
}

type_init(whpx_type_init);