3 #include "config-host.h"
13 #define MSR_IA32_TSC 0x10
15 extern void perror(const char *s
);
/* KVM handle: assigned from kvm_init() and passed as the first argument to
 * the kvm_* library calls made in this file. */
18 kvm_context_t kvm_context
;
/* MSR list fetched with kvm_get_msr_list() in kvm_qemu_create_context(). */
19 static struct kvm_msr_list
*kvm_msr_list
;
/* Set to 1 when MSR_STAR is found in kvm_msr_list; load_regs()/save_regs()
 * only include the MSR_STAR entry when this is non-zero. */
20 static int kvm_has_msr_star
;
/* Array of CPUState pointers passed to kvm_init(); presumably handed back to
 * the callbacks as their `opaque` argument -- TODO confirm against kvm_init. */
23 static CPUState
*saved_env
[NR_CPU
];
/*
 * Helper used by load_regs() to fill one slot of its MSR array; callers pass
 * the slot, an MSR index and the value taken from CPUState.
 * NOTE(review): the rest of the parameter list and the whole body are not
 * visible in this extract.
 */
25 static void set_msr_entry(struct kvm_msr_entry
*entry
, uint32_t index
,
/*
 * Copy the value of one MSR entry into the CPUState field selected by
 * entry->index.  An index that matches no case is reported with a printf
 * warning.
 * NOTE(review): several case labels, the break statements and the return
 * statements are not visible in this extract.
 */
32 /* Returns 0 on success, non-zero on failure. */
33 static int get_msr_entry(struct kvm_msr_entry
*entry
, CPUState
*env
)
35 switch (entry
->index
) {
36 case MSR_IA32_SYSENTER_CS
:
37 env
->sysenter_cs
= entry
->data
;
39 case MSR_IA32_SYSENTER_ESP
:
40 env
->sysenter_esp
= entry
->data
;
42 case MSR_IA32_SYSENTER_EIP
:
43 env
->sysenter_eip
= entry
->data
;
/* NOTE(review): case label not visible; presumably MSR_STAR. */
46 env
->star
= entry
->data
;
/* NOTE(review): case label not visible; presumably MSR_CSTAR. */
50 env
->cstar
= entry
->data
;
52 case MSR_KERNELGSBASE
:
53 env
->kernelgsbase
= entry
->data
;
/* NOTE(review): case label not visible; presumably MSR_FMASK. */
56 env
->fmask
= entry
->data
;
/* NOTE(review): case label not visible; presumably MSR_LSTAR. */
59 env
->lstar
= entry
->data
;
/* NOTE(review): case label not visible; presumably MSR_IA32_TSC. */
63 env
->tsc
= entry
->data
;
/* Fallback for an index none of the cases handled. */
66 printf("Warning unknown msr index 0x%x\n", entry
->index
);
/*
 * Copy selector, base and limit from a QEMU SegmentCache into a
 * kvm_segment.  load_regs() uses this variant when VM_MASK is set in
 * env->eflags.
 * NOTE(review): any further field assignments of the original function are
 * not visible in this extract.
 */
78 static void set_v8086_seg(struct kvm_segment
*lhs
, const SegmentCache
*rhs
)
80 lhs
->selector
= rhs
->selector
;
81 lhs
->base
= rhs
->base
;
82 lhs
->limit
= rhs
->limit
;
/*
 * Convert a QEMU SegmentCache into a kvm_segment: selector, base and limit
 * are copied directly, and the packed rhs->flags word is unpacked into the
 * individual kvm_segment fields (type, present, db, s, l, g, avl).
 */
94 static void set_seg(struct kvm_segment
*lhs
, const SegmentCache
*rhs
)
96 unsigned flags
= rhs
->flags
;
97 lhs
->selector
= rhs
->selector
;
98 lhs
->base
= rhs
->base
;
99 lhs
->limit
= rhs
->limit
;
/* 4-bit type field. */
100 lhs
->type
= (flags
>> DESC_TYPE_SHIFT
) & 15;
101 lhs
->present
= (flags
& DESC_P_MASK
) != 0;
/* dpl is taken from the low two bits of the selector, not from flags. */
102 lhs
->dpl
= rhs
->selector
& 3;
103 lhs
->db
= (flags
>> DESC_B_SHIFT
) & 1;
104 lhs
->s
= (flags
& DESC_S_MASK
) != 0;
105 lhs
->l
= (flags
>> DESC_L_SHIFT
) & 1;
106 lhs
->g
= (flags
& DESC_G_MASK
) != 0;
107 lhs
->avl
= (flags
& DESC_AVL_MASK
) != 0;
/*
 * Convert a kvm_segment back into a QEMU SegmentCache: selector, base and
 * limit are copied directly, and the individual kvm_segment fields are
 * OR-ed together into one packed flags value.
 * NOTE(review): the left-hand side of the flags expression is not visible
 * in this extract; presumably it is assigned to lhs->flags.
 */
111 static void get_seg(SegmentCache
*lhs
, const struct kvm_segment
*rhs
)
113 lhs
->selector
= rhs
->selector
;
114 lhs
->base
= rhs
->base
;
115 lhs
->limit
= rhs
->limit
;
/* Multi-bit fields are shifted into place; single-bit fields are multiplied
 * by their mask or shifted. */
117 (rhs
->type
<< DESC_TYPE_SHIFT
)
118 | (rhs
->present
* DESC_P_MASK
)
119 | (rhs
->dpl
<< DESC_DPL_SHIFT
)
120 | (rhs
->db
<< DESC_B_SHIFT
)
121 | (rhs
->s
* DESC_S_MASK
)
122 | (rhs
->l
<< DESC_L_SHIFT
)
123 | (rhs
->g
* DESC_G_MASK
)
124 | (rhs
->avl
* DESC_AVL_MASK
);
/* NOTE(review): the body of this function is not visible in this extract. */
127 /* The reset values used by qemu are not compatible with SVM;
128 * this function fixes up the segment descriptor values. */
129 static void fix_realmode_dataseg(struct kvm_segment
*seg
)
/*
 * Push the register state held in a QEMU CPUState into KVM (vcpu 0):
 * general-purpose registers and rflags via kvm_set_regs(), segment,
 * descriptor-table and control registers via kvm_set_sregs(), and a set of
 * MSRs via kvm_set_msrs().
 * NOTE(review): braces, else branches, conditional-compilation lines and
 * some local declarations (e.g. `n`, `rc`) are not visible in this extract.
 */
136 static void load_regs(CPUState
*env
)
138 struct kvm_regs regs
;
139 struct kvm_sregs sregs
;
140 struct kvm_msr_entry msrs
[MSR_COUNT
];
/* General-purpose registers. */
147 regs
.rax
= env
->regs
[R_EAX
];
148 regs
.rbx
= env
->regs
[R_EBX
];
149 regs
.rcx
= env
->regs
[R_ECX
];
150 regs
.rdx
= env
->regs
[R_EDX
];
151 regs
.rsi
= env
->regs
[R_ESI
];
152 regs
.rdi
= env
->regs
[R_EDI
];
153 regs
.rsp
= env
->regs
[R_ESP
];
154 regs
.rbp
= env
->regs
[R_EBP
];
/* r8..r15 come from env->regs[8..15]. */
156 regs
.r8
= env
->regs
[8];
157 regs
.r9
= env
->regs
[9];
158 regs
.r10
= env
->regs
[10];
159 regs
.r11
= env
->regs
[11];
160 regs
.r12
= env
->regs
[12];
161 regs
.r13
= env
->regs
[13];
162 regs
.r14
= env
->regs
[14];
163 regs
.r15
= env
->regs
[15];
166 regs
.rflags
= env
->eflags
;
/* NOTE(review): the third argument below looks like a mis-decoded `&regs`
 * (the local declared above) -- confirm against the original file. */
169 kvm_set_regs(kvm_context
, 0, ®s
);
/* Pending-interrupt bitmap is copied verbatim. */
171 memcpy(sregs
.interrupt_bitmap
, env
->kvm_interrupt_bitmap
, sizeof(sregs
.interrupt_bitmap
));
/* With VM_MASK set in eflags the segments go through set_v8086_seg(). */
173 if ((env
->eflags
& VM_MASK
)) {
174 set_v8086_seg(&sregs
.cs
, &env
->segs
[R_CS
]);
175 set_v8086_seg(&sregs
.ds
, &env
->segs
[R_DS
]);
176 set_v8086_seg(&sregs
.es
, &env
->segs
[R_ES
]);
177 set_v8086_seg(&sregs
.fs
, &env
->segs
[R_FS
]);
178 set_v8086_seg(&sregs
.gs
, &env
->segs
[R_GS
]);
179 set_v8086_seg(&sregs
.ss
, &env
->segs
[R_SS
]);
/* NOTE(review): presumably the else branch of the VM_MASK test; the `else`
 * line itself is not visible. */
181 set_seg(&sregs
.cs
, &env
->segs
[R_CS
]);
182 set_seg(&sregs
.ds
, &env
->segs
[R_DS
]);
183 set_seg(&sregs
.es
, &env
->segs
[R_ES
]);
184 set_seg(&sregs
.fs
, &env
->segs
[R_FS
]);
185 set_seg(&sregs
.gs
, &env
->segs
[R_GS
]);
186 set_seg(&sregs
.ss
, &env
->segs
[R_SS
]);
188 if (env
->cr
[0] & CR0_PE_MASK
) {
189 /* force ss cpl to cs cpl */
190 sregs
.ss
.selector
= (sregs
.ss
.selector
& ~3) |
191 (sregs
.cs
.selector
& 3);
192 sregs
.ss
.dpl
= sregs
.ss
.selector
& 3;
/* With CR0_PG_MASK clear, the data segments and ss are passed through
 * fix_realmode_dataseg(). */
195 if (!(env
->cr
[0] & CR0_PG_MASK
)) {
196 fix_realmode_dataseg(&sregs
.ds
);
197 fix_realmode_dataseg(&sregs
.es
);
198 fix_realmode_dataseg(&sregs
.fs
);
199 fix_realmode_dataseg(&sregs
.gs
);
200 fix_realmode_dataseg(&sregs
.ss
);
/* Task register and LDT. */
204 set_seg(&sregs
.tr
, &env
->tr
);
205 set_seg(&sregs
.ldt
, &env
->ldt
);
/* IDT and GDT descriptors. */
207 sregs
.idt
.limit
= env
->idt
.limit
;
208 sregs
.idt
.base
= env
->idt
.base
;
209 sregs
.gdt
.limit
= env
->gdt
.limit
;
210 sregs
.gdt
.base
= env
->gdt
.base
;
/* Control registers. */
212 sregs
.cr0
= env
->cr
[0];
213 sregs
.cr2
= env
->cr
[2];
214 sregs
.cr3
= env
->cr
[3];
215 sregs
.cr4
= env
->cr
[4];
/* APIC base and cr8 are read through the cpu_get_apic_* helpers. */
217 sregs
.apic_base
= cpu_get_apic_base(env
);
218 sregs
.efer
= env
->efer
;
219 sregs
.cr8
= cpu_get_apic_tpr(env
);
221 kvm_set_sregs(kvm_context
, 0, &sregs
);
/* Build the MSR list; n counts the entries filled in. */
225 set_msr_entry(&msrs
[n
++], MSR_IA32_SYSENTER_CS
, env
->sysenter_cs
);
226 set_msr_entry(&msrs
[n
++], MSR_IA32_SYSENTER_ESP
, env
->sysenter_esp
);
227 set_msr_entry(&msrs
[n
++], MSR_IA32_SYSENTER_EIP
, env
->sysenter_eip
);
/* MSR_STAR is only included when KVM's MSR list contains it. */
228 if (kvm_has_msr_star
)
229 set_msr_entry(&msrs
[n
++], MSR_STAR
, env
->star
);
230 set_msr_entry(&msrs
[n
++], MSR_IA32_TSC
, env
->tsc
);
232 set_msr_entry(&msrs
[n
++], MSR_CSTAR
, env
->cstar
);
233 set_msr_entry(&msrs
[n
++], MSR_KERNELGSBASE
, env
->kernelgsbase
);
234 set_msr_entry(&msrs
[n
++], MSR_FMASK
, env
->fmask
);
235 set_msr_entry(&msrs
[n
++], MSR_LSTAR
, env
->lstar
);
238 rc
= kvm_set_msrs(kvm_context
, 0, msrs
, n
);
/* NOTE(review): the condition guarding this error report is not visible. */
240 perror("kvm_set_msrs FAILED");
/*
 * Pull the register state of KVM vcpu 0 back into a QEMU CPUState:
 * general-purpose registers and rflags via kvm_get_regs(), segment,
 * descriptor-table and control registers via kvm_get_sregs(), then
 * recompute the derived env->hflags bits and the lazy condition-code
 * variables, and finally read back a set of MSRs via kvm_get_msrs().
 * NOTE(review): braces, else branches, conditional-compilation lines and
 * some local declarations (e.g. `hflags`, `n`, `rc`, `i`) are not visible in
 * this extract.
 */
244 static void save_regs(CPUState
*env
)
246 struct kvm_regs regs
;
247 struct kvm_sregs sregs
;
248 struct kvm_msr_entry msrs
[MSR_COUNT
];
/* NOTE(review): the third argument below looks like a mis-decoded `&regs`
 * (the local declared above) -- confirm against the original file. */
252 kvm_get_regs(kvm_context
, 0, ®s
);
/* General-purpose registers. */
254 env
->regs
[R_EAX
] = regs
.rax
;
255 env
->regs
[R_EBX
] = regs
.rbx
;
256 env
->regs
[R_ECX
] = regs
.rcx
;
257 env
->regs
[R_EDX
] = regs
.rdx
;
258 env
->regs
[R_ESI
] = regs
.rsi
;
259 env
->regs
[R_EDI
] = regs
.rdi
;
260 env
->regs
[R_ESP
] = regs
.rsp
;
261 env
->regs
[R_EBP
] = regs
.rbp
;
/* r8..r15 go to env->regs[8..15]. */
263 env
->regs
[8] = regs
.r8
;
264 env
->regs
[9] = regs
.r9
;
265 env
->regs
[10] = regs
.r10
;
266 env
->regs
[11] = regs
.r11
;
267 env
->regs
[12] = regs
.r12
;
268 env
->regs
[13] = regs
.r13
;
269 env
->regs
[14] = regs
.r14
;
270 env
->regs
[15] = regs
.r15
;
273 env
->eflags
= regs
.rflags
;
276 kvm_get_sregs(kvm_context
, 0, &sregs
);
/* Pending-interrupt bitmap is copied verbatim. */
278 memcpy(env
->kvm_interrupt_bitmap
, sregs
.interrupt_bitmap
, sizeof(env
->kvm_interrupt_bitmap
));
/* Segment registers, task register and LDT. */
280 get_seg(&env
->segs
[R_CS
], &sregs
.cs
);
281 get_seg(&env
->segs
[R_DS
], &sregs
.ds
);
282 get_seg(&env
->segs
[R_ES
], &sregs
.es
);
283 get_seg(&env
->segs
[R_FS
], &sregs
.fs
);
284 get_seg(&env
->segs
[R_GS
], &sregs
.gs
);
285 get_seg(&env
->segs
[R_SS
], &sregs
.ss
);
287 get_seg(&env
->tr
, &sregs
.tr
);
288 get_seg(&env
->ldt
, &sregs
.ldt
);
/* IDT and GDT descriptors. */
290 env
->idt
.limit
= sregs
.idt
.limit
;
291 env
->idt
.base
= sregs
.idt
.base
;
292 env
->gdt
.limit
= sregs
.gdt
.limit
;
293 env
->gdt
.base
= sregs
.gdt
.base
;
/* Control registers. */
295 env
->cr
[0] = sregs
.cr0
;
296 env
->cr
[2] = sregs
.cr2
;
297 env
->cr
[3] = sregs
.cr3
;
298 env
->cr
[4] = sregs
.cr4
;
300 cpu_set_apic_base(env
, sregs
.apic_base
);
302 env
->efer
= sregs
.efer
;
303 cpu_set_apic_tpr(env
, sregs
.cr8
);
/* Mask of the env->hflags bits that are preserved; every bit named inside
 * the ~( ... ) is recomputed below. */
305 #define HFLAG_COPY_MASK ~( \
306 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
307 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
308 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
309 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
/* CPL from the DPL field of the CS flags. */
313 hflags
= (env
->segs
[R_CS
].flags
>> DESC_DPL_SHIFT
) & HF_CPL_MASK
;
/* PE, MP/EM/TS from cr0. */
314 hflags
|= (env
->cr
[0] & CR0_PE_MASK
) << (HF_PE_SHIFT
- CR0_PE_SHIFT
);
315 hflags
|= (env
->cr
[0] << (HF_MP_SHIFT
- CR0_MP_SHIFT
)) &
316 (HF_MP_MASK
| HF_EM_MASK
| HF_TS_MASK
);
/* TF, VM and IOPL are taken from eflags at the same bit positions. */
317 hflags
|= (env
->eflags
& (HF_TF_MASK
| HF_VM_MASK
| HF_IOPL_MASK
));
/* OSFXSR from cr4. */
318 hflags
|= (env
->cr
[4] & CR4_OSFXSR_MASK
) <<
319 (HF_OSFXSR_SHIFT
- CR4_OSFXSR_SHIFT
);
321 if (env
->efer
& MSR_EFER_LMA
) {
322 hflags
|= HF_LMA_MASK
;
/* LMA set and CS has the L bit: CS32, SS32 and CS64 are all set. */
325 if ((hflags
& HF_LMA_MASK
) && (env
->segs
[R_CS
].flags
& DESC_L_MASK
)) {
326 hflags
|= HF_CS32_MASK
| HF_SS32_MASK
| HF_CS64_MASK
;
/* NOTE(review): presumably the else branch of the test above; CS32/SS32
 * follow the B bit of CS and SS respectively. */
328 hflags
|= (env
->segs
[R_CS
].flags
& DESC_B_MASK
) >>
329 (DESC_B_SHIFT
- HF_CS32_SHIFT
);
330 hflags
|= (env
->segs
[R_SS
].flags
& DESC_B_MASK
) >>
331 (DESC_B_SHIFT
- HF_SS32_SHIFT
);
/* ADDSEG is forced when PE is clear, VM_MASK is set, or CS is not 32-bit. */
332 if (!(env
->cr
[0] & CR0_PE_MASK
) ||
333 (env
->eflags
& VM_MASK
) ||
334 !(hflags
& HF_CS32_MASK
)) {
335 hflags
|= HF_ADDSEG_MASK
;
/* Otherwise a bit is set when any of the DS/ES/SS bases is non-zero.
 * NOTE(review): the shift amount on the following line is not visible. */
337 hflags
|= ((env
->segs
[R_DS
].base
|
338 env
->segs
[R_ES
].base
|
339 env
->segs
[R_SS
].base
) != 0) <<
/* Merge the recomputed bits into the bits kept by HFLAG_COPY_MASK. */
343 env
->hflags
= (env
->hflags
& HFLAG_COPY_MASK
) | hflags
;
/* Split eflags into the lazy condition-code variables: CC_SRC gets the
 * O/S/Z/A/P/C flags, DF becomes 1 or -1 depending on eflags bit 10, and
 * those bits are then cleared from env->eflags. */
344 CC_SRC
= env
->eflags
& (CC_O
| CC_S
| CC_Z
| CC_A
| CC_P
| CC_C
);
345 DF
= 1 - (2 * ((env
->eflags
>> 10) & 1));
346 CC_OP
= CC_OP_EFLAGS
;
347 env
->eflags
&= ~(DF_MASK
| CC_O
| CC_S
| CC_Z
| CC_A
| CC_P
| CC_C
);
/* Request list for kvm_get_msrs(): only the index of each entry is set. */
353 msrs
[n
++].index
= MSR_IA32_SYSENTER_CS
;
354 msrs
[n
++].index
= MSR_IA32_SYSENTER_ESP
;
355 msrs
[n
++].index
= MSR_IA32_SYSENTER_EIP
;
/* MSR_STAR is only requested when KVM's MSR list contains it. */
356 if (kvm_has_msr_star
)
357 msrs
[n
++].index
= MSR_STAR
;
358 msrs
[n
++].index
= MSR_IA32_TSC
;
360 msrs
[n
++].index
= MSR_CSTAR
;
361 msrs
[n
++].index
= MSR_KERNELGSBASE
;
362 msrs
[n
++].index
= MSR_FMASK
;
363 msrs
[n
++].index
= MSR_LSTAR
;
365 rc
= kvm_get_msrs(kvm_context
, 0, msrs
, n
);
/* NOTE(review): the condition guarding this error report is not visible. */
367 perror("kvm_get_msrs FAILED");
370 n
= rc
; /* actual number of MSRs */
/* Hand each returned entry to get_msr_entry().
 * NOTE(review): the statement executed on a non-zero result is not visible. */
371 for (i
=0 ; i
<n
; i
++) {
372 if (get_msr_entry(&msrs
[i
], env
))
/*
 * Callback: inject a pending hard interrupt if the guest can take one.
 * When env->ready_for_interrupt_injection is set, CPU_INTERRUPT_HARD is
 * requested and IF_MASK is set in eflags, the request bit is cleared and the
 * vector from cpu_get_pic_interrupt() is injected into vcpu 0.
 * Returns non-zero when CPU_INTERRUPT_HARD is still requested afterwards.
 * NOTE(review): the statement that picks `env` out of `envs` is not visible
 * in this extract.
 */
381 static int try_push_interrupts(void *opaque
)
383 CPUState
**envs
= opaque
, *env
;
386 if (env
->ready_for_interrupt_injection
&&
387 (env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
388 (env
->eflags
& IF_MASK
)) {
389 env
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
390 // for now using cpu 0
391 kvm_inject_irq(kvm_context
, 0, cpu_get_pic_interrupt(env
));
394 return (env
->interrupt_request
& CPU_INTERRUPT_HARD
) != 0;
/*
 * Callback that copies state from the kvm_run structure back into the
 * CPUState: the IF_MASK bit of eflags mirrors kvm_run->if_flag,
 * ready_for_interrupt_injection is copied, and the APIC TPR and APIC base
 * are updated from kvm_run->cr8 and kvm_run->apic_base.
 * NOTE(review): the statement that picks `env` out of `envs` is not visible
 * in this extract.
 */
397 static void post_kvm_run(void *opaque
, struct kvm_run
*kvm_run
)
399 CPUState
**envs
= opaque
, *env
;
/* Set or clear IF_MASK according to if_flag; other eflags bits are kept. */
402 env
->eflags
= (kvm_run
->if_flag
) ? env
->eflags
| IF_MASK
:env
->eflags
& ~IF_MASK
;
403 env
->ready_for_interrupt_injection
= kvm_run
->ready_for_interrupt_injection
;
404 cpu_set_apic_tpr(env
, kvm_run
->cr8
);
405 cpu_set_apic_base(env
, kvm_run
->apic_base
);
/*
 * Callback that stores the current APIC TPR (cpu_get_apic_tpr) into
 * kvm_run->cr8.
 * NOTE(review): the statement that picks `env` out of `envs` is not visible
 * in this extract.
 */
408 static void pre_kvm_run(void *opaque
, struct kvm_run
*kvm_run
)
410 CPUState
**envs
= opaque
, *env
;
413 kvm_run
->cr8
= cpu_get_apic_tpr(env
);
/* Public entry point taking a CPUState.
 * NOTE(review): body not visible in this extract; presumably a wrapper
 * around load_regs() -- confirm. */
416 void kvm_load_registers(CPUState
*env
)
/* Public entry point taking a CPUState.
 * NOTE(review): body not visible in this extract; presumably a wrapper
 * around save_regs() -- confirm. */
421 void kvm_save_registers(CPUState
*env
)
/*
 * Run the guest on vcpu 0 through kvm_run().
 * Before running, an exit request (CPU_INTERRUPT_EXIT) is honoured only when
 * no interrupt work is pending: the request bit is cleared and
 * exception_index is set to EXCP_INTERRUPT.
 * NOTE(review): the declaration of `r`, the remainder of the exit-request
 * branch, the condition guarding the printf and the return statement are not
 * visible in this extract.
 */
426 int kvm_cpu_exec(CPUState
*env
)
/* pending: injection is not yet possible, or a hard interrupt is requested
 * while IF_MASK is set. */
429 int pending
= (!env
->ready_for_interrupt_injection
||
430 ((env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
431 (env
->eflags
& IF_MASK
)));
433 if (!pending
&& (env
->interrupt_request
& CPU_INTERRUPT_EXIT
)) {
434 env
->interrupt_request
&= ~CPU_INTERRUPT_EXIT
;
435 env
->exception_index
= EXCP_INTERRUPT
;
443 r
= kvm_run(kvm_context
, 0);
445 printf("kvm_run returned %d\n", r
);
/*
 * CPUID callback.  The incoming register values are written into
 * env->regs[R_EAX..R_EDX] and the (updated) env->regs values are copied back
 * out through the pointers.  For leaf 0x80000001 the host CPUID is executed
 * with inline asm and three feature bits are cleared from *rdx when the host
 * does not report them.
 * NOTE(review): the declarations of `env` and `eax`, the call made between
 * the two copy sequences, the asm statement wrapper and the return statement
 * are not visible in this extract.
 */
453 static int kvm_cpuid(void *opaque
, uint64_t *rax
, uint64_t *rbx
,
454 uint64_t *rcx
, uint64_t *rdx
)
456 CPUState
**envs
= opaque
;
/* Pass the guest's register values in through env->regs. */
463 env
->regs
[R_EAX
] = *rax
;
464 env
->regs
[R_EBX
] = *rbx
;
465 env
->regs
[R_ECX
] = *rcx
;
466 env
->regs
[R_EDX
] = *rdx
;
/* Copy the results back out to the caller. */
468 *rdx
= env
->regs
[R_EDX
];
469 *rcx
= env
->regs
[R_ECX
];
470 *rbx
= env
->regs
[R_EBX
];
471 *rax
= env
->regs
[R_EAX
];
472 // don't report long mode/syscall/nx if no native support
473 if (eax
== 0x80000001) {
474 unsigned long h_eax
= eax
, h_edx
;
477 // push/pop hack to workaround gcc 3 register pressure trouble
/* Two asm variants (rbx/rcx and ebx/ecx); presumably selected by a
 * preprocessor conditional that is not visible here. */
480 "push %%rbx; push %%rcx; cpuid; pop %%rcx; pop %%rbx"
482 "push %%ebx; push %%ecx; cpuid; pop %%ecx; pop %%ebx"
484 : "+a"(h_eax
), "=d"(h_edx
));
/* Clear each bit in the guest's edx that the host's edx lacks
 * (bits 29, 11 and 20). */
487 if ((h_edx
& 0x20000000) == 0)
488 *rdx
&= ~0x20000000ull
;
490 if ((h_edx
& 0x00000800) == 0)
491 *rdx
&= ~0x00000800ull
;
493 if ((h_edx
& 0x00100000) == 0)
494 *rdx
&= ~0x00100000ull
;
/*
 * Debug-exit callback: sets env->exception_index to EXCP_DEBUG.
 * NOTE(review): the selection of `env` from `envs` and the return statement
 * are not visible in this extract.
 */
500 static int kvm_debug(void *opaque
, int vcpu
)
502 CPUState
**envs
= opaque
;
505 env
->exception_index
= EXCP_DEBUG
;
/* Port-input callback: stores the result of cpu_inb(0, addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
509 static int kvm_inb(void *opaque
, uint16_t addr
, uint8_t *data
)
511 *data
= cpu_inb(0, addr
);
/* Port-input callback: stores the result of cpu_inw(0, addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
515 static int kvm_inw(void *opaque
, uint16_t addr
, uint16_t *data
)
517 *data
= cpu_inw(0, addr
);
/* Port-input callback: stores the result of cpu_inl(0, addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
521 static int kvm_inl(void *opaque
, uint16_t addr
, uint32_t *data
)
523 *data
= cpu_inl(0, addr
);
/* Port-output callback: forwards data to cpu_outb(0, addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
527 static int kvm_outb(void *opaque
, uint16_t addr
, uint8_t data
)
529 cpu_outb(0, addr
, data
);
/* Port-output callback: forwards data to cpu_outw(0, addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
533 static int kvm_outw(void *opaque
, uint16_t addr
, uint16_t data
)
535 cpu_outw(0, addr
, data
);
/* Port-output callback: forwards data to cpu_outl(0, addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
539 static int kvm_outl(void *opaque
, uint16_t addr
, uint32_t data
)
541 cpu_outl(0, addr
, data
);
/* Memory-read callback: stores the result of ldub_phys(addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
545 static int kvm_readb(void *opaque
, uint64_t addr
, uint8_t *data
)
547 *data
= ldub_phys(addr
);
/* Memory-read callback: stores the result of lduw_phys(addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
551 static int kvm_readw(void *opaque
, uint64_t addr
, uint16_t *data
)
553 *data
= lduw_phys(addr
);
/* Memory-read callback: stores the result of ldl_phys(addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
557 static int kvm_readl(void *opaque
, uint64_t addr
, uint32_t *data
)
559 *data
= ldl_phys(addr
);
/* Memory-read callback: stores the result of ldq_phys(addr) in *data.
 * NOTE(review): the return statement is not visible in this extract. */
563 static int kvm_readq(void *opaque
, uint64_t addr
, uint64_t *data
)
565 *data
= ldq_phys(addr
);
/* Memory-write callback: forwards data to stb_phys(addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
569 static int kvm_writeb(void *opaque
, uint64_t addr
, uint8_t data
)
571 stb_phys(addr
, data
);
/* Memory-write callback: forwards data to stw_phys(addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
575 static int kvm_writew(void *opaque
, uint64_t addr
, uint16_t data
)
577 stw_phys(addr
, data
);
/* Memory-write callback: forwards data to stl_phys(addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
581 static int kvm_writel(void *opaque
, uint64_t addr
, uint32_t data
)
583 stl_phys(addr
, data
);
/* Memory-write callback: forwards data to stq_phys(addr, data).
 * NOTE(review): the return statement is not visible in this extract. */
587 static int kvm_writeq(void *opaque
, uint64_t addr
, uint64_t data
)
589 stq_phys(addr
, data
);
/* Callback registered as .io_window in qemu_kvm_ops.
 * NOTE(review): body not visible in this extract. */
593 static int kvm_io_window(void *opaque
)
/*
 * Halt callback: unless a hard interrupt is requested while IF_MASK is set,
 * mark the CPU halted (HF_HALTED_MASK in hflags) and set exception_index to
 * EXCP_HLT.
 * NOTE(review): the selection of `env` from `envs` and the return statement
 * are not visible in this extract.
 */
599 static int kvm_halt(void *opaque
, int vcpu
)
601 CPUState
**envs
= opaque
, *env
;
604 if (!((env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
605 (env
->eflags
& IF_MASK
))) {
606 env
->hflags
|= HF_HALTED_MASK
;
607 env
->exception_index
= EXCP_HLT
;
/* Shutdown callback: calls qemu_system_reset_request().
 * NOTE(review): the return statement is not visible in this extract. */
613 static int kvm_shutdown(void *opaque
, int vcpu
)
615 qemu_system_reset_request();
/*
 * Callback table passed to kvm_init(); each member points at the static
 * handler of the same purpose defined in this file.
 * NOTE(review): several initializers and the closing brace are not visible
 * in this extract.
 */
619 static struct kvm_callbacks qemu_kvm_ops
= {
632 .writeb
= kvm_writeb
,
633 .writew
= kvm_writew
,
634 .writel
= kvm_writel
,
635 .writeq
= kvm_writeq
,
637 .shutdown
= kvm_shutdown
,
638 .io_window
= kvm_io_window
,
639 .try_push_interrupts
= try_push_interrupts
,
640 .post_kvm_run
= post_kvm_run
,
641 .pre_kvm_run
= pre_kvm_run
,
/* NOTE(review): the header of the function enclosing this statement is not
 * visible in this extract. */
646 /* Try to initialize KVM: register the callback table and pass saved_env
 * as the second argument. */
647 kvm_context
= kvm_init(&qemu_kvm_ops
, saved_env
);
/*
 * Create the KVM VM with kvm_create() (passing phys_ram_size and the address
 * of phys_ram_base), fetch KVM's MSR list and record in kvm_has_msr_star
 * whether MSR_STAR is part of it.
 * NOTE(review): the declaration of `i`, the failure handling inside the
 * kvm_create() branch, any check of the kvm_get_msr_list() result and the
 * return statements are not visible in this extract.
 */
655 int kvm_qemu_create_context(void)
659 if (kvm_create(kvm_context
, phys_ram_size
, (void**)&phys_ram_base
) < 0) {
663 kvm_msr_list
= kvm_get_msr_list(kvm_context
);
/* Linear scan of the reported MSR indices for MSR_STAR. */
668 for (i
= 0; i
< kvm_msr_list
->nmsrs
; ++i
)
669 if (kvm_msr_list
->indices
[i
] == MSR_STAR
)
670 kvm_has_msr_star
= 1;
/* Tear down the KVM context by calling kvm_finalize(kvm_context). */
674 void kvm_qemu_destroy(void)
676 kvm_finalize(kvm_context
);
/*
 * Push QEMU's debugger state to KVM vcpu 0.  When breakpoints exist or
 * single-stepping is enabled, at most the first 4 entries of
 * env->breakpoints are marked enabled with their addresses, and
 * dbg.singlestep mirrors env->singlestep_enabled.  Returns the result of
 * kvm_guest_debug().
 * NOTE(review): the declaration of `i`, any initialisation of `dbg` and any
 * other statements inside the if-block are not visible in this extract.
 */
679 int kvm_update_debugger(CPUState
*env
)
681 struct kvm_debug_guest dbg
;
685 if (env
->nb_breakpoints
|| env
->singlestep_enabled
) {
/* The loop stops at 4 entries even if more breakpoints are set. */
687 for (i
= 0; i
< 4 && i
< env
->nb_breakpoints
; ++i
) {
688 dbg
.breakpoints
[i
].enabled
= 1;
689 dbg
.breakpoints
[i
].address
= env
->breakpoints
[i
];
691 dbg
.singlestep
= env
->singlestep_enabled
;
693 return kvm_guest_debug(kvm_context
, 0, &dbg
);