kvm: qemu: separate TSC load from kvm_arch_load_regs
[kvm-userspace.git] / qemu / qemu-kvm-x86.c
blobf7ed70b005240c994bcf616e8edcd9ab066d1450
1 /*
2 * qemu/kvm integration, x86 specific code
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
9 #include "config.h"
10 #include "config-host.h"
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "gdbstub.h"
15 #include <sys/io.h>
17 #include "qemu-kvm.h"
18 #include <libkvm.h>
19 #include <pthread.h>
20 #include <sys/utsname.h>
21 #include <linux/kvm_para.h>
23 #define MSR_IA32_TSC 0x10
25 static struct kvm_msr_list *kvm_msr_list;
26 extern unsigned int kvm_shadow_memory;
27 extern kvm_context_t kvm_context;
28 static int kvm_has_msr_star;
30 static int lm_capable_kernel;
32 int kvm_qemu_create_memory_alias(uint64_t phys_start,
33 uint64_t len,
34 uint64_t target_phys)
36 return kvm_create_memory_alias(kvm_context, phys_start, len, target_phys);
39 int kvm_qemu_destroy_memory_alias(uint64_t phys_start)
41 return kvm_destroy_memory_alias(kvm_context, phys_start);
44 int kvm_arch_qemu_create_context(void)
46 int i;
47 struct utsname utsname;
49 uname(&utsname);
50 lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
52 if (kvm_shadow_memory)
53 kvm_set_shadow_pages(kvm_context, kvm_shadow_memory);
55 kvm_msr_list = kvm_get_msr_list(kvm_context);
56 if (!kvm_msr_list)
57 return -1;
58 for (i = 0; i < kvm_msr_list->nmsrs; ++i)
59 if (kvm_msr_list->indices[i] == MSR_STAR)
60 kvm_has_msr_star = 1;
61 return 0;
64 static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
65 uint64_t data)
67 entry->index = index;
68 entry->data = data;
71 /* returns 0 on success, non-0 on failure */
72 static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
74 switch (entry->index) {
75 case MSR_IA32_SYSENTER_CS:
76 env->sysenter_cs = entry->data;
77 break;
78 case MSR_IA32_SYSENTER_ESP:
79 env->sysenter_esp = entry->data;
80 break;
81 case MSR_IA32_SYSENTER_EIP:
82 env->sysenter_eip = entry->data;
83 break;
84 case MSR_STAR:
85 env->star = entry->data;
86 break;
87 #ifdef TARGET_X86_64
88 case MSR_CSTAR:
89 env->cstar = entry->data;
90 break;
91 case MSR_KERNELGSBASE:
92 env->kernelgsbase = entry->data;
93 break;
94 case MSR_FMASK:
95 env->fmask = entry->data;
96 break;
97 case MSR_LSTAR:
98 env->lstar = entry->data;
99 break;
100 #endif
101 case MSR_IA32_TSC:
102 env->tsc = entry->data;
103 break;
104 case MSR_VM_HSAVE_PA:
105 env->vm_hsave = entry->data;
106 break;
107 default:
108 printf("Warning unknown msr index 0x%x\n", entry->index);
109 return 1;
111 return 0;
/*
 * Capacity of the on-stack MSR arrays in kvm_arch_load_regs() and
 * kvm_arch_save_regs().
 *
 * kvm_arch_save_regs() is the larger user: it can queue SYSENTER_CS,
 * SYSENTER_ESP, SYSENTER_EIP, STAR, TSC and VM_HSAVE_PA (6 entries), plus
 * CSTAR, KERNELGSBASE, FMASK and LSTAR on 64-bit targets (10 entries).
 * The value must cover that worst case or the array is overrun.
 */
#ifdef TARGET_X86_64
#define MSR_COUNT 10
#else
#define MSR_COUNT 6
#endif
120 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
122 lhs->selector = rhs->selector;
123 lhs->base = rhs->base;
124 lhs->limit = rhs->limit;
125 lhs->type = 3;
126 lhs->present = 1;
127 lhs->dpl = 3;
128 lhs->db = 0;
129 lhs->s = 1;
130 lhs->l = 0;
131 lhs->g = 0;
132 lhs->avl = 0;
133 lhs->unusable = 0;
136 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
138 unsigned flags = rhs->flags;
139 lhs->selector = rhs->selector;
140 lhs->base = rhs->base;
141 lhs->limit = rhs->limit;
142 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
143 lhs->present = (flags & DESC_P_MASK) != 0;
144 lhs->dpl = rhs->selector & 3;
145 lhs->db = (flags >> DESC_B_SHIFT) & 1;
146 lhs->s = (flags & DESC_S_MASK) != 0;
147 lhs->l = (flags >> DESC_L_SHIFT) & 1;
148 lhs->g = (flags & DESC_G_MASK) != 0;
149 lhs->avl = (flags & DESC_AVL_MASK) != 0;
150 lhs->unusable = 0;
153 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
155 lhs->selector = rhs->selector;
156 lhs->base = rhs->base;
157 lhs->limit = rhs->limit;
158 lhs->flags =
159 (rhs->type << DESC_TYPE_SHIFT)
160 | (rhs->present * DESC_P_MASK)
161 | (rhs->dpl << DESC_DPL_SHIFT)
162 | (rhs->db << DESC_B_SHIFT)
163 | (rhs->s * DESC_S_MASK)
164 | (rhs->l << DESC_L_SHIFT)
165 | (rhs->g * DESC_G_MASK)
166 | (rhs->avl * DESC_AVL_MASK);
169 void kvm_arch_load_regs(CPUState *env)
171 struct kvm_regs regs;
172 struct kvm_fpu fpu;
173 struct kvm_sregs sregs;
174 struct kvm_msr_entry msrs[MSR_COUNT];
175 int rc, n, i;
177 regs.rax = env->regs[R_EAX];
178 regs.rbx = env->regs[R_EBX];
179 regs.rcx = env->regs[R_ECX];
180 regs.rdx = env->regs[R_EDX];
181 regs.rsi = env->regs[R_ESI];
182 regs.rdi = env->regs[R_EDI];
183 regs.rsp = env->regs[R_ESP];
184 regs.rbp = env->regs[R_EBP];
185 #ifdef TARGET_X86_64
186 regs.r8 = env->regs[8];
187 regs.r9 = env->regs[9];
188 regs.r10 = env->regs[10];
189 regs.r11 = env->regs[11];
190 regs.r12 = env->regs[12];
191 regs.r13 = env->regs[13];
192 regs.r14 = env->regs[14];
193 regs.r15 = env->regs[15];
194 #endif
196 regs.rflags = env->eflags;
197 regs.rip = env->eip;
199 kvm_set_regs(kvm_context, env->cpu_index, &regs);
201 memset(&fpu, 0, sizeof fpu);
202 fpu.fsw = env->fpus & ~(7 << 11);
203 fpu.fsw |= (env->fpstt & 7) << 11;
204 fpu.fcw = env->fpuc;
205 for (i = 0; i < 8; ++i)
206 fpu.ftwx |= (!env->fptags[i]) << i;
207 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
208 memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
209 fpu.mxcsr = env->mxcsr;
210 kvm_set_fpu(kvm_context, env->cpu_index, &fpu);
212 memcpy(sregs.interrupt_bitmap, env->interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
214 if ((env->eflags & VM_MASK)) {
215 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
216 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
217 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
218 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
219 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
220 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
221 } else {
222 set_seg(&sregs.cs, &env->segs[R_CS]);
223 set_seg(&sregs.ds, &env->segs[R_DS]);
224 set_seg(&sregs.es, &env->segs[R_ES]);
225 set_seg(&sregs.fs, &env->segs[R_FS]);
226 set_seg(&sregs.gs, &env->segs[R_GS]);
227 set_seg(&sregs.ss, &env->segs[R_SS]);
229 if (env->cr[0] & CR0_PE_MASK) {
230 /* force ss cpl to cs cpl */
231 sregs.ss.selector = (sregs.ss.selector & ~3) |
232 (sregs.cs.selector & 3);
233 sregs.ss.dpl = sregs.ss.selector & 3;
237 set_seg(&sregs.tr, &env->tr);
238 set_seg(&sregs.ldt, &env->ldt);
240 sregs.idt.limit = env->idt.limit;
241 sregs.idt.base = env->idt.base;
242 sregs.gdt.limit = env->gdt.limit;
243 sregs.gdt.base = env->gdt.base;
245 sregs.cr0 = env->cr[0];
246 sregs.cr2 = env->cr[2];
247 sregs.cr3 = env->cr[3];
248 sregs.cr4 = env->cr[4];
250 sregs.cr8 = cpu_get_apic_tpr(env);
251 sregs.apic_base = cpu_get_apic_base(env);
253 sregs.efer = env->efer;
255 kvm_set_sregs(kvm_context, env->cpu_index, &sregs);
257 /* msrs */
258 n = 0;
259 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
260 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
261 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
262 if (kvm_has_msr_star)
263 set_msr_entry(&msrs[n++], MSR_STAR, env->star);
264 set_msr_entry(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
265 #ifdef TARGET_X86_64
266 if (lm_capable_kernel) {
267 set_msr_entry(&msrs[n++], MSR_CSTAR, env->cstar);
268 set_msr_entry(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
269 set_msr_entry(&msrs[n++], MSR_FMASK, env->fmask);
270 set_msr_entry(&msrs[n++], MSR_LSTAR , env->lstar);
272 #endif
274 rc = kvm_set_msrs(kvm_context, env->cpu_index, msrs, n);
275 if (rc == -1)
276 perror("kvm_set_msrs FAILED");
279 void kvm_load_tsc(CPUState *env)
281 int rc;
282 struct kvm_msr_entry msr;
284 set_msr_entry(&msr, MSR_IA32_TSC, env->tsc);
286 rc = kvm_set_msrs(kvm_context, env->cpu_index, &msr, 1);
287 if (rc == -1)
288 perror("kvm_set_tsc FAILED.\n");
291 void kvm_save_mpstate(CPUState *env)
293 #ifdef KVM_CAP_MP_STATE
294 int r;
295 struct kvm_mp_state mp_state;
297 r = kvm_get_mpstate(kvm_context, env->cpu_index, &mp_state);
298 if (r < 0)
299 env->mp_state = -1;
300 else
301 env->mp_state = mp_state.mp_state;
302 #endif
305 void kvm_load_mpstate(CPUState *env)
307 #ifdef KVM_CAP_MP_STATE
308 struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
311 * -1 indicates that the host did not support GET_MP_STATE ioctl,
312 * so don't touch it.
314 if (env->mp_state != -1)
315 kvm_set_mpstate(kvm_context, env->cpu_index, &mp_state);
316 #endif
319 void kvm_arch_save_regs(CPUState *env)
321 struct kvm_regs regs;
322 struct kvm_fpu fpu;
323 struct kvm_sregs sregs;
324 struct kvm_msr_entry msrs[MSR_COUNT];
325 uint32_t hflags;
326 uint32_t i, n, rc;
328 kvm_get_regs(kvm_context, env->cpu_index, &regs);
330 env->regs[R_EAX] = regs.rax;
331 env->regs[R_EBX] = regs.rbx;
332 env->regs[R_ECX] = regs.rcx;
333 env->regs[R_EDX] = regs.rdx;
334 env->regs[R_ESI] = regs.rsi;
335 env->regs[R_EDI] = regs.rdi;
336 env->regs[R_ESP] = regs.rsp;
337 env->regs[R_EBP] = regs.rbp;
338 #ifdef TARGET_X86_64
339 env->regs[8] = regs.r8;
340 env->regs[9] = regs.r9;
341 env->regs[10] = regs.r10;
342 env->regs[11] = regs.r11;
343 env->regs[12] = regs.r12;
344 env->regs[13] = regs.r13;
345 env->regs[14] = regs.r14;
346 env->regs[15] = regs.r15;
347 #endif
349 env->eflags = regs.rflags;
350 env->eip = regs.rip;
352 kvm_get_fpu(kvm_context, env->cpu_index, &fpu);
353 env->fpstt = (fpu.fsw >> 11) & 7;
354 env->fpus = fpu.fsw;
355 env->fpuc = fpu.fcw;
356 for (i = 0; i < 8; ++i)
357 env->fptags[i] = !((fpu.ftwx >> i) & 1);
358 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
359 memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
360 env->mxcsr = fpu.mxcsr;
362 kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
364 memcpy(env->interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->interrupt_bitmap));
366 get_seg(&env->segs[R_CS], &sregs.cs);
367 get_seg(&env->segs[R_DS], &sregs.ds);
368 get_seg(&env->segs[R_ES], &sregs.es);
369 get_seg(&env->segs[R_FS], &sregs.fs);
370 get_seg(&env->segs[R_GS], &sregs.gs);
371 get_seg(&env->segs[R_SS], &sregs.ss);
373 get_seg(&env->tr, &sregs.tr);
374 get_seg(&env->ldt, &sregs.ldt);
376 env->idt.limit = sregs.idt.limit;
377 env->idt.base = sregs.idt.base;
378 env->gdt.limit = sregs.gdt.limit;
379 env->gdt.base = sregs.gdt.base;
381 env->cr[0] = sregs.cr0;
382 env->cr[2] = sregs.cr2;
383 env->cr[3] = sregs.cr3;
384 env->cr[4] = sregs.cr4;
386 cpu_set_apic_base(env, sregs.apic_base);
388 env->efer = sregs.efer;
389 //cpu_set_apic_tpr(env, sregs.cr8);
391 #define HFLAG_COPY_MASK ~( \
392 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
393 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
394 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
395 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
399 hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
400 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
401 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
402 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
403 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
404 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
405 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
407 if (env->efer & MSR_EFER_LMA) {
408 hflags |= HF_LMA_MASK;
411 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
412 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
413 } else {
414 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
415 (DESC_B_SHIFT - HF_CS32_SHIFT);
416 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
417 (DESC_B_SHIFT - HF_SS32_SHIFT);
418 if (!(env->cr[0] & CR0_PE_MASK) ||
419 (env->eflags & VM_MASK) ||
420 !(hflags & HF_CS32_MASK)) {
421 hflags |= HF_ADDSEG_MASK;
422 } else {
423 hflags |= ((env->segs[R_DS].base |
424 env->segs[R_ES].base |
425 env->segs[R_SS].base) != 0) <<
426 HF_ADDSEG_SHIFT;
429 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
430 env->cc_src = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
431 env->df = 1 - (2 * ((env->eflags >> 10) & 1));
432 env->cc_op = CC_OP_EFLAGS;
433 env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
435 /* msrs */
436 n = 0;
437 msrs[n++].index = MSR_IA32_SYSENTER_CS;
438 msrs[n++].index = MSR_IA32_SYSENTER_ESP;
439 msrs[n++].index = MSR_IA32_SYSENTER_EIP;
440 if (kvm_has_msr_star)
441 msrs[n++].index = MSR_STAR;
442 msrs[n++].index = MSR_IA32_TSC;
443 msrs[n++].index = MSR_VM_HSAVE_PA;
444 #ifdef TARGET_X86_64
445 if (lm_capable_kernel) {
446 msrs[n++].index = MSR_CSTAR;
447 msrs[n++].index = MSR_KERNELGSBASE;
448 msrs[n++].index = MSR_FMASK;
449 msrs[n++].index = MSR_LSTAR;
451 #endif
452 rc = kvm_get_msrs(kvm_context, env->cpu_index, msrs, n);
453 if (rc == -1) {
454 perror("kvm_get_msrs FAILED");
456 else {
457 n = rc; /* actual number of MSRs */
458 for (i=0 ; i<n; i++) {
459 if (get_msr_entry(&msrs[i], env))
460 return;
/*
 * Execute CPUID on the host with EAX = function.
 *
 * Each of eax/ebx/ecx/edx may be NULL; non-NULL pointers receive the
 * corresponding result register.
 */
static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
                       uint32_t *ecx, uint32_t *edx)
{
    uint32_t vec[4];
    uint32_t *const out[4] = { eax, ebx, ecx, edx };
    int k;

#ifdef __x86_64__
    __asm__ __volatile__("cpuid"
                         : "=a"(vec[0]), "=b"(vec[1]),
                           "=c"(vec[2]), "=d"(vec[3])
                         : "0"(function) : "cc");
#else
    /* save and restore all registers around cpuid; results go via %esi */
    __asm__ __volatile__("pusha \n\t"
                         "cpuid \n\t"
                         "mov %%eax, 0(%1) \n\t"
                         "mov %%ebx, 4(%1) \n\t"
                         "mov %%ecx, 8(%1) \n\t"
                         "mov %%edx, 12(%1) \n\t"
                         "popa"
                         : : "a"(function), "S"(vec)
                         : "memory", "cc");
#endif

    for (k = 0; k < 4; ++k) {
        if (out[k])
            *out[k] = vec[k];
    }
}
498 static void do_cpuid_ent(struct kvm_cpuid_entry *e, uint32_t function,
499 CPUState *env)
501 env->regs[R_EAX] = function;
502 qemu_kvm_cpuid_on_env(env);
503 e->function = function;
504 e->eax = env->regs[R_EAX];
505 e->ebx = env->regs[R_EBX];
506 e->ecx = env->regs[R_ECX];
507 e->edx = env->regs[R_EDX];
508 if (function == 0x80000001) {
509 uint32_t h_eax, h_edx;
511 host_cpuid(function, &h_eax, NULL, NULL, &h_edx);
513 // long mode
514 if ((h_edx & 0x20000000) == 0 || !lm_capable_kernel)
515 e->edx &= ~0x20000000u;
516 // syscall
517 if ((h_edx & 0x00000800) == 0)
518 e->edx &= ~0x00000800u;
519 // nx
520 if ((h_edx & 0x00100000) == 0)
521 e->edx &= ~0x00100000u;
522 // svm
523 if (!kvm_nested && (e->ecx & 4))
524 e->ecx &= ~4u;
526 // sysenter isn't supported on compatibility mode on AMD. and syscall
527 // isn't supported in compatibility mode on Intel. so advertise the
528 // actuall cpu, and say goodbye to migration between different vendors
529 // is you use compatibility mode.
530 if (function == 0) {
531 uint32_t bcd[3];
533 host_cpuid(0, NULL, &bcd[0], &bcd[1], &bcd[2]);
534 e->ebx = bcd[0];
535 e->ecx = bcd[1];
536 e->edx = bcd[2];
538 // "Hypervisor present" bit for Microsoft guests
539 if (function == 1)
540 e->ecx |= (1u << 31);
542 // 3dnow isn't properly emulated yet
543 if (function == 0x80000001)
544 e->edx &= ~0xc0000000;
/*
 * Pairs a KVM capability with the paravirt feature bit number advertised
 * when that capability is present.  The table ends with a { -1, -1 } entry.
 */
struct kvm_para_features {
    int cap;        /* capability passed to kvm_check_extension() */
    int feature;    /* bit number set in the feature word */
} para_features[] = {
#ifdef KVM_CAP_CLOCKSOURCE
    { .cap = KVM_CAP_CLOCKSOURCE, .feature = KVM_FEATURE_CLOCKSOURCE },
#endif
#ifdef KVM_CAP_NOP_IO_DELAY
    { .cap = KVM_CAP_NOP_IO_DELAY, .feature = KVM_FEATURE_NOP_IO_DELAY },
#endif
#ifdef KVM_CAP_PV_MMU
    { .cap = KVM_CAP_PV_MMU, .feature = KVM_FEATURE_MMU_OP },
#endif
#ifdef KVM_CAP_CR3_CACHE
    { .cap = KVM_CAP_CR3_CACHE, .feature = KVM_FEATURE_CR3_CACHE },
#endif
    { .cap = -1, .feature = -1 }
};
566 static int get_para_features(kvm_context_t kvm_context)
568 int i, features = 0;
570 for (i = 0; i < ARRAY_SIZE(para_features)-1; i++) {
571 if (kvm_check_extension(kvm_context, para_features[i].cap))
572 features |= (1 << para_features[i].feature);
575 return features;
578 int kvm_arch_qemu_init_env(CPUState *cenv)
580 struct kvm_cpuid_entry cpuid_ent[100];
581 #ifdef KVM_CPUID_SIGNATURE
582 struct kvm_cpuid_entry *pv_ent;
583 uint32_t signature[3];
584 #endif
585 int cpuid_nent = 0;
586 CPUState copy;
587 uint32_t i, limit;
589 copy = *cenv;
591 #ifdef KVM_CPUID_SIGNATURE
592 /* Paravirtualization CPUIDs */
593 memcpy(signature, "KVMKVMKVM\0\0\0", 12);
594 pv_ent = &cpuid_ent[cpuid_nent++];
595 memset(pv_ent, 0, sizeof(*pv_ent));
596 pv_ent->function = KVM_CPUID_SIGNATURE;
597 pv_ent->eax = 0;
598 pv_ent->ebx = signature[0];
599 pv_ent->ecx = signature[1];
600 pv_ent->edx = signature[2];
602 pv_ent = &cpuid_ent[cpuid_nent++];
603 memset(pv_ent, 0, sizeof(*pv_ent));
604 pv_ent->function = KVM_CPUID_FEATURES;
605 pv_ent->eax = get_para_features(kvm_context);
606 #endif
608 copy.regs[R_EAX] = 0;
609 qemu_kvm_cpuid_on_env(&copy);
610 limit = copy.regs[R_EAX];
612 for (i = 0; i <= limit; ++i)
613 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, &copy);
615 copy.regs[R_EAX] = 0x80000000;
616 qemu_kvm_cpuid_on_env(&copy);
617 limit = copy.regs[R_EAX];
619 for (i = 0x80000000; i <= limit; ++i)
620 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, &copy);
622 kvm_setup_cpuid(kvm_context, cenv->cpu_index, cpuid_nent, cpuid_ent);
623 return 0;
626 int kvm_arch_halt(void *opaque, int vcpu)
628 CPUState *env = cpu_single_env;
630 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
631 (env->eflags & IF_MASK)) &&
632 !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
633 env->halted = 1;
634 env->exception_index = EXCP_HLT;
636 return 1;
639 void kvm_arch_pre_kvm_run(void *opaque, CPUState *env)
641 if (!kvm_irqchip_in_kernel(kvm_context))
642 kvm_set_cr8(kvm_context, env->cpu_index, cpu_get_apic_tpr(env));
645 void kvm_arch_post_kvm_run(void *opaque, CPUState *env)
647 int vcpu = env->cpu_index;
649 cpu_single_env = env;
651 env->eflags = kvm_get_interrupt_flag(kvm_context, vcpu)
652 ? env->eflags | IF_MASK : env->eflags & ~IF_MASK;
654 cpu_set_apic_tpr(env, kvm_get_cr8(kvm_context, vcpu));
655 cpu_set_apic_base(env, kvm_get_apic_base(kvm_context, vcpu));
658 int kvm_arch_has_work(CPUState *env)
660 if (((env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT)) &&
661 (env->eflags & IF_MASK)) ||
662 (env->interrupt_request & CPU_INTERRUPT_NMI))
663 return 1;
664 return 0;
667 int kvm_arch_try_push_interrupts(void *opaque)
669 CPUState *env = cpu_single_env;
670 int r, irq;
672 if (kvm_is_ready_for_interrupt_injection(kvm_context, env->cpu_index) &&
673 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
674 (env->eflags & IF_MASK)) {
675 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
676 irq = cpu_get_pic_interrupt(env);
677 if (irq >= 0) {
678 r = kvm_inject_irq(kvm_context, env->cpu_index, irq);
679 if (r < 0)
680 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
684 return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
#ifdef KVM_CAP_USER_NMI
/*
 * Inject a pending NMI into the current vcpu.  Does nothing when no NMI is
 * requested; otherwise the request bit is cleared before injecting and a
 * failed injection is reported on stdout.
 */
void kvm_arch_push_nmi(void *opaque)
{
    CPUState *env = cpu_single_env;

    if (likely(!(env->interrupt_request & CPU_INTERRUPT_NMI)))
        return;

    env->interrupt_request &= ~CPU_INTERRUPT_NMI;
    if (kvm_inject_nmi(kvm_context, env->cpu_index) < 0)
        printf("cpu %d fail inject NMI\n", env->cpu_index);
}
#endif /* KVM_CAP_USER_NMI */
703 void kvm_arch_update_regs_for_sipi(CPUState *env)
705 SegmentCache cs = env->segs[R_CS];
707 kvm_arch_save_regs(env);
708 env->segs[R_CS] = cs;
709 env->eip = 0;
710 kvm_arch_load_regs(env);
713 int handle_tpr_access(void *opaque, int vcpu,
714 uint64_t rip, int is_write)
716 kvm_tpr_access_report(cpu_single_env, rip, is_write);
717 return 0;
720 void kvm_arch_cpu_reset(CPUState *env)
722 kvm_arch_load_regs(env);
723 if (env->cpu_index != 0) {
724 if (kvm_irqchip_in_kernel(kvm_context)) {
725 #ifdef KVM_CAP_MP_STATE
726 kvm_reset_mpstate(kvm_context, env->cpu_index);
727 #endif
728 } else {
729 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
730 env->halted = 1;
731 env->exception_index = EXCP_HLT;
736 int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
738 uint8_t int3 = 0xcc;
740 if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
741 cpu_memory_rw_debug(env, bp->pc, &int3, 1, 1))
742 return -EINVAL;
743 return 0;
746 int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
748 uint8_t int3;
750 if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
751 cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
752 return -EINVAL;
753 return 0;
756 #ifdef KVM_CAP_SET_GUEST_DEBUG
757 static struct {
758 target_ulong addr;
759 int len;
760 int type;
761 } hw_breakpoint[4];
763 static int nb_hw_breakpoint;
765 static int find_hw_breakpoint(target_ulong addr, int len, int type)
767 int n;
769 for (n = 0; n < nb_hw_breakpoint; n++)
770 if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
771 (hw_breakpoint[n].len == len || len == -1))
772 return n;
773 return -1;
776 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
777 target_ulong len, int type)
779 switch (type) {
780 case GDB_BREAKPOINT_HW:
781 len = 1;
782 break;
783 case GDB_WATCHPOINT_WRITE:
784 case GDB_WATCHPOINT_ACCESS:
785 switch (len) {
786 case 1:
787 break;
788 case 2:
789 case 4:
790 case 8:
791 if (addr & (len - 1))
792 return -EINVAL;
793 break;
794 default:
795 return -EINVAL;
797 break;
798 default:
799 return -ENOSYS;
802 if (nb_hw_breakpoint == 4)
803 return -ENOBUFS;
805 if (find_hw_breakpoint(addr, len, type) >= 0)
806 return -EEXIST;
808 hw_breakpoint[nb_hw_breakpoint].addr = addr;
809 hw_breakpoint[nb_hw_breakpoint].len = len;
810 hw_breakpoint[nb_hw_breakpoint].type = type;
811 nb_hw_breakpoint++;
813 return 0;
816 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
817 target_ulong len, int type)
819 int n;
821 n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
822 if (n < 0)
823 return -ENOENT;
825 nb_hw_breakpoint--;
826 hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];
828 return 0;
831 void kvm_arch_remove_all_hw_breakpoints(void)
833 nb_hw_breakpoint = 0;
836 static CPUWatchpoint hw_watchpoint;
838 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
840 int handle = 0;
841 int n;
843 if (arch_info->exception == 1) {
844 if (arch_info->dr6 & (1 << 14)) {
845 if (cpu_single_env->singlestep_enabled)
846 handle = 1;
847 } else {
848 for (n = 0; n < 4; n++)
849 if (arch_info->dr6 & (1 << n))
850 switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
851 case 0x0:
852 handle = 1;
853 break;
854 case 0x1:
855 handle = 1;
856 cpu_single_env->watchpoint_hit = &hw_watchpoint;
857 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
858 hw_watchpoint.flags = BP_MEM_WRITE;
859 break;
860 case 0x3:
861 handle = 1;
862 cpu_single_env->watchpoint_hit = &hw_watchpoint;
863 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
864 hw_watchpoint.flags = BP_MEM_ACCESS;
865 break;
868 } else if (kvm_find_sw_breakpoint(arch_info->pc))
869 handle = 1;
871 if (!handle)
872 kvm_update_guest_debug(cpu_single_env,
873 (arch_info->exception == 1) ?
874 KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);
876 return handle;
879 void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
881 const uint8_t type_code[] = {
882 [GDB_BREAKPOINT_HW] = 0x0,
883 [GDB_WATCHPOINT_WRITE] = 0x1,
884 [GDB_WATCHPOINT_ACCESS] = 0x3
886 const uint8_t len_code[] = {
887 [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
889 int n;
891 if (!TAILQ_EMPTY(&kvm_sw_breakpoints))
892 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
894 if (nb_hw_breakpoint > 0) {
895 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
896 dbg->arch.debugreg[7] = 0x0600;
897 for (n = 0; n < nb_hw_breakpoint; n++) {
898 dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
899 dbg->arch.debugreg[7] |= (2 << (n * 2)) |
900 (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
901 (len_code[hw_breakpoint[n].len] << (18 + n*4));
905 #endif
907 void kvm_arch_do_ioperm(void *_data)
909 struct ioperm_data *data = _data;
910 ioperm(data->start_port, data->num, data->turn_on);