kvm: qemu: Make device assignment depend on libpci
[kvm-userspace.git] / qemu / kqemu.c
blob96660b0132cbf15e31c4873ba5512121ae4d4e18
/*
 *  KQEMU support
 *
 *  Copyright (c) 2005-2008 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
 */
20 #include "config.h"
21 #ifdef _WIN32
22 #include <windows.h>
23 #include <winioctl.h>
24 #else
25 #include <sys/types.h>
26 #include <sys/mman.h>
27 #include <sys/ioctl.h>
28 #endif
29 #ifdef HOST_SOLARIS
30 #include <sys/ioccom.h>
31 #endif
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <stdarg.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <unistd.h>
38 #include <inttypes.h>
40 #include "cpu.h"
41 #include "exec-all.h"
42 #include "qemu-common.h"
44 #ifdef USE_KQEMU
46 #define DEBUG
47 //#define PROFILE
50 #ifdef DEBUG
51 # define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
52 # define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
53 #else
54 # define LOG_INT(...) do { } while (0)
55 # define LOG_INT_STATE(env) do { } while (0)
56 #endif
58 #include <unistd.h>
59 #include <fcntl.h>
60 #include "kqemu.h"
62 #ifdef _WIN32
63 #define KQEMU_DEVICE "\\\\.\\kqemu"
64 #else
65 #define KQEMU_DEVICE "/dev/kqemu"
66 #endif
68 static void qpi_init(void);
/* Handle on the kqemu device. It holds KQEMU_INVALID_FD until kqemu_init()
   opens KQEMU_DEVICE, and again after a failed initialisation. */
#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) CloseHandle(x)
#else
/* parenthesised so the macro is safe inside any expression */
#define KQEMU_INVALID_FD (-1)
int kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) close(x)
#endif

/* 0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;
/* addresses queued by kqemu_flush_page(), and how many are queued */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;
/* RAM addresses queued by kqemu_set_notdirty(), and how many are queued */
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;
/* RAM addresses queued by kqemu_modify_page(), and how many are queued */
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
/* one byte per RAM page: non-zero while the page is in modified_ram_pages */
uint8_t *modified_ram_pages_table;
/* I/O memory index returned by cpu_register_io_memory() in qpi_init() */
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
/* Run the CPUID instruction for leaf `index`. The values left in EAX, EBX,
   ECX and EDX are stored into the lvalues passed as eax, ebx, ecx and edx. */
#define cpuid(index, eax, ebx, ecx, edx) \
    __asm__ __volatile__ ("cpuid" \
                          : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
                          : "0" (index))
#ifdef __x86_64__
/* On an x86_64 host no probing is done: CPUID is reported as available. */
static int is_cpuid_supported(void)
{
    return 1;
}
#else
/* Probe for CPUID on a 32-bit x86 host: read EFLAGS, flip bit 21
   (0x00200000), write it back and read EFLAGS again. Returns non-zero
   when the value read back differs from the original one. */
static int is_cpuid_supported(void)
{
    int flags_after, flags_before;

    __asm__ __volatile__ ("pushf\n"
                          "popl %0\n"
                          "movl %0, %1\n"
                          "xorl $0x00200000, %0\n"
                          "pushl %0\n"
                          "popf\n"
                          "pushf\n"
                          "popl %0\n"
                          : "=a" (flags_after), "=d" (flags_before)
                          :
                          : "cc");
    return flags_after != flags_before;
}
#endif
124 static void kqemu_update_cpuid(CPUState *env)
126 int critical_features_mask, features, ext_features, ext_features_mask;
127 uint32_t eax, ebx, ecx, edx;
129 /* the following features are kept identical on the host and
130 target cpus because they are important for user code. Strictly
131 speaking, only SSE really matters because the OS must support
132 it if the user code uses it. */
133 critical_features_mask =
134 CPUID_CMOV | CPUID_CX8 |
135 CPUID_FXSR | CPUID_MMX | CPUID_SSE |
136 CPUID_SSE2 | CPUID_SEP;
137 ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
138 if (!is_cpuid_supported()) {
139 features = 0;
140 ext_features = 0;
141 } else {
142 cpuid(1, eax, ebx, ecx, edx);
143 features = edx;
144 ext_features = ecx;
146 #ifdef __x86_64__
147 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
148 compatibility mode, so in order to have the best performances
149 it is better not to use it */
150 features &= ~CPUID_SEP;
151 #endif
152 env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
153 (features & critical_features_mask);
154 env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
155 (ext_features & ext_features_mask);
156 /* XXX: we could update more of the target CPUID state so that the
157 non accelerated code sees exactly the same CPU features as the
158 accelerated code */
161 int kqemu_init(CPUState *env)
163 struct kqemu_init kinit;
164 int ret, version;
165 #ifdef _WIN32
166 DWORD temp;
167 #endif
169 if (!kqemu_allowed)
170 return -1;
172 #ifdef _WIN32
173 kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
174 FILE_SHARE_READ | FILE_SHARE_WRITE,
175 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
176 NULL);
177 if (kqemu_fd == KQEMU_INVALID_FD) {
178 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
179 KQEMU_DEVICE, GetLastError());
180 return -1;
182 #else
183 kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
184 if (kqemu_fd == KQEMU_INVALID_FD) {
185 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
186 KQEMU_DEVICE, strerror(errno));
187 return -1;
189 #endif
190 version = 0;
191 #ifdef _WIN32
192 DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
193 &version, sizeof(version), &temp, NULL);
194 #else
195 ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
196 #endif
197 if (version != KQEMU_VERSION) {
198 fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
199 version, KQEMU_VERSION);
200 goto fail;
203 pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
204 sizeof(uint64_t));
205 if (!pages_to_flush)
206 goto fail;
208 ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
209 sizeof(uint64_t));
210 if (!ram_pages_to_update)
211 goto fail;
213 modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
214 sizeof(uint64_t));
215 if (!modified_ram_pages)
216 goto fail;
217 modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
218 if (!modified_ram_pages_table)
219 goto fail;
221 memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
222 kinit.ram_base = phys_ram_base;
223 kinit.ram_size = phys_ram_size;
224 kinit.ram_dirty = phys_ram_dirty;
225 kinit.pages_to_flush = pages_to_flush;
226 kinit.ram_pages_to_update = ram_pages_to_update;
227 kinit.modified_ram_pages = modified_ram_pages;
228 #ifdef _WIN32
229 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
230 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
231 #else
232 ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
233 #endif
234 if (ret < 0) {
235 fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
236 fail:
237 kqemu_closefd(kqemu_fd);
238 kqemu_fd = KQEMU_INVALID_FD;
239 return -1;
241 kqemu_update_cpuid(env);
242 env->kqemu_enabled = kqemu_allowed;
243 nb_pages_to_flush = 0;
244 nb_ram_pages_to_update = 0;
246 qpi_init();
247 return 0;
250 void kqemu_flush_page(CPUState *env, target_ulong addr)
252 LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
253 if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
254 nb_pages_to_flush = KQEMU_FLUSH_ALL;
255 else
256 pages_to_flush[nb_pages_to_flush++] = addr;
259 void kqemu_flush(CPUState *env, int global)
261 LOG_INT("kqemu_flush:\n");
262 nb_pages_to_flush = KQEMU_FLUSH_ALL;
265 void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
267 LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
268 (unsigned long)ram_addr);
269 /* we only track transitions to dirty state */
270 if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
271 return;
272 if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
273 nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
274 else
275 ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
278 static void kqemu_reset_modified_ram_pages(void)
280 int i;
281 unsigned long page_index;
283 for(i = 0; i < nb_modified_ram_pages; i++) {
284 page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
285 modified_ram_pages_table[page_index] = 0;
287 nb_modified_ram_pages = 0;
290 void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
292 unsigned long page_index;
293 int ret;
294 #ifdef _WIN32
295 DWORD temp;
296 #endif
298 page_index = ram_addr >> TARGET_PAGE_BITS;
299 if (!modified_ram_pages_table[page_index]) {
300 #if 0
301 printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
302 #endif
303 modified_ram_pages_table[page_index] = 1;
304 modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
305 if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
306 /* flush */
307 #ifdef _WIN32
308 ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
309 &nb_modified_ram_pages,
310 sizeof(nb_modified_ram_pages),
311 NULL, 0, &temp, NULL);
312 #else
313 ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
314 &nb_modified_ram_pages);
315 #endif
316 kqemu_reset_modified_ram_pages();
321 void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
322 ram_addr_t phys_offset)
324 struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
325 uint64_t end;
326 int ret, io_index;
328 end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
329 start_addr &= TARGET_PAGE_MASK;
330 kphys_mem->phys_addr = start_addr;
331 kphys_mem->size = end - start_addr;
332 kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
333 io_index = phys_offset & ~TARGET_PAGE_MASK;
334 switch(io_index) {
335 case IO_MEM_RAM:
336 kphys_mem->io_index = KQEMU_IO_MEM_RAM;
337 break;
338 case IO_MEM_ROM:
339 kphys_mem->io_index = KQEMU_IO_MEM_ROM;
340 break;
341 default:
342 if (qpi_io_memory == io_index) {
343 kphys_mem->io_index = KQEMU_IO_MEM_COMM;
344 } else {
345 kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
347 break;
349 #ifdef _WIN32
351 DWORD temp;
352 ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
353 kphys_mem, sizeof(*kphys_mem),
354 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
356 #else
357 ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
358 #endif
359 if (ret < 0) {
360 fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
361 ret, start_addr,
362 (unsigned long)size, (unsigned long)phys_offset);
/* Memory image passed to the frstor / fsave instructions in this file. */
struct fpstate {
    uint16_t fpuc;              /* filled from / stored to env->fpuc */
    uint16_t dummy1;
    uint16_t fpus;              /* status word; bits 11-13 carry env->fpstt */
    uint16_t dummy2;
    uint16_t fptag;             /* two tag bits per register */
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];    /* eight registers, 10 bytes each */
};
/* Memory image passed to the fxrstor / fxsave instructions in this file. */
struct fpxstate {
    uint16_t fpuc;              /* filled from / stored to env->fpuc */
    uint16_t fpus;              /* status word; bits 11-13 carry env->fpstt */
    uint16_t fptag;             /* one bit per register in the low byte */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];    /* eight registers in 16-byte slots */
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

/* single 16-byte aligned buffer shared by the fxrstor and fxsave helpers */
static struct fpxstate fpx1 __attribute__((aligned(16)));
401 static void restore_native_fp_frstor(CPUState *env)
403 int fptag, i, j;
404 struct fpstate fp1, *fp = &fp1;
406 fp->fpuc = env->fpuc;
407 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
408 fptag = 0;
409 for (i=7; i>=0; i--) {
410 fptag <<= 2;
411 if (env->fptags[i]) {
412 fptag |= 3;
413 } else {
414 /* the FPU automatically computes it */
417 fp->fptag = fptag;
418 j = env->fpstt;
419 for(i = 0;i < 8; i++) {
420 memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
421 j = (j + 1) & 7;
423 asm volatile ("frstor %0" : "=m" (*fp));
426 static void save_native_fp_fsave(CPUState *env)
428 int fptag, i, j;
429 uint16_t fpuc;
430 struct fpstate fp1, *fp = &fp1;
432 asm volatile ("fsave %0" : : "m" (*fp));
433 env->fpuc = fp->fpuc;
434 env->fpstt = (fp->fpus >> 11) & 7;
435 env->fpus = fp->fpus & ~0x3800;
436 fptag = fp->fptag;
437 for(i = 0;i < 8; i++) {
438 env->fptags[i] = ((fptag & 3) == 3);
439 fptag >>= 2;
441 j = env->fpstt;
442 for(i = 0;i < 8; i++) {
443 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
444 j = (j + 1) & 7;
446 /* we must restore the default rounding state */
447 fpuc = 0x037f | (env->fpuc & (3 << 10));
448 asm volatile("fldcw %0" : : "m" (fpuc));
451 static void restore_native_fp_fxrstor(CPUState *env)
453 struct fpxstate *fp = &fpx1;
454 int i, j, fptag;
456 fp->fpuc = env->fpuc;
457 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
458 fptag = 0;
459 for(i = 0; i < 8; i++)
460 fptag |= (env->fptags[i] << i);
461 fp->fptag = fptag ^ 0xff;
463 j = env->fpstt;
464 for(i = 0;i < 8; i++) {
465 memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
466 j = (j + 1) & 7;
468 if (env->cpuid_features & CPUID_SSE) {
469 fp->mxcsr = env->mxcsr;
470 /* XXX: check if DAZ is not available */
471 fp->mxcsr_mask = 0xffff;
472 memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
474 asm volatile ("fxrstor %0" : "=m" (*fp));
477 static void save_native_fp_fxsave(CPUState *env)
479 struct fpxstate *fp = &fpx1;
480 int fptag, i, j;
481 uint16_t fpuc;
483 asm volatile ("fxsave %0" : : "m" (*fp));
484 env->fpuc = fp->fpuc;
485 env->fpstt = (fp->fpus >> 11) & 7;
486 env->fpus = fp->fpus & ~0x3800;
487 fptag = fp->fptag ^ 0xff;
488 for(i = 0;i < 8; i++) {
489 env->fptags[i] = (fptag >> i) & 1;
491 j = env->fpstt;
492 for(i = 0;i < 8; i++) {
493 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
494 j = (j + 1) & 7;
496 if (env->cpuid_features & CPUID_SSE) {
497 env->mxcsr = fp->mxcsr;
498 memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
501 /* we must restore the default rounding state */
502 asm volatile ("fninit");
503 fpuc = 0x037f | (env->fpuc & (3 << 10));
504 asm volatile("fldcw %0" : : "m" (fpuc));
507 static int do_syscall(CPUState *env,
508 struct kqemu_cpu_state *kenv)
510 int selector;
512 selector = (env->star >> 32) & 0xffff;
513 #ifdef TARGET_X86_64
514 if (env->hflags & HF_LMA_MASK) {
515 int code64;
517 env->regs[R_ECX] = kenv->next_eip;
518 env->regs[11] = env->eflags;
520 code64 = env->hflags & HF_CS64_MASK;
522 cpu_x86_set_cpl(env, 0);
523 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
524 0, 0xffffffff,
525 DESC_G_MASK | DESC_P_MASK |
526 DESC_S_MASK |
527 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
528 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
529 0, 0xffffffff,
530 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
531 DESC_S_MASK |
532 DESC_W_MASK | DESC_A_MASK);
533 env->eflags &= ~env->fmask;
534 if (code64)
535 env->eip = env->lstar;
536 else
537 env->eip = env->cstar;
538 } else
539 #endif
541 env->regs[R_ECX] = (uint32_t)kenv->next_eip;
543 cpu_x86_set_cpl(env, 0);
544 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
545 0, 0xffffffff,
546 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
547 DESC_S_MASK |
548 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
549 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
550 0, 0xffffffff,
551 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
552 DESC_S_MASK |
553 DESC_W_MASK | DESC_A_MASK);
554 env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
555 env->eip = (uint32_t)env->star;
557 return 2;
560 #ifdef CONFIG_PROFILER
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* One counted program-counter value; records sharing a bucket are chained. */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

/* Count one occurrence of `pc`: bump the matching record, or append a new
   record with count 1 to the end of its bucket chain. If the new record
   cannot be allocated the sample is dropped. */
static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    pr = &pc_rec_hash[h];
    for(;;) {
        r = *pr;
        if (r == NULL)
            break;
        if (r->pc == pc) {
            r->count++;
            return;
        }
        pr = &r->next;
    }
    r = malloc(sizeof(*r));
    if (r == NULL)
        return; /* out of memory: profiling is best effort */
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}

/* qsort() comparator over PCRecord pointers: larger counts sort first. */
static int pc_rec_cmp(const void *p1, const void *p2)
{
    const PCRecord *r1 = *(PCRecord * const *)p1;
    const PCRecord *r2 = *(PCRecord * const *)p2;
    if (r1->count < r2->count)
        return 1;
    else if (r1->count == r2->count)
        return 0;
    else
        return -1;
}

/* Free every record and leave the hash table empty. */
static void kqemu_record_flush(void)
{
    PCRecord *r, *r_next;
    int h;

    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
            r_next = r->next;
            free(r);
        }
        pc_rec_hash[h] = NULL;
    }
    nb_pc_records = 0;
}

/* Write all records to /tmp/kqemu.stats, sorted by decreasing count, each
   with its share of the total and the running cumulative share, then
   discard the records. Exits with status 1 if the file cannot be opened.
   If the sort array cannot be allocated, an error is printed and the
   records are kept. */
void kqemu_record_dump(void)
{
    PCRecord **pr, *r;
    int i, h;
    FILE *f;
    int64_t total, sum;

    pr = NULL;
    if (nb_pc_records > 0) {
        pr = malloc(sizeof(PCRecord *) * nb_pc_records);
        if (pr == NULL) {
            perror("kqemu_record_dump");
            return;
        }
    }
    i = 0;
    total = 0;
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
            pr[i++] = r;
            total += r->count;
        }
    }
    /* qsort() needs a valid array pointer even for zero elements */
    if (nb_pc_records > 0)
        qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);

    f = fopen("/tmp/kqemu.stats", "w");
    if (!f) {
        perror("/tmp/kqemu.stats");
        exit(1);
    }
    fprintf(f, "total: %" PRId64 "\n", total);
    sum = 0;
    for(i = 0; i < nb_pc_records; i++) {
        r = pr[i];
        sum += r->count;
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                r->pc,
                r->count,
                (double)r->count / (double)total * 100.0,
                (double)sum / (double)total * 100.0);
    }
    fclose(f);
    free(pr);

    kqemu_record_flush();
}
668 #endif
670 static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
671 const SegmentCache *sc)
673 ksc->selector = sc->selector;
674 ksc->flags = sc->flags;
675 ksc->limit = sc->limit;
676 ksc->base = sc->base;
679 static inline void kqemu_save_seg(SegmentCache *sc,
680 const struct kqemu_segment_cache *ksc)
682 sc->selector = ksc->selector;
683 sc->flags = ksc->flags;
684 sc->limit = ksc->limit;
685 sc->base = ksc->base;
688 int kqemu_cpu_exec(CPUState *env)
690 struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
691 int ret, cpl, i;
692 #ifdef CONFIG_PROFILER
693 int64_t ti;
694 #endif
695 #ifdef _WIN32
696 DWORD temp;
697 #endif
699 #ifdef CONFIG_PROFILER
700 ti = profile_getclock();
701 #endif
702 LOG_INT("kqemu: cpu_exec: enter\n");
703 LOG_INT_STATE(env);
704 for(i = 0; i < CPU_NB_REGS; i++)
705 kenv->regs[i] = env->regs[i];
706 kenv->eip = env->eip;
707 kenv->eflags = env->eflags;
708 for(i = 0; i < 6; i++)
709 kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
710 kqemu_load_seg(&kenv->ldt, &env->ldt);
711 kqemu_load_seg(&kenv->tr, &env->tr);
712 kqemu_load_seg(&kenv->gdt, &env->gdt);
713 kqemu_load_seg(&kenv->idt, &env->idt);
714 kenv->cr0 = env->cr[0];
715 kenv->cr2 = env->cr[2];
716 kenv->cr3 = env->cr[3];
717 kenv->cr4 = env->cr[4];
718 kenv->a20_mask = env->a20_mask;
719 kenv->efer = env->efer;
720 kenv->tsc_offset = 0;
721 kenv->star = env->star;
722 kenv->sysenter_cs = env->sysenter_cs;
723 kenv->sysenter_esp = env->sysenter_esp;
724 kenv->sysenter_eip = env->sysenter_eip;
725 #ifdef TARGET_X86_64
726 kenv->lstar = env->lstar;
727 kenv->cstar = env->cstar;
728 kenv->fmask = env->fmask;
729 kenv->kernelgsbase = env->kernelgsbase;
730 #endif
731 if (env->dr[7] & 0xff) {
732 kenv->dr7 = env->dr[7];
733 kenv->dr0 = env->dr[0];
734 kenv->dr1 = env->dr[1];
735 kenv->dr2 = env->dr[2];
736 kenv->dr3 = env->dr[3];
737 } else {
738 kenv->dr7 = 0;
740 kenv->dr6 = env->dr[6];
741 cpl = (env->hflags & HF_CPL_MASK);
742 kenv->cpl = cpl;
743 kenv->nb_pages_to_flush = nb_pages_to_flush;
744 kenv->user_only = (env->kqemu_enabled == 1);
745 kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
746 nb_ram_pages_to_update = 0;
747 kenv->nb_modified_ram_pages = nb_modified_ram_pages;
749 kqemu_reset_modified_ram_pages();
751 if (env->cpuid_features & CPUID_FXSR)
752 restore_native_fp_fxrstor(env);
753 else
754 restore_native_fp_frstor(env);
756 #ifdef _WIN32
757 if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
758 kenv, sizeof(struct kqemu_cpu_state),
759 kenv, sizeof(struct kqemu_cpu_state),
760 &temp, NULL)) {
761 ret = kenv->retval;
762 } else {
763 ret = -1;
765 #else
766 ioctl(kqemu_fd, KQEMU_EXEC, kenv);
767 ret = kenv->retval;
768 #endif
769 if (env->cpuid_features & CPUID_FXSR)
770 save_native_fp_fxsave(env);
771 else
772 save_native_fp_fsave(env);
774 for(i = 0; i < CPU_NB_REGS; i++)
775 env->regs[i] = kenv->regs[i];
776 env->eip = kenv->eip;
777 env->eflags = kenv->eflags;
778 for(i = 0; i < 6; i++)
779 kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
780 cpu_x86_set_cpl(env, kenv->cpl);
781 kqemu_save_seg(&env->ldt, &kenv->ldt);
782 env->cr[0] = kenv->cr0;
783 env->cr[4] = kenv->cr4;
784 env->cr[3] = kenv->cr3;
785 env->cr[2] = kenv->cr2;
786 env->dr[6] = kenv->dr6;
787 #ifdef TARGET_X86_64
788 env->kernelgsbase = kenv->kernelgsbase;
789 #endif
791 /* flush pages as indicated by kqemu */
792 if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
793 tlb_flush(env, 1);
794 } else {
795 for(i = 0; i < kenv->nb_pages_to_flush; i++) {
796 tlb_flush_page(env, pages_to_flush[i]);
799 nb_pages_to_flush = 0;
801 #ifdef CONFIG_PROFILER
802 kqemu_time += profile_getclock() - ti;
803 kqemu_exec_count++;
804 #endif
806 if (kenv->nb_ram_pages_to_update > 0) {
807 cpu_tlb_update_dirty(env);
810 if (kenv->nb_modified_ram_pages > 0) {
811 for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
812 unsigned long addr;
813 addr = modified_ram_pages[i];
814 tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
818 /* restore the hidden flags */
820 unsigned int new_hflags;
821 #ifdef TARGET_X86_64
822 if ((env->hflags & HF_LMA_MASK) &&
823 (env->segs[R_CS].flags & DESC_L_MASK)) {
824 /* long mode */
825 new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
826 } else
827 #endif
829 /* legacy / compatibility case */
830 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
831 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
832 new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
833 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
834 if (!(env->cr[0] & CR0_PE_MASK) ||
835 (env->eflags & VM_MASK) ||
836 !(env->hflags & HF_CS32_MASK)) {
837 /* XXX: try to avoid this test. The problem comes from the
838 fact that is real mode or vm86 mode we only modify the
839 'base' and 'selector' fields of the segment cache to go
840 faster. A solution may be to force addseg to one in
841 translate-i386.c. */
842 new_hflags |= HF_ADDSEG_MASK;
843 } else {
844 new_hflags |= ((env->segs[R_DS].base |
845 env->segs[R_ES].base |
846 env->segs[R_SS].base) != 0) <<
847 HF_ADDSEG_SHIFT;
850 env->hflags = (env->hflags &
851 ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
852 new_hflags;
854 /* update FPU flags */
855 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
856 ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
857 if (env->cr[4] & CR4_OSFXSR_MASK)
858 env->hflags |= HF_OSFXSR_MASK;
859 else
860 env->hflags &= ~HF_OSFXSR_MASK;
862 LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
863 if (ret == KQEMU_RET_SYSCALL) {
864 /* syscall instruction */
865 return do_syscall(env, kenv);
866 } else
867 if ((ret & 0xff00) == KQEMU_RET_INT) {
868 env->exception_index = ret & 0xff;
869 env->error_code = 0;
870 env->exception_is_int = 1;
871 env->exception_next_eip = kenv->next_eip;
872 #ifdef CONFIG_PROFILER
873 kqemu_ret_int_count++;
874 #endif
875 LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
876 LOG_INT_STATE(env);
877 return 1;
878 } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
879 env->exception_index = ret & 0xff;
880 env->error_code = kenv->error_code;
881 env->exception_is_int = 0;
882 env->exception_next_eip = 0;
883 #ifdef CONFIG_PROFILER
884 kqemu_ret_excp_count++;
885 #endif
886 LOG_INT("kqemu: exception v=%02x e=%04x:\n",
887 env->exception_index, env->error_code);
888 LOG_INT_STATE(env);
889 return 1;
890 } else if (ret == KQEMU_RET_INTR) {
891 #ifdef CONFIG_PROFILER
892 kqemu_ret_intr_count++;
893 #endif
894 LOG_INT_STATE(env);
895 return 0;
896 } else if (ret == KQEMU_RET_SOFTMMU) {
897 #ifdef CONFIG_PROFILER
899 unsigned long pc = env->eip + env->segs[R_CS].base;
900 kqemu_record_pc(pc);
902 #endif
903 LOG_INT_STATE(env);
904 return 2;
905 } else {
906 cpu_dump_state(env, stderr, fprintf, 0);
907 fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
908 exit(1);
910 return 0;
913 void kqemu_cpu_interrupt(CPUState *env)
915 #if defined(_WIN32)
916 /* cancelling the I/O request causes KQEMU to finish executing the
917 current block and successfully returning. */
918 CancelIo(kqemu_fd);
919 #endif
/*
   QEMU paravirtualization interface. The current interface only
   allows modifying the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/
931 #define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
933 static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
935 return 0;
938 static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
940 return 0;
943 static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
947 static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
951 static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
953 CPUState *env;
955 env = cpu_single_env;
956 if (!env)
957 return 0;
958 return env->eflags & (IF_MASK | IOPL_MASK);
961 /* Note: after writing to this address, the guest code must make sure
962 it is exiting the current TB. pushf/popf can be used for that
963 purpose. */
964 static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
966 CPUState *env;
968 env = cpu_single_env;
969 if (!env)
970 return;
971 env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
972 (val & (IF_MASK | IOPL_MASK));
975 static CPUReadMemoryFunc *qpi_mem_read[3] = {
976 qpi_mem_readb,
977 qpi_mem_readw,
978 qpi_mem_readl,
981 static CPUWriteMemoryFunc *qpi_mem_write[3] = {
982 qpi_mem_writeb,
983 qpi_mem_writew,
984 qpi_mem_writel,
987 static void qpi_init(void)
989 kqemu_comm_base = 0xff000000 | 1;
990 qpi_io_memory = cpu_register_io_memory(0,
991 qpi_mem_read,
992 qpi_mem_write, NULL);
993 cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
994 0x1000, qpi_io_memory);
996 #endif