kvm: configure: if all else fails, infer kernel version from .config
[kvm-userspace.git] / qemu / kqemu.c
blob4783aa2a082d6aca51ad12d6f1eea9ca1d516aff
1 /*
2 * KQEMU support
4 * Copyright (c) 2005-2008 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include "config.h"
21 #ifdef _WIN32
22 #define WIN32_LEAN_AND_MEAN
23 #include <windows.h>
24 #include <winioctl.h>
25 #else
26 #include <sys/types.h>
27 #include <sys/mman.h>
28 #include <sys/ioctl.h>
29 #endif
30 #ifdef HOST_SOLARIS
31 #include <sys/ioccom.h>
32 #endif
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <stdarg.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <inttypes.h>
41 #include "cpu.h"
42 #include "exec-all.h"
43 #include "qemu-common.h"
45 #ifdef USE_KQEMU
47 #define DEBUG
48 //#define PROFILE
50 #include <unistd.h>
51 #include <fcntl.h>
52 #include "kqemu.h"
54 #ifdef _WIN32
55 #define KQEMU_DEVICE "\\\\.\\kqemu"
56 #else
57 #define KQEMU_DEVICE "/dev/kqemu"
58 #endif
60 static void qpi_init(void);
62 #ifdef _WIN32
63 #define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
64 HANDLE kqemu_fd = KQEMU_INVALID_FD;
65 #define kqemu_closefd(x) CloseHandle(x)
66 #else
67 #define KQEMU_INVALID_FD -1
68 int kqemu_fd = KQEMU_INVALID_FD;
69 #define kqemu_closefd(x) close(x)
70 #endif
72 /* 0 = not allowed
73 1 = user kqemu
74 2 = kernel kqemu
76 int kqemu_allowed = 1;
77 uint64_t *pages_to_flush;
78 unsigned int nb_pages_to_flush;
79 uint64_t *ram_pages_to_update;
80 unsigned int nb_ram_pages_to_update;
81 uint64_t *modified_ram_pages;
82 unsigned int nb_modified_ram_pages;
83 uint8_t *modified_ram_pages_table;
84 int qpi_io_memory;
85 uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
87 #define cpuid(index, eax, ebx, ecx, edx) \
88 asm volatile ("cpuid" \
89 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
90 : "0" (index))
92 #ifdef __x86_64__
93 static int is_cpuid_supported(void)
95 return 1;
97 #else
98 static int is_cpuid_supported(void)
100 int v0, v1;
101 asm volatile ("pushf\n"
102 "popl %0\n"
103 "movl %0, %1\n"
104 "xorl $0x00200000, %0\n"
105 "pushl %0\n"
106 "popf\n"
107 "pushf\n"
108 "popl %0\n"
109 : "=a" (v0), "=d" (v1)
111 : "cc");
112 return (v0 != v1);
114 #endif
116 static void kqemu_update_cpuid(CPUState *env)
118 int critical_features_mask, features, ext_features, ext_features_mask;
119 uint32_t eax, ebx, ecx, edx;
121 /* the following features are kept identical on the host and
122 target cpus because they are important for user code. Strictly
123 speaking, only SSE really matters because the OS must support
124 it if the user code uses it. */
125 critical_features_mask =
126 CPUID_CMOV | CPUID_CX8 |
127 CPUID_FXSR | CPUID_MMX | CPUID_SSE |
128 CPUID_SSE2 | CPUID_SEP;
129 ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
130 if (!is_cpuid_supported()) {
131 features = 0;
132 ext_features = 0;
133 } else {
134 cpuid(1, eax, ebx, ecx, edx);
135 features = edx;
136 ext_features = ecx;
138 #ifdef __x86_64__
139 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
140 compatibility mode, so in order to have the best performances
141 it is better not to use it */
142 features &= ~CPUID_SEP;
143 #endif
144 env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
145 (features & critical_features_mask);
146 env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
147 (ext_features & ext_features_mask);
148 /* XXX: we could update more of the target CPUID state so that the
149 non accelerated code sees exactly the same CPU features as the
150 accelerated code */
153 int kqemu_init(CPUState *env)
155 struct kqemu_init kinit;
156 int ret, version;
157 #ifdef _WIN32
158 DWORD temp;
159 #endif
161 if (!kqemu_allowed)
162 return -1;
164 #ifdef _WIN32
165 kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
166 FILE_SHARE_READ | FILE_SHARE_WRITE,
167 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
168 NULL);
169 if (kqemu_fd == KQEMU_INVALID_FD) {
170 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
171 KQEMU_DEVICE, GetLastError());
172 return -1;
174 #else
175 kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
176 if (kqemu_fd == KQEMU_INVALID_FD) {
177 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
178 KQEMU_DEVICE, strerror(errno));
179 return -1;
181 #endif
182 version = 0;
183 #ifdef _WIN32
184 DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
185 &version, sizeof(version), &temp, NULL);
186 #else
187 ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
188 #endif
189 if (version != KQEMU_VERSION) {
190 fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
191 version, KQEMU_VERSION);
192 goto fail;
195 pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
196 sizeof(uint64_t));
197 if (!pages_to_flush)
198 goto fail;
200 ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
201 sizeof(uint64_t));
202 if (!ram_pages_to_update)
203 goto fail;
205 modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
206 sizeof(uint64_t));
207 if (!modified_ram_pages)
208 goto fail;
209 modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
210 if (!modified_ram_pages_table)
211 goto fail;
213 memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
214 kinit.ram_base = phys_ram_base;
215 kinit.ram_size = phys_ram_size;
216 kinit.ram_dirty = phys_ram_dirty;
217 kinit.pages_to_flush = pages_to_flush;
218 kinit.ram_pages_to_update = ram_pages_to_update;
219 kinit.modified_ram_pages = modified_ram_pages;
220 #ifdef _WIN32
221 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
222 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
223 #else
224 ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
225 #endif
226 if (ret < 0) {
227 fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
228 fail:
229 kqemu_closefd(kqemu_fd);
230 kqemu_fd = KQEMU_INVALID_FD;
231 return -1;
233 kqemu_update_cpuid(env);
234 env->kqemu_enabled = kqemu_allowed;
235 nb_pages_to_flush = 0;
236 nb_ram_pages_to_update = 0;
238 qpi_init();
239 return 0;
242 void kqemu_flush_page(CPUState *env, target_ulong addr)
244 #if defined(DEBUG)
245 if (loglevel & CPU_LOG_INT) {
246 fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
248 #endif
249 if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
250 nb_pages_to_flush = KQEMU_FLUSH_ALL;
251 else
252 pages_to_flush[nb_pages_to_flush++] = addr;
255 void kqemu_flush(CPUState *env, int global)
257 #ifdef DEBUG
258 if (loglevel & CPU_LOG_INT) {
259 fprintf(logfile, "kqemu_flush:\n");
261 #endif
262 nb_pages_to_flush = KQEMU_FLUSH_ALL;
265 void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
267 #ifdef DEBUG
268 if (loglevel & CPU_LOG_INT) {
269 fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n",
270 (unsigned long)ram_addr);
272 #endif
273 /* we only track transitions to dirty state */
274 if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
275 return;
276 if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
277 nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
278 else
279 ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
282 static void kqemu_reset_modified_ram_pages(void)
284 int i;
285 unsigned long page_index;
287 for(i = 0; i < nb_modified_ram_pages; i++) {
288 page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
289 modified_ram_pages_table[page_index] = 0;
291 nb_modified_ram_pages = 0;
294 void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
296 unsigned long page_index;
297 int ret;
298 #ifdef _WIN32
299 DWORD temp;
300 #endif
302 page_index = ram_addr >> TARGET_PAGE_BITS;
303 if (!modified_ram_pages_table[page_index]) {
304 #if 0
305 printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
306 #endif
307 modified_ram_pages_table[page_index] = 1;
308 modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
309 if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
310 /* flush */
311 #ifdef _WIN32
312 ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
313 &nb_modified_ram_pages,
314 sizeof(nb_modified_ram_pages),
315 NULL, 0, &temp, NULL);
316 #else
317 ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
318 &nb_modified_ram_pages);
319 #endif
320 kqemu_reset_modified_ram_pages();
325 void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
326 ram_addr_t phys_offset)
328 struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
329 uint64_t end;
330 int ret, io_index;
332 end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
333 start_addr &= TARGET_PAGE_MASK;
334 kphys_mem->phys_addr = start_addr;
335 kphys_mem->size = end - start_addr;
336 kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
337 io_index = phys_offset & ~TARGET_PAGE_MASK;
338 switch(io_index) {
339 case IO_MEM_RAM:
340 kphys_mem->io_index = KQEMU_IO_MEM_RAM;
341 break;
342 case IO_MEM_ROM:
343 kphys_mem->io_index = KQEMU_IO_MEM_ROM;
344 break;
345 default:
346 if (qpi_io_memory == io_index) {
347 kphys_mem->io_index = KQEMU_IO_MEM_COMM;
348 } else {
349 kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
351 break;
353 #ifdef _WIN32
355 DWORD temp;
356 ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
357 kphys_mem, sizeof(*kphys_mem),
358 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
360 #else
361 ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
362 #endif
363 if (ret < 0) {
364 fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
365 ret, start_addr,
366 (unsigned long)size, (unsigned long)phys_offset);
370 struct fpstate {
371 uint16_t fpuc;
372 uint16_t dummy1;
373 uint16_t fpus;
374 uint16_t dummy2;
375 uint16_t fptag;
376 uint16_t dummy3;
378 uint32_t fpip;
379 uint32_t fpcs;
380 uint32_t fpoo;
381 uint32_t fpos;
382 uint8_t fpregs1[8 * 10];
385 struct fpxstate {
386 uint16_t fpuc;
387 uint16_t fpus;
388 uint16_t fptag;
389 uint16_t fop;
390 uint32_t fpuip;
391 uint16_t cs_sel;
392 uint16_t dummy0;
393 uint32_t fpudp;
394 uint16_t ds_sel;
395 uint16_t dummy1;
396 uint32_t mxcsr;
397 uint32_t mxcsr_mask;
398 uint8_t fpregs1[8 * 16];
399 uint8_t xmm_regs[16 * 16];
400 uint8_t dummy2[96];
403 static struct fpxstate fpx1 __attribute__((aligned(16)));
405 static void restore_native_fp_frstor(CPUState *env)
407 int fptag, i, j;
408 struct fpstate fp1, *fp = &fp1;
410 fp->fpuc = env->fpuc;
411 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
412 fptag = 0;
413 for (i=7; i>=0; i--) {
414 fptag <<= 2;
415 if (env->fptags[i]) {
416 fptag |= 3;
417 } else {
418 /* the FPU automatically computes it */
421 fp->fptag = fptag;
422 j = env->fpstt;
423 for(i = 0;i < 8; i++) {
424 memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
425 j = (j + 1) & 7;
427 asm volatile ("frstor %0" : "=m" (*fp));
430 static void save_native_fp_fsave(CPUState *env)
432 int fptag, i, j;
433 uint16_t fpuc;
434 struct fpstate fp1, *fp = &fp1;
436 asm volatile ("fsave %0" : : "m" (*fp));
437 env->fpuc = fp->fpuc;
438 env->fpstt = (fp->fpus >> 11) & 7;
439 env->fpus = fp->fpus & ~0x3800;
440 fptag = fp->fptag;
441 for(i = 0;i < 8; i++) {
442 env->fptags[i] = ((fptag & 3) == 3);
443 fptag >>= 2;
445 j = env->fpstt;
446 for(i = 0;i < 8; i++) {
447 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
448 j = (j + 1) & 7;
450 /* we must restore the default rounding state */
451 fpuc = 0x037f | (env->fpuc & (3 << 10));
452 asm volatile("fldcw %0" : : "m" (fpuc));
455 static void restore_native_fp_fxrstor(CPUState *env)
457 struct fpxstate *fp = &fpx1;
458 int i, j, fptag;
460 fp->fpuc = env->fpuc;
461 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
462 fptag = 0;
463 for(i = 0; i < 8; i++)
464 fptag |= (env->fptags[i] << i);
465 fp->fptag = fptag ^ 0xff;
467 j = env->fpstt;
468 for(i = 0;i < 8; i++) {
469 memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
470 j = (j + 1) & 7;
472 if (env->cpuid_features & CPUID_SSE) {
473 fp->mxcsr = env->mxcsr;
474 /* XXX: check if DAZ is not available */
475 fp->mxcsr_mask = 0xffff;
476 memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
478 asm volatile ("fxrstor %0" : "=m" (*fp));
481 static void save_native_fp_fxsave(CPUState *env)
483 struct fpxstate *fp = &fpx1;
484 int fptag, i, j;
485 uint16_t fpuc;
487 asm volatile ("fxsave %0" : : "m" (*fp));
488 env->fpuc = fp->fpuc;
489 env->fpstt = (fp->fpus >> 11) & 7;
490 env->fpus = fp->fpus & ~0x3800;
491 fptag = fp->fptag ^ 0xff;
492 for(i = 0;i < 8; i++) {
493 env->fptags[i] = (fptag >> i) & 1;
495 j = env->fpstt;
496 for(i = 0;i < 8; i++) {
497 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
498 j = (j + 1) & 7;
500 if (env->cpuid_features & CPUID_SSE) {
501 env->mxcsr = fp->mxcsr;
502 memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
505 /* we must restore the default rounding state */
506 asm volatile ("fninit");
507 fpuc = 0x037f | (env->fpuc & (3 << 10));
508 asm volatile("fldcw %0" : : "m" (fpuc));
511 static int do_syscall(CPUState *env,
512 struct kqemu_cpu_state *kenv)
514 int selector;
516 selector = (env->star >> 32) & 0xffff;
517 #ifdef TARGET_X86_64
518 if (env->hflags & HF_LMA_MASK) {
519 int code64;
521 env->regs[R_ECX] = kenv->next_eip;
522 env->regs[11] = env->eflags;
524 code64 = env->hflags & HF_CS64_MASK;
526 cpu_x86_set_cpl(env, 0);
527 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
528 0, 0xffffffff,
529 DESC_G_MASK | DESC_P_MASK |
530 DESC_S_MASK |
531 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
532 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
533 0, 0xffffffff,
534 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
535 DESC_S_MASK |
536 DESC_W_MASK | DESC_A_MASK);
537 env->eflags &= ~env->fmask;
538 if (code64)
539 env->eip = env->lstar;
540 else
541 env->eip = env->cstar;
542 } else
543 #endif
545 env->regs[R_ECX] = (uint32_t)kenv->next_eip;
547 cpu_x86_set_cpl(env, 0);
548 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
549 0, 0xffffffff,
550 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
551 DESC_S_MASK |
552 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
553 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
554 0, 0xffffffff,
555 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
556 DESC_S_MASK |
557 DESC_W_MASK | DESC_A_MASK);
558 env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
559 env->eip = (uint32_t)env->star;
561 return 2;
564 #ifdef CONFIG_PROFILER
566 #define PC_REC_SIZE 1
567 #define PC_REC_HASH_BITS 16
568 #define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)
570 typedef struct PCRecord {
571 unsigned long pc;
572 int64_t count;
573 struct PCRecord *next;
574 } PCRecord;
576 static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
577 static int nb_pc_records;
579 static void kqemu_record_pc(unsigned long pc)
581 unsigned long h;
582 PCRecord **pr, *r;
584 h = pc / PC_REC_SIZE;
585 h = h ^ (h >> PC_REC_HASH_BITS);
586 h &= (PC_REC_HASH_SIZE - 1);
587 pr = &pc_rec_hash[h];
588 for(;;) {
589 r = *pr;
590 if (r == NULL)
591 break;
592 if (r->pc == pc) {
593 r->count++;
594 return;
596 pr = &r->next;
598 r = malloc(sizeof(PCRecord));
599 r->count = 1;
600 r->pc = pc;
601 r->next = NULL;
602 *pr = r;
603 nb_pc_records++;
606 static int pc_rec_cmp(const void *p1, const void *p2)
608 PCRecord *r1 = *(PCRecord **)p1;
609 PCRecord *r2 = *(PCRecord **)p2;
610 if (r1->count < r2->count)
611 return 1;
612 else if (r1->count == r2->count)
613 return 0;
614 else
615 return -1;
618 static void kqemu_record_flush(void)
620 PCRecord *r, *r_next;
621 int h;
623 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
624 for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
625 r_next = r->next;
626 free(r);
628 pc_rec_hash[h] = NULL;
630 nb_pc_records = 0;
633 void kqemu_record_dump(void)
635 PCRecord **pr, *r;
636 int i, h;
637 FILE *f;
638 int64_t total, sum;
640 pr = malloc(sizeof(PCRecord *) * nb_pc_records);
641 i = 0;
642 total = 0;
643 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
644 for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
645 pr[i++] = r;
646 total += r->count;
649 qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
651 f = fopen("/tmp/kqemu.stats", "w");
652 if (!f) {
653 perror("/tmp/kqemu.stats");
654 exit(1);
656 fprintf(f, "total: %" PRId64 "\n", total);
657 sum = 0;
658 for(i = 0; i < nb_pc_records; i++) {
659 r = pr[i];
660 sum += r->count;
661 fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
662 r->pc,
663 r->count,
664 (double)r->count / (double)total * 100.0,
665 (double)sum / (double)total * 100.0);
667 fclose(f);
668 free(pr);
670 kqemu_record_flush();
672 #endif
674 static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
675 const SegmentCache *sc)
677 ksc->selector = sc->selector;
678 ksc->flags = sc->flags;
679 ksc->limit = sc->limit;
680 ksc->base = sc->base;
683 static inline void kqemu_save_seg(SegmentCache *sc,
684 const struct kqemu_segment_cache *ksc)
686 sc->selector = ksc->selector;
687 sc->flags = ksc->flags;
688 sc->limit = ksc->limit;
689 sc->base = ksc->base;
692 int kqemu_cpu_exec(CPUState *env)
694 struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
695 int ret, cpl, i;
696 #ifdef CONFIG_PROFILER
697 int64_t ti;
698 #endif
699 #ifdef _WIN32
700 DWORD temp;
701 #endif
703 #ifdef CONFIG_PROFILER
704 ti = profile_getclock();
705 #endif
706 #ifdef DEBUG
707 if (loglevel & CPU_LOG_INT) {
708 fprintf(logfile, "kqemu: cpu_exec: enter\n");
709 cpu_dump_state(env, logfile, fprintf, 0);
711 #endif
712 for(i = 0; i < CPU_NB_REGS; i++)
713 kenv->regs[i] = env->regs[i];
714 kenv->eip = env->eip;
715 kenv->eflags = env->eflags;
716 for(i = 0; i < 6; i++)
717 kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
718 kqemu_load_seg(&kenv->ldt, &env->ldt);
719 kqemu_load_seg(&kenv->tr, &env->tr);
720 kqemu_load_seg(&kenv->gdt, &env->gdt);
721 kqemu_load_seg(&kenv->idt, &env->idt);
722 kenv->cr0 = env->cr[0];
723 kenv->cr2 = env->cr[2];
724 kenv->cr3 = env->cr[3];
725 kenv->cr4 = env->cr[4];
726 kenv->a20_mask = env->a20_mask;
727 kenv->efer = env->efer;
728 kenv->tsc_offset = 0;
729 kenv->star = env->star;
730 kenv->sysenter_cs = env->sysenter_cs;
731 kenv->sysenter_esp = env->sysenter_esp;
732 kenv->sysenter_eip = env->sysenter_eip;
733 #ifdef TARGET_X86_64
734 kenv->lstar = env->lstar;
735 kenv->cstar = env->cstar;
736 kenv->fmask = env->fmask;
737 kenv->kernelgsbase = env->kernelgsbase;
738 #endif
739 if (env->dr[7] & 0xff) {
740 kenv->dr7 = env->dr[7];
741 kenv->dr0 = env->dr[0];
742 kenv->dr1 = env->dr[1];
743 kenv->dr2 = env->dr[2];
744 kenv->dr3 = env->dr[3];
745 } else {
746 kenv->dr7 = 0;
748 kenv->dr6 = env->dr[6];
749 cpl = (env->hflags & HF_CPL_MASK);
750 kenv->cpl = cpl;
751 kenv->nb_pages_to_flush = nb_pages_to_flush;
752 kenv->user_only = (env->kqemu_enabled == 1);
753 kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
754 nb_ram_pages_to_update = 0;
755 kenv->nb_modified_ram_pages = nb_modified_ram_pages;
757 kqemu_reset_modified_ram_pages();
759 if (env->cpuid_features & CPUID_FXSR)
760 restore_native_fp_fxrstor(env);
761 else
762 restore_native_fp_frstor(env);
764 #ifdef _WIN32
765 if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
766 kenv, sizeof(struct kqemu_cpu_state),
767 kenv, sizeof(struct kqemu_cpu_state),
768 &temp, NULL)) {
769 ret = kenv->retval;
770 } else {
771 ret = -1;
773 #else
774 ioctl(kqemu_fd, KQEMU_EXEC, kenv);
775 ret = kenv->retval;
776 #endif
777 if (env->cpuid_features & CPUID_FXSR)
778 save_native_fp_fxsave(env);
779 else
780 save_native_fp_fsave(env);
782 for(i = 0; i < CPU_NB_REGS; i++)
783 env->regs[i] = kenv->regs[i];
784 env->eip = kenv->eip;
785 env->eflags = kenv->eflags;
786 for(i = 0; i < 6; i++)
787 kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
788 cpu_x86_set_cpl(env, kenv->cpl);
789 kqemu_save_seg(&env->ldt, &kenv->ldt);
790 env->cr[0] = kenv->cr0;
791 env->cr[4] = kenv->cr4;
792 env->cr[3] = kenv->cr3;
793 env->cr[2] = kenv->cr2;
794 env->dr[6] = kenv->dr6;
795 #ifdef TARGET_X86_64
796 env->kernelgsbase = kenv->kernelgsbase;
797 #endif
799 /* flush pages as indicated by kqemu */
800 if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
801 tlb_flush(env, 1);
802 } else {
803 for(i = 0; i < kenv->nb_pages_to_flush; i++) {
804 tlb_flush_page(env, pages_to_flush[i]);
807 nb_pages_to_flush = 0;
809 #ifdef CONFIG_PROFILER
810 kqemu_time += profile_getclock() - ti;
811 kqemu_exec_count++;
812 #endif
814 if (kenv->nb_ram_pages_to_update > 0) {
815 cpu_tlb_update_dirty(env);
818 if (kenv->nb_modified_ram_pages > 0) {
819 for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
820 unsigned long addr;
821 addr = modified_ram_pages[i];
822 tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
826 /* restore the hidden flags */
828 unsigned int new_hflags;
829 #ifdef TARGET_X86_64
830 if ((env->hflags & HF_LMA_MASK) &&
831 (env->segs[R_CS].flags & DESC_L_MASK)) {
832 /* long mode */
833 new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
834 } else
835 #endif
837 /* legacy / compatibility case */
838 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
839 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
840 new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
841 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
842 if (!(env->cr[0] & CR0_PE_MASK) ||
843 (env->eflags & VM_MASK) ||
844 !(env->hflags & HF_CS32_MASK)) {
845 /* XXX: try to avoid this test. The problem comes from the
846 fact that is real mode or vm86 mode we only modify the
847 'base' and 'selector' fields of the segment cache to go
848 faster. A solution may be to force addseg to one in
849 translate-i386.c. */
850 new_hflags |= HF_ADDSEG_MASK;
851 } else {
852 new_hflags |= ((env->segs[R_DS].base |
853 env->segs[R_ES].base |
854 env->segs[R_SS].base) != 0) <<
855 HF_ADDSEG_SHIFT;
858 env->hflags = (env->hflags &
859 ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
860 new_hflags;
862 /* update FPU flags */
863 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
864 ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
865 if (env->cr[4] & CR4_OSFXSR_MASK)
866 env->hflags |= HF_OSFXSR_MASK;
867 else
868 env->hflags &= ~HF_OSFXSR_MASK;
870 #ifdef DEBUG
871 if (loglevel & CPU_LOG_INT) {
872 fprintf(logfile, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
874 #endif
875 if (ret == KQEMU_RET_SYSCALL) {
876 /* syscall instruction */
877 return do_syscall(env, kenv);
878 } else
879 if ((ret & 0xff00) == KQEMU_RET_INT) {
880 env->exception_index = ret & 0xff;
881 env->error_code = 0;
882 env->exception_is_int = 1;
883 env->exception_next_eip = kenv->next_eip;
884 #ifdef CONFIG_PROFILER
885 kqemu_ret_int_count++;
886 #endif
887 #ifdef DEBUG
888 if (loglevel & CPU_LOG_INT) {
889 fprintf(logfile, "kqemu: interrupt v=%02x:\n",
890 env->exception_index);
891 cpu_dump_state(env, logfile, fprintf, 0);
893 #endif
894 return 1;
895 } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
896 env->exception_index = ret & 0xff;
897 env->error_code = kenv->error_code;
898 env->exception_is_int = 0;
899 env->exception_next_eip = 0;
900 #ifdef CONFIG_PROFILER
901 kqemu_ret_excp_count++;
902 #endif
903 #ifdef DEBUG
904 if (loglevel & CPU_LOG_INT) {
905 fprintf(logfile, "kqemu: exception v=%02x e=%04x:\n",
906 env->exception_index, env->error_code);
907 cpu_dump_state(env, logfile, fprintf, 0);
909 #endif
910 return 1;
911 } else if (ret == KQEMU_RET_INTR) {
912 #ifdef CONFIG_PROFILER
913 kqemu_ret_intr_count++;
914 #endif
915 #ifdef DEBUG
916 if (loglevel & CPU_LOG_INT) {
917 cpu_dump_state(env, logfile, fprintf, 0);
919 #endif
920 return 0;
921 } else if (ret == KQEMU_RET_SOFTMMU) {
922 #ifdef CONFIG_PROFILER
924 unsigned long pc = env->eip + env->segs[R_CS].base;
925 kqemu_record_pc(pc);
927 #endif
928 #ifdef DEBUG
929 if (loglevel & CPU_LOG_INT) {
930 cpu_dump_state(env, logfile, fprintf, 0);
932 #endif
933 return 2;
934 } else {
935 cpu_dump_state(env, stderr, fprintf, 0);
936 fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
937 exit(1);
939 return 0;
942 void kqemu_cpu_interrupt(CPUState *env)
944 #if defined(_WIN32)
945 /* cancelling the I/O request causes KQEMU to finish executing the
946 current block and successfully returning. */
947 CancelIo(kqemu_fd);
948 #endif
952 QEMU paravirtualization interface. The current interface only
953 allows to modify the IF and IOPL flags when running in
954 kqemu.
956 At this point it is not very satisfactory. I leave it for reference
957 as it adds little complexity.
960 #define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
962 static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
964 return 0;
967 static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
969 return 0;
972 static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
976 static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
980 static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
982 CPUState *env;
984 env = cpu_single_env;
985 if (!env)
986 return 0;
987 return env->eflags & (IF_MASK | IOPL_MASK);
990 /* Note: after writing to this address, the guest code must make sure
991 it is exiting the current TB. pushf/popf can be used for that
992 purpose. */
993 static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
995 CPUState *env;
997 env = cpu_single_env;
998 if (!env)
999 return;
1000 env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
1001 (val & (IF_MASK | IOPL_MASK));
1004 static CPUReadMemoryFunc *qpi_mem_read[3] = {
1005 qpi_mem_readb,
1006 qpi_mem_readw,
1007 qpi_mem_readl,
1010 static CPUWriteMemoryFunc *qpi_mem_write[3] = {
1011 qpi_mem_writeb,
1012 qpi_mem_writew,
1013 qpi_mem_writel,
1016 static void qpi_init(void)
1018 kqemu_comm_base = 0xff000000 | 1;
1019 qpi_io_memory = cpu_register_io_memory(0,
1020 qpi_mem_read,
1021 qpi_mem_write, NULL);
1022 cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
1023 0x1000, qpi_io_memory);
1025 #endif