common/kernel.c (kqemu.git)
1 /*
2 * KQEMU
4 * Copyright (C) 2004-2008 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 #include "kqemu_int.h"
21 #include "monitor-image.h"
23 //#define DEBUG
24 //#define DEBUG_INVALIDATE
26 static int mon_set_pte(struct kqemu_state *s, unsigned long vaddr,
27 unsigned long page_index, uint32_t pte_flags);
28 static void *mon_alloc_page(struct kqemu_state *s,
29 unsigned long *ppage_index);
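/* the declarations above are presumably needed by the shared code pulled
   in from common.c below */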
31 #include "common.c"
33 void *memcpy(void *d1, const void *s1, size_t len)
35 uint8_t *d = d1;
36 const uint8_t *s = s1;
38 while (len--) {
39 *d++ = *s++;
41 return d1;
44 void *memset(void *d1, int val, size_t len)
46 uint8_t *d = d1;
48 while (len--) {
49 *d++ = val;
51 return d1;
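/* Pack a base/limit/flags triple into the two 32-bit words of a legacy
   segment descriptor: limit 15:0 and base 15:0 in the first word; base
   23:16, the access/attribute bits, limit 19:16 and base 31:24 in the
   second. */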
54 static void set_seg(uint32_t *p, unsigned long addr, unsigned long limit,
55 int flags)
57 unsigned int e1, e2;
58 e1 = (addr << 16) | (limit & 0xffff);
59 e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000) |
60 (flags << 8);
61 p[0] = e1;
62 p[1] = e2;
65 #ifdef __x86_64__
66 static void set_seg64(uint32_t *p, unsigned long addr, unsigned long limit,
67 int flags)
69 unsigned int e1, e2;
70 e1 = (addr << 16) | (limit & 0xffff);
71 e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000) |
72 (flags << 8);
73 p[0] = e1;
74 p[1] = e2;
75 p[2] = addr >> 32;
76 p[3] = 0;
78 #endif
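/* Build an IDT gate descriptor: handler offset split across the low and
   high words, code selector in the low word, present bit, DPL and gate
   type in the high word (type 14 = interrupt gate, which clears IF on
   entry; type 15 = trap gate). In long mode the gate is 16 bytes and the
   upper half of the offset goes in the third word. */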
80 static void set_gate(uint32_t *p, unsigned int type, unsigned int dpl,
81 unsigned long addr, unsigned int sel)
83 unsigned int e1, e2;
84 e1 = (addr & 0xffff) | (sel << 16);
85 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
86 p[0] = e1;
87 p[1] = e2;
88 #ifdef __x86_64__
89 p[2] = addr >> 32;
90 p[3] = 0;
91 #endif
94 #if 0
95 static void set_trap_gate(struct kqemu_state *s, int n, int dpl, void *addr)
97 set_gate((uint32_t *)(s->idt_table + IDT_ENTRY_SIZE * n),
98 15, dpl, (unsigned long )addr, s->monitor_cs_sel);
100 #endif
102 static void set_intr_gate(struct kqemu_state *s, int n, int dpl, unsigned long addr)
104 set_gate((uint32_t *)(s->idt_table + IDT_ENTRY_SIZE * n),
105 14, dpl, addr, s->monitor_cs_sel);
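/* Vectors 3, 4 and 5 (breakpoint, overflow, BOUND) get DPL 3 so that
   int3/into/bound executed by guest user code can enter the monitor
   directly; every other vector is restricted to DPL 0. */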
108 static void mon_set_interrupt(struct kqemu_state *s, int intno, int is_int)
110 const struct monitor_code_header *m = (void *)monitor_code;
111 int dpl;
113 switch(intno) {
114 case 3:
115 case 4:
116 case 5:
117 dpl = 3;
118 break;
119 default:
120 dpl = 0;
121 break;
123 set_intr_gate(s, intno, dpl, m->interrupt_table +
124 INTERRUPT_ENTRY_SIZE * intno + s->monitor_vaddr);
128 /* only used during init */
130 static void mon_map_page_init(struct kqemu_state *s)
132 int i;
134 s->first_mapped_page = (s->monitor_end_vaddr - s->monitor_vaddr) >> PAGE_SHIFT;
135 for(i = s->first_mapped_page; i < MAX_MAPPED_PAGES - 1; i++) {
136 s->mapped_pages[i].next = i + 1;
138 s->mapped_pages[MAX_MAPPED_PAGES - 1].next = -1;
139 for(i = 0; i < MAX_MAPPED_PAGES; i++) {
140 s->mapped_pages[i].page_index = -1;
141 s->mapped_pages[i].host_page = NULL;
145 /* return NULL if error */
146 static void *mon_alloc_page(struct kqemu_state *s,
147 unsigned long *ppage_index)
149 unsigned long vaddr, page_index;
150 struct kqemu_page *host_page;
151 host_page = kqemu_alloc_zeroed_page(&page_index);
152 if (!host_page) {
153 #ifdef DEBUG
154 kqemu_log("mon_alloc_page: NULL\n");
155 #endif
156 return NULL;
158 vaddr = get_vaddr(s);
159 set_vaddr_page_index(s, vaddr, page_index, host_page, 0);
160 /* avoid recursion during init */
161 if (!s->in_page_init)
162 mon_set_pte(s, vaddr, page_index, PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
163 #ifdef DEBUG
164 kqemu_log("mon_alloc_page: vaddr=%p page_index=%08lx\n",
165 (void *)vaddr, (void *)page_index);
166 #endif
167 if (ppage_index)
168 *ppage_index = page_index;
169 return (void *)vaddr;
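/* Write a single PTE into the monitor page tables of address space 0,
   using 64-bit entries when PAE is active and 32-bit entries otherwise. */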
172 static int mon_set_pte(struct kqemu_state *s, unsigned long vaddr,
173 unsigned long page_index, uint32_t pte_flags)
175 #ifdef DEBUG
176 kqemu_log("mon_set_pte: vaddr=0x%lx page_index=0x%lx pte_flags=0x%x\n",
177 vaddr, page_index, pte_flags);
178 #endif
179 if (USE_PAE(s)) {
180 uint64_t *ptep;
181 ptep = mon_get_ptep_l3(s, 0, vaddr, 1, NULL);
182 if (!ptep)
183 return -1;
184 *ptep = ((uint64_t)page_index << PAGE_SHIFT) | pte_flags;
185 } else {
186 uint32_t *ptep;
187 ptep = mon_get_ptep_l2(s, 0, vaddr, 1, NULL);
188 if (!ptep)
189 return -1;
190 *ptep = (page_index << PAGE_SHIFT) | pte_flags;
192 return 0;
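/* Lock a range of user pages in RAM and map them at consecutive monitor
   virtual addresses; the monitor address of the first page is returned as
   the base of the mapping. */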
195 /* return NULL if error */
196 static void *mon_user_map(struct kqemu_state *s, void *uaddr, int size,
197 int pte_flags)
199 unsigned long page_index, vaddr, i;
200 void *ptr = NULL;
201 struct kqemu_user_page *host_page;
203 size = PAGE_ALIGN(size);
205     /* NOTE: we rely on the fact that get_vaddr() returns contiguous pages */
206 for(i = 0; i < size; i += 4096) {
207 host_page = kqemu_lock_user_page(&page_index,
208 (unsigned long)uaddr + i);
209 if (!host_page)
210 return NULL;
211 vaddr = get_vaddr(s);
212 set_vaddr_page_index(s, vaddr, page_index, host_page, 1);
213 mon_set_pte(s, vaddr, page_index,
214 PG_PRESENT_MASK | PG_GLOBAL(s) | pte_flags);
215 if (i == 0)
216 ptr = (void *)vaddr;
218 return ptr;
221 #define cpuid(index, eax, ebx, ecx, edx) \
222 asm volatile ("cpuid" \
223 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
224 : "0" (index))
226 #ifdef __x86_64__
227 static int is_cpuid_supported(void)
229 return 1;
231 #else
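/* On 32-bit hosts, CPUID support is detected by toggling the ID flag
   (bit 21) of EFLAGS: if the bit can be changed, CPUID is available. */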
232 static int is_cpuid_supported(void)
234 int v0, v1;
235 asm volatile ("pushf\n"
236 "popl %0\n"
237 "movl %0, %1\n"
238 "xorl $0x00200000, %0\n"
239 "pushl %0\n"
240 "popf\n"
241 "pushf\n"
242 "popl %0\n"
243 : "=a" (v0), "=d" (v1)
245 : "cc");
246 return (v0 != v1);
248 #endif
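/* Cache the basic and extended CPUID feature flags. The vendor test
   compares EBX/EDX/ECX with the "GenuineIntel" signature so that SEP can
   be masked out on early Intel parts (signature below 0x633) where
   SYSENTER is unreliable. */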
250 static void get_cpuid_features(struct kqemu_state *s)
252 uint32_t eax, ebx, ecx, edx;
253 int is_intel;
255 if (!is_cpuid_supported()) {
256 s->cpuid_features = 0;
257 return;
258 } else {
259 cpuid(0, eax, ebx, ecx, edx);
260 is_intel = (ebx == 0x756e6547 && edx == 0x49656e69 &&
261 ecx == 0x6c65746e);
262 cpuid(1, eax, ebx, ecx, edx);
263         /* SEP is buggy on some Pentium Pros */
264 if (is_intel && (edx & CPUID_SEP) &&
265 (eax & 0xfff) < 0x633) {
266 edx &= ~CPUID_SEP;
268 s->cpuid_features = edx;
270 s->cpuid_ext2_features = 0;
271 cpuid(0x80000000, eax, ebx, ecx, edx);
272 if (eax >= 0x80000001) {
273 cpuid(0x80000001, eax, ebx, ecx, edx);
274 s->cpuid_ext2_features = edx;
279 /* per-instance locked RAM page allocation logic */
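/* The global budget of lockable RAM pages is redistributed among all
   registered states in proportion to their guest RAM size, with a floor
   of MIN_LOCKED_RAM_PAGES per state. */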
280 static void kqemu_update_locked_ram_pages(struct kqemu_global_state *g)
282 struct kqemu_state *s;
283 unsigned long total_ram_pages, max_locked_ram_pages;
285 total_ram_pages = 0;
286 for(s = g->first_state; s != NULL; s = s->next_state) {
287 total_ram_pages += s->nb_ram_pages;
290     /* XXX: better logic to guarantee no overflow ? */
291 for(s = g->first_state; s != NULL; s = s->next_state) {
292 max_locked_ram_pages = (g->max_locked_ram_pages * s->nb_ram_pages) /
293 total_ram_pages;
294 if (max_locked_ram_pages < MIN_LOCKED_RAM_PAGES)
295 max_locked_ram_pages = MIN_LOCKED_RAM_PAGES;
296 s->max_locked_ram_pages = max_locked_ram_pages;
297 #ifdef DEBUG
298 kqemu_log("state %p: max locked ram=%d KB\n",
299 s, s->max_locked_ram_pages * 4);
300 #endif
304 static int kqemu_add_state(struct kqemu_global_state *g,
305 struct kqemu_state *s)
307 int ret;
309 spin_lock(&g->lock);
310 if (((g->nb_kqemu_states + 1) * MIN_LOCKED_RAM_PAGES) >
311 g->max_locked_ram_pages) {
312 ret = -1;
313 } else {
314 s->global_state = g;
315 s->next_state = g->first_state;
316 g->first_state = s;
317 g->nb_kqemu_states++;
318 kqemu_update_locked_ram_pages(g);
319 ret = 0;
321 spin_unlock(&g->lock);
322 return ret;
325 static void kqemu_del_state(struct kqemu_state *s)
327 struct kqemu_global_state *g = s->global_state;
328 struct kqemu_state **ps;
330 if (g) {
331 spin_lock(&g->lock);
332 for(ps = &g->first_state; *ps != NULL; ps = &(*ps)->next_state) {
333 if (*ps == s) {
334 *ps = s->next_state;
335 break;
338 g->nb_kqemu_states--;
339 kqemu_update_locked_ram_pages(g);
340 spin_unlock(&g->lock);
344 struct kqemu_global_state *kqemu_global_init(int max_locked_pages)
346 struct kqemu_global_state *g;
348 g = kqemu_vmalloc(PAGE_ALIGN(sizeof(struct kqemu_global_state)));
349 if (!g)
350 return NULL;
351 memset(g, 0, sizeof(struct kqemu_global_state));
352 spin_lock_init(&g->lock);
353 g->max_locked_ram_pages = max_locked_pages;
354 return g;
357 void kqemu_global_delete(struct kqemu_global_state *g)
359 /* XXX: free all existing states ? */
360 kqemu_vfree(g);
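/* Build the per-VM state: the monitor code image and the kqemu_state
   structure are allocated together, then mapped at monitor_vaddr inside
   private page tables, with one set of page tables per guest privilege
   class (kernel vs. user). */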
363 struct kqemu_state *kqemu_init(struct kqemu_init *d,
364 struct kqemu_global_state *g)
366 struct kqemu_state *s;
367 const struct monitor_code_header *m = (void *)monitor_code;
368 unsigned long vaddr;
369 const uint8_t *kernel_vaddr;
370 int i, j, n, kqemu_state_size;
371 uint8_t *s1;
372 uint64_t *dt_table;
374 /* some consistency checks */
375 if (((unsigned long)d->ram_base & ~PAGE_MASK) != 0 ||
376 ((unsigned long)d->ram_dirty & ~PAGE_MASK) != 0 ||
377 (d->ram_size & ~PAGE_MASK) != 0 ||
378 d->ram_size >= 0x7ffff000 ||
379 ((unsigned long)d->pages_to_flush & ~PAGE_MASK) != 0 ||
380 ((unsigned long)d->ram_pages_to_update & ~PAGE_MASK) != 0 ||
381 ((unsigned long)d->modified_ram_pages & ~PAGE_MASK) != 0) {
382 kqemu_log("Invalid kqemu_init data alignment\n");
383 return NULL;
386 n = d->ram_size >> PAGE_SHIFT;
387 kqemu_state_size = PAGE_ALIGN(sizeof(monitor_code)) +
388 PAGE_ALIGN(sizeof(struct kqemu_state) +
389 n * sizeof(struct kqemu_ram_page));
390 s1 = kqemu_vmalloc(kqemu_state_size);
391 if (!s1)
392 return NULL;
393 memset(s1, 0, kqemu_state_size);
394 memcpy(s1, monitor_code, sizeof(monitor_code));
395 s = (void *)(s1 + PAGE_ALIGN(sizeof(monitor_code)));
397 #ifndef __x86_64__
398 /* check PAE state */
400 unsigned long host_cr4;
401 asm volatile ("movl %%cr4, %0" : "=r" (host_cr4));
402 s->use_pae = (host_cr4 & CR4_PAE_MASK) != 0;
404 #endif
406 /* the following can be initialized with any value */
407 #ifdef __x86_64__
408 // s->monitor_vaddr = 0xffff900000000000;
409 /* must stay in low 4GB for easier 16 bit ESP fix */
410 s->monitor_vaddr = 0xf0000000;
411 #else
412 s->monitor_vaddr = 0xf0000000;
413 #endif
414 s->monitor_selector_base = 0xf180;
415 #ifdef DEBUG
416 kqemu_log("kqemu_init monitor_vaddr=0x%08lx sel_base=0x%04x\n",
417 s->monitor_vaddr, s->monitor_selector_base);
418 #endif
420 /* selectors */
421 s->monitor_cs_sel = s->monitor_selector_base + (0 << 3);
422 #ifdef __x86_64__
423 s->monitor_ds_sel = 0; /* no need for a specific data segment */
424 /* used for 16 bit esp fix */
425 s->monitor_cs32_sel = (s->monitor_selector_base + (7 << 3)) | 1;
426 s->monitor_ss16_sel = (s->monitor_selector_base + (6 << 3)) | 1;
427 s->monitor_ss_null_sel = (s->monitor_selector_base + (1 << 3)) | 3;
428 #else
429 s->monitor_ds_sel = s->monitor_selector_base + (1 << 3);
430 s->monitor_ss16_sel = s->monitor_selector_base + (6 << 3);
431 #endif
432 s->monitor_ldt_sel = s->monitor_selector_base + (2 << 3);
434 s->monitor_data_vaddr = s->monitor_vaddr +
435 PAGE_ALIGN(sizeof(monitor_code));
436 s->monitor_end_vaddr = s->monitor_vaddr + kqemu_state_size;
437 s->monitor_to_kernel_offset = (unsigned long)s - s->monitor_data_vaddr;
439     /* must be done early so that the 'fail' case works */
440 lock_ram_init(s);
442 /* IDT init */
443 s->monitor_idt.base = s->monitor_data_vaddr +
444 offsetof(struct kqemu_state, idt_table);
445 s->monitor_idt.limit = sizeof(s->idt_table) - 1;
447 /* we use interrupt gates to disable IF */
448 for(i = 0; i <= 0x13; i++) {
449 mon_set_interrupt(s, i, 0);
451 for(i = 0x14; i < 256; i++) {
452 mon_set_interrupt(s, i, 1);
455 /* GDT init */
456 s->monitor_gdt.limit = 0xffff;
458 /* TSS init */
459 #ifdef __x86_64__
460 s->monitor_tss.rsp0 = s->monitor_data_vaddr +
461 offsetof(struct kqemu_state, regs1.dummy[0]);
462 s->monitor_tss.bitmap = 0x8000; /* no I/O permitted */
463 #else
464 /* TSS init */
465 s->monitor_tss.esp0 = s->monitor_data_vaddr +
466 offsetof(struct kqemu_state, regs1.dummy[0]);
467 s->monitor_tss.ss0 = s->monitor_ds_sel;
468 s->monitor_tss.bitmap = 0x8000; /* no I/O permitted */
469 s->monitor_tss.back_link = 0xffff; /* generates error if iret with
470 NT bit */
471 #endif
473 #ifdef __x86_64__
474 set_seg64(s->tr_desc_cache,
475 s->monitor_data_vaddr +
476 offsetof(struct kqemu_state, monitor_tss),
477 sizeof(struct kqemu_tss) - 1, 0x89);
478 #else
479 set_seg(s->tr_desc_cache,
480 s->monitor_data_vaddr +
481 offsetof(struct kqemu_state, monitor_tss),
482 235, 0x89);
483 #endif
485 /* for each CPL we create a LDT and GDT */
486 for(i = 0; i < NB_DT_TABLES; i++) {
487 unsigned long ldt_addr;
488 dt_table = s->dt_table + i * 16384;
489 ldt_addr = s->monitor_data_vaddr +
490 offsetof(struct kqemu_state, dt_table) + 0x10000 + 0x20000 * i;
491 #ifdef __x86_64__
492 set_seg64((uint32_t *)(dt_table + (s->monitor_ldt_sel >> 3)),
493 ldt_addr, 0xffff, 0x82);
494 set_seg((uint32_t *)(dt_table + (s->monitor_cs_sel >> 3)),
495 0, 0xfffff, 0xa09a); /* long mode segment */
496 set_seg((uint32_t *)(dt_table + (s->monitor_ss16_sel >> 3)),
497 (s->monitor_data_vaddr + offsetof(struct kqemu_state, stack)) & ~0xffff,
498 0xffff, 0x00b2); /* SS16 segment for 16 bit ESP fix */
499 set_seg((uint32_t *)(dt_table + (s->monitor_cs32_sel >> 3)),
500 0, 0xfffff, 0xc0ba); /* CS32 segment for 16 bit ESP fix */
501 set_seg((uint32_t *)(dt_table + (s->monitor_ss_null_sel >> 3)),
502 0, 0, 0x40f2); /* substitute for null SS segment */
503 #else
504 set_seg((uint32_t *)(dt_table + (s->monitor_ldt_sel >> 3)),
505 ldt_addr, 0xffff, 0x82);
506 set_seg((uint32_t *)(dt_table + (s->monitor_cs_sel >> 3)),
507 0, 0xfffff, 0xc09a);
508 set_seg((uint32_t *)(dt_table + (s->monitor_ds_sel >> 3)),
509 0, 0xfffff, 0xc092);
510 set_seg((uint32_t *)(dt_table + (s->monitor_ss16_sel >> 3)),
511 (s->monitor_data_vaddr + offsetof(struct kqemu_state, stack)) & ~0xffff,
512 0xffff, 0x0092);
513 #endif
516 /* page table init */
517 mon_map_page_init(s);
519 s->in_page_init = 1; /* avoid recursion in page allocator */
521     /* make sure we allocate enough PTEs for the monitor itself (2 MB
522 is OK for both PAE and normal MMU) */
523 for(i = 0; i < MONITOR_MEM_SIZE; i += 2048 * 1024) {
524 mon_set_pte(s, s->monitor_vaddr + i, 0, 0);
527 /* set the pte of the allocated pages (no page_alloc is needed) */
528 for(i = 0; i < MAX_MAPPED_PAGES; i++) {
529 unsigned long page_index;
530 page_index = s->mapped_pages[i].page_index;
531 if (page_index != -1) {
532 mon_set_pte(s, s->monitor_vaddr + ((unsigned long)i << PAGE_SHIFT),
533 page_index,
534 PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
537 s->in_page_init = 0;
539 kernel_vaddr = s1;
540 for(vaddr = s->monitor_vaddr; vaddr < s->monitor_data_vaddr;
541 vaddr += PAGE_SIZE) {
542         /* XXX: RW because of the data; it should be set RW only on the
543            pages that actually need it */
544 mon_set_pte(s, vaddr, kqemu_vmalloc_to_phys(kernel_vaddr),
545 PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
546 kernel_vaddr += PAGE_SIZE;
548 for(; vaddr < s->monitor_end_vaddr;
549 vaddr += PAGE_SIZE) {
550 mon_set_pte(s, vaddr, kqemu_vmalloc_to_phys(kernel_vaddr),
551 PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
552 kernel_vaddr += PAGE_SIZE;
555 /* clone the monitor PTE pages in each address space */
556 for(i = 1; i < NB_ADDRESS_SPACES; i++) {
557 if (USE_PAE(s)) {
558 uint64_t *pdep, *pdep1;
559 for(j = 0; j < MONITOR_MEM_SIZE; j += 2048 * 1024) {
560 vaddr = s->monitor_vaddr + j;
561 pdep = mon_get_ptep_l3(s, 0, vaddr, 2, NULL);
562 pdep1 = mon_get_ptep_l3(s, i, vaddr, 2, NULL);
563 *pdep1 = *pdep;
565 } else {
566 uint32_t *pdep, *pdep1;
567 for(j = 0; j < MONITOR_MEM_SIZE; j += 4096 * 1024) {
568 vaddr = s->monitor_vaddr + j;
569 pdep = mon_get_ptep_l2(s, 0, vaddr, 2, NULL);
570 pdep1 = mon_get_ptep_l2(s, i, vaddr, 2, NULL);
571 *pdep1 = *pdep;
576 /* set the cr3 register of each address space */
577 for(i = 0; i < NB_ADDRESS_SPACES; i++) {
578 unsigned long pfn;
580 pfn = kqemu_vmalloc_to_phys(&s->pgds[i]);
581 /* sanity check */
582 #ifndef __x86_64__
583 if (pfn >= (1 << (32 - PAGE_SHIFT))) {
584 kqemu_log("Error: invalid cr3 (%p)\n", (void *)pfn);
585 goto fail;
587 #endif
588 s->pgds_cr3[i] = pfn << PAGE_SHIFT;
589 #ifdef DEBUG
590 kqemu_log("pgds_cr3[%d] = %p\n", i, (void *)s->pgds_cr3[i]);
592 int start, end;
593 start = s->monitor_vaddr >> 22;
594 end = start + (MONITOR_MEM_SIZE >> 22);
595 for(j=start;j<end;j++) {
596 kqemu_log("%03x: %08x\n", j, s->pgds[i].l2[j]);
599 #endif
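/* The nexus page is the first page of the monitor code; it is also mapped
   at its kernel virtual address inside every monitor address space, so the
   kernel<->monitor transition code keeps executing across the CR3 switch. */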
602 /* prepare nexus page switch logic */
604 unsigned long monitor_page;
606 s->nexus_kaddr = (unsigned long)s1;
607 monitor_page = kqemu_vmalloc_to_phys((void *)s->nexus_kaddr);
608 if (USE_PAE(s)) {
609 s->nexus_pte = ((uint64_t)monitor_page << PAGE_SHIFT) |
610 PG_PRESENT_MASK | PG_GLOBAL(s);
611 for(i = 0; i < NB_ADDRESS_SPACES; i++) {
612 s->nexus_kaddr_ptep[i] =
613 mon_get_ptep_l3(s, i, s->nexus_kaddr, 1,
614 (unsigned long *)&s->nexus_kaddr_vptep[i]);
616 } else {
617 s->nexus_pte = (monitor_page << PAGE_SHIFT) |
618 PG_PRESENT_MASK | PG_GLOBAL(s);
619 for(i = 0; i < NB_ADDRESS_SPACES; i++) {
620 s->nexus_kaddr_ptep[i] =
621 mon_get_ptep_l2(s, i, s->nexus_kaddr, 1,
622 (unsigned long *)&s->nexus_kaddr_vptep[i]);
625 #ifdef DEBUG
626 kqemu_log("nexus_kaddr=%p nexus_pte=0x%08x vptep0=%p vptep1=%p\n",
627 (void *)s->nexus_kaddr,
628 (int)s->nexus_pte,
629 (void *)s->nexus_kaddr_vptep[0],
630 (void *)s->nexus_kaddr_vptep[1]);
631 #endif
633 s->monitor_data_kaddr = (unsigned long)s;
634 s->monitor_jmp = m->kernel2monitor_jmp_offset + s->monitor_vaddr;
635 s->kernel_jmp = m->monitor2kernel_jmp_offset + (unsigned long)s1;
637 /* communication page */
638 s->comm_page_index = kqemu_vmalloc_to_phys(&s->comm_page);
640 /* physical RAM */
642 int i;
643 struct kqemu_ram_page *p;
645 s->ram_size = d->ram_size;
646 s->nb_ram_pages = s->ram_size >> PAGE_SHIFT;
647 s->ram_base_uaddr = (unsigned long)d->ram_base;
648 #ifdef DEBUG
649 kqemu_log("nb_ram_pages=%d\n", s->nb_ram_pages);
650 #endif
651 p = s->ram_pages;
652 for(i = 0; i < s->nb_ram_pages; i++) {
653 p[i].paddr = -1;
654 p[i].vaddr = -1;
657 /* init mapped ram page list */
658 map_ram_init(s);
661 s->ram_dirty = mon_user_map(s, d->ram_dirty, s->ram_size >> PAGE_SHIFT,
662 PG_RW_MASK);
663 if (!s->ram_dirty)
664 goto fail;
666 s->pages_to_flush = mon_user_map(s, d->pages_to_flush, PAGE_SIZE,
667 PG_RW_MASK);
668 if (!s->pages_to_flush)
669 goto fail;
671 s->ram_pages_to_update = mon_user_map(s, d->ram_pages_to_update,
672 PAGE_SIZE, 0);
673 if (!s->ram_pages_to_update)
674 goto fail;
676 s->modified_ram_pages = mon_user_map(s, d->modified_ram_pages,
677 PAGE_SIZE, PG_RW_MASK);
678 if (!s->modified_ram_pages)
679 goto fail;
681 for(i = 0;i < RAM_PAGE_CACHE_SIZE;i++) {
682 vaddr = get_vaddr(s);
683 if (i == 0)
684 s->ram_page_cache_base = vaddr;
686 for(i = 0;i < RAM_PAGE_CACHE_SIZE;i++) {
687 s->slot_to_ram_addr[i] = -1;
690 soft_tlb_flush(s);
692 get_cpuid_features(s);
694 /* disable SEP code if sysenter is not supported by the CPU or not
695 used by the OS */
696 s->use_sep = 0;
697 if (s->cpuid_features & CPUID_SEP) {
698 uint32_t dummy, cs_val;
699 rdmsr(MSR_IA32_SYSENTER_CS, cs_val, dummy);
700 if (cs_val != 0) {
701 s->use_sep = 1;
704 /* syscall support */
705 s->use_syscall = 0;
706 if (s->cpuid_ext2_features & CPUID_EXT2_SYSCALL) {
707 uint32_t efer_low, efer_high;
708 rdmsr(MSR_EFER, efer_low, efer_high);
709 if (efer_low & MSR_EFER_SCE) {
710 s->use_syscall = 1;
713     /* map the local APIC so that NMIs can be disabled if required */
714 s->use_apic = 0;
715 if (s->cpuid_features & CPUID_APIC) {
716 uint32_t apic_base, apic_baseh;
717 rdmsr(MSR_IA32_APICBASE, apic_base, apic_baseh);
718 if (apic_base & MSR_IA32_APICBASE_ENABLE) {
719 apic_base = apic_base & MSR_IA32_APICBASE_BASE;
720 s->apic_regs = kqemu_io_map(apic_base >> PAGE_SHIFT, PAGE_SIZE);
721 if (s->apic_regs) {
722 s->apic_lvt_max = (s->apic_regs[APIC_LVR >> 2] >> 16) & 0xff;
723 if (s->apic_lvt_max < 3)
724 s->apic_lvt_max = 3;
725 else if (s->apic_lvt_max > 5)
726 s->apic_lvt_max = 5;
727 s->use_apic = 1;
728 #if defined(DEBUG)
729 kqemu_log("apic_base=%p (virt=%p) apic_lvt_max=%d\n",
730 (void *)apic_base, (void *)s->apic_regs,
731 s->apic_lvt_max);
732 #endif
737 #ifndef __x86_64__
738 /* PGE support */
739 s->pg_global_mask = 0;
740 if (s->cpuid_features & CPUID_PGE)
741 s->pg_global_mask = PG_GLOBAL_MASK;
742 #endif
744 #ifdef PROFILE_INSN
746 for(i=0;i<512;i++) {
747 s->tab_insn_cycles_min[i] = 0x7fffffff;
750 #endif
751 if (kqemu_add_state(g, s) < 0)
752 goto fail;
753 return s;
754 fail:
755 kqemu_delete(s);
756 return NULL;
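/* Register one guest physical memory range in the physical page
   directory: RAM and ROM ranges store the per-page ram_addr together with
   the I/O index, other ranges store the I/O index alone. Only a 32-bit
   guest physical address space is accepted. */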
759 int kqemu_set_phys_mem(struct kqemu_state *s,
760 const struct kqemu_phys_mem *kphys_mem)
762 uint64_t start, size, end, addr;
763 uint32_t ram_addr, ram_end, *ptr, pd, io_index;
765 start = kphys_mem->phys_addr;
766 size = kphys_mem->size;
767 end = start + size;
768 if ((start & ~PAGE_MASK) != 0 || (end & ~PAGE_MASK) != 0)
769 return -1;
770 /* XXX: we only support 32 bit physical address space */
771 if ((start & ~0xffffffffULL) != 0 ||
772 ((end - 1) & ~0xffffffffULL) != 0)
773 return -1;
774 io_index = kphys_mem->io_index;
775 if (io_index > KQEMU_IO_MEM_UNASSIGNED)
776 return -1;
777 pd = io_index;
778 if (io_index <= KQEMU_IO_MEM_ROM) {
779 ram_addr = kphys_mem->ram_addr;
780 if ((ram_addr & ~PAGE_MASK) != 0)
781 return -1;
782 ram_end = ram_addr + size;
783 /* check overflow */
784 if (ram_end < ram_addr)
785 return -1;
786 if (ram_end > s->ram_size)
787 return -1;
788 pd |= (ram_addr & PAGE_MASK);
790 for(addr = start; addr != end; addr += PAGE_SIZE) {
791 ptr = phys_page_findp(s, addr >> PAGE_SHIFT, 1);
792 if (!ptr)
793 return -1;
794 *ptr = pd;
795 if (io_index <= KQEMU_IO_MEM_ROM)
796 pd += PAGE_SIZE;
798 return 0;
801 #ifdef PROFILE_INTERP2
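/* lldiv() is a helper for the profiling dumps: an approximate 64-bit
   division returning 0 for a zero divisor. The 32-bit variant shifts both
   operands right until the divisor fits in 32 bits so a single divl can
   be used. */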
803 #ifdef __x86_64__
804 static inline unsigned int lldiv(uint64_t a, uint64_t b)
806 int q;
807 if (b == 0) {
808 q = 0;
809 } else {
810 q = a / b;
812 return q;
814 #else
815 static unsigned int lldiv(uint64_t a, uint64_t b)
817 uint32_t b32;
818 unsigned int q, r;
820 if (b == 0) {
821 q = 0;
822 } else {
823 while (b >= 0x100000000LL) {
824 b >>= 1;
825 a >>= 1;
827 b32 = b;
828 asm volatile ("divl %2"
829 : "=a" (q), "=d" (r)
830 : "m" (b32), "a" ((uint32_t )a), "d" ((uint32_t )(a >> 32)));
832 return q;
834 #endif
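/* the cycles-to-milliseconds conversion below assumes a 2.4 GHz TSC
   (2,400,000 cycles per millisecond) */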
836 #define CYCLES_TO_MS(x) lldiv(x, 2400000)
837 #define EXCP_CYCLES 1200 /* approximate cycles to handle one exception */
839 static void profile_dump(struct kqemu_state *s)
841 #ifdef PROFILE_INSN
842 for(i=0;i<512;i++) {
843 if (s->tab_insn_count[i] != 0) {
844 kqemu_log("%02x: %9lld %4d %4d %4d %11lld\n",
846 s->tab_insn_count[i],
847 s->tab_insn_cycles_min[i],
848 lldiv(s->tab_insn_cycles[i], s->tab_insn_count[i]),
849 s->tab_insn_cycles_max[i],
850 s->tab_insn_cycles[i]);
853 #endif
854 #ifdef PROFILE_INTERP_PC
856 int i, j, n;
857 ProfileInterpEntry *pe, *pe1, *pe2, tmp;
858 int64_t cycles_tot, cycles_sum;
860 kqemu_log("Interp PC dump:\n");
861 kqemu_log("n: EIP count avg_insn_count avg_cycles cumulative_time\n");
863 /* add exception cost */
864 for(i = 0; i < s->nb_profile_interp_entries; i++) {
865 pe = &s->profile_interp_entries[i];
866 pe->cycles += pe->count * EXCP_CYCLES;
869 /* sort */
870 for(i = 0; i < (s->nb_profile_interp_entries - 1); i++) {
871 for(j = i + 1; j < s->nb_profile_interp_entries; j++) {
872 pe1 = &s->profile_interp_entries[i];
873 pe2 = &s->profile_interp_entries[j];
874 if (pe1->cycles < pe2->cycles) {
875 tmp = *pe1;
876 *pe1 = *pe2;
877 *pe2 = tmp;
882 cycles_tot = 0;
883 for(i = 0; i < s->nb_profile_interp_entries; i++)
884 cycles_tot += s->profile_interp_entries[i].cycles;
886 cycles_sum = 0;
887 n = s->nb_profile_interp_entries;
888 if (n > 50)
889 n = 50;
890 for(i = 0; i < n; i++) {
891 pe = &s->profile_interp_entries[i];
892 cycles_sum += pe->cycles;
893 kqemu_log("%4d: " FMT_lx " %lld %d %d %d%%\n",
895 pe->eip,
896 pe->count,
897 lldiv(pe->insn_count, pe->count),
898 lldiv(pe->cycles, pe->count),
899 lldiv(cycles_sum * 100, cycles_tot));
902 #endif
903 kqemu_log("Execution statistics:\n");
904 kqemu_log("total_interp_count=%lld\n",
905 s->total_interp_count);
906 kqemu_log("exc_interp: count=%lld avg_insn=%d (%lld)\n",
907 s->exc_interp_count,
908 lldiv(s->exc_insn_count, s->exc_interp_count),
909 s->exc_insn_count);
910 kqemu_log("exc_interp: max=%d EIP=%08lx\n",
911 s->exc_insn_count_max,
912 s->exc_start_eip_max);
913 kqemu_log("exc_seg_cycles=%d cycles/insn=%d (%d ms)\n",
914 lldiv(s->exc_seg_cycles, s->exc_interp_count),
915 lldiv(s->exc_interp_cycles, s->exc_insn_count),
916 CYCLES_TO_MS(s->exc_interp_cycles + s->exc_seg_cycles + s->exc_interp_count * EXCP_CYCLES));
917 kqemu_log("interp_interrupt: count=%lld cycles=%d (%d ms)\n",
918 s->interp_interrupt_count,
919 lldiv(s->interp_interrupt_cycles, s->interp_interrupt_count),
920 CYCLES_TO_MS(s->interp_interrupt_cycles));
922 kqemu_log("tlb_flush: count=%lld cycles=%d (%d ms)\n",
923 s->tlb_flush_count,
924 lldiv(s->tlb_flush_cycles, s->tlb_flush_count),
925 CYCLES_TO_MS(s->tlb_flush_cycles));
926 kqemu_log("tlb_flush_page: count=%lld cycles=%d (%d ms)\n",
927 s->tlb_flush_page_count,
928 lldiv(s->tlb_flush_page_cycles, s->tlb_flush_page_count),
929 CYCLES_TO_MS(s->tlb_flush_page_cycles));
930 kqemu_log("page faults: total=%lld mmu=%lld cycles=%d (%d ms)\n",
931 s->total_page_fault_count,
932 s->mmu_page_fault_count,
933 lldiv(s->mmu_page_fault_cycles + s->tlb_page_fault_cycles, s->mmu_page_fault_count),
934 CYCLES_TO_MS(s->mmu_page_fault_cycles + s->tlb_page_fault_cycles + EXCP_CYCLES * s->total_page_fault_count));
935 kqemu_log("page faults tlb: count=%lld (interp_count=%lld) cycles=%d (%d ms)\n",
936 s->tlb_page_fault_count,
937 s->tlb_interp_page_fault_count,
938 lldiv(s->tlb_page_fault_cycles, s->tlb_page_fault_count),
939 CYCLES_TO_MS(s->tlb_page_fault_cycles + EXCP_CYCLES * s->tlb_page_fault_count));
940 kqemu_log("exec_init: count=%lld cycles=%d (%d ms)\n",
941 s->exec_init_count,
942 lldiv(s->exec_init_cycles, s->exec_init_count),
943 CYCLES_TO_MS(s->exec_init_cycles));
944 kqemu_log("hw_interrupt: count=%lld cycles=%d (%d ms)\n",
945 s->hw_interrupt_count,
946 lldiv(s->hw_interrupt_cycles, s->hw_interrupt_count),
947 CYCLES_TO_MS(s->hw_interrupt_cycles + EXCP_CYCLES * s->hw_interrupt_count));
948 kqemu_log("ram_map: count=%lld miss=%d%%\n",
949 s->ram_map_count,
950 lldiv(s->ram_map_miss_count * 100, s->ram_map_count));
952 #endif
954 void kqemu_delete(struct kqemu_state *s)
956 uint8_t *s1;
957 struct kqemu_ram_page *rp;
958 struct mapped_page *p;
959 int i;
961 #ifdef PROFILE_INTERP2
962 profile_dump(s);
963 #endif
964 /* unlock the user pages */
965 for(rp = s->locked_page_head.lock_next;
966 rp != KER_RP_PTR(s, &s->locked_page_head);
967 rp = rp->lock_next) {
968 rp = MON_RP_PTR(s, rp);
969 kqemu_unlock_user_page(rp->host_page);
972 /* free all user and kernel pages */
973 for(i = 0; i < MAX_MAPPED_PAGES; i++) {
974 p = &s->mapped_pages[i];
975 if (p->host_page != NULL) {
976 if (p->user_page) {
977 kqemu_unlock_user_page(p->host_page);
978 } else {
979 kqemu_free_page(p->host_page);
984 if (s->apic_regs)
985 kqemu_io_unmap((void *)s->apic_regs, PAGE_SIZE);
987 kqemu_del_state(s);
989 s1 = (uint8_t *)s - PAGE_ALIGN(sizeof(monitor_code));
990 kqemu_vfree(s1);
993 struct kqemu_cpu_state *kqemu_get_cpu_state(struct kqemu_state *s)
995 return &s->cpu_state;
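/* NMIs are not blocked by cli, so before entering the monitor every local
   APIC LVT entry whose delivery mode is NMI is temporarily masked
   (apic_save_and_disable_nmi) and unmasked again afterwards
   (apic_restore_nmi). */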
998 static inline int apic_check_lvt(struct kqemu_state *s, int lvt)
1000 uint32_t val;
1001 val = s->apic_regs[(APIC_LVTT >> 2) + lvt * 4];
1002 if (!(val & APIC_LVT_MASKED) &&
1003 (val & APIC_DM_MASK) == APIC_DM_NMI) {
1004 val |= APIC_LVT_MASKED;
1005 s->apic_regs[(APIC_LVTT >> 2) + lvt * 4] = val;
1006 return 1 << lvt;
1007 } else {
1008 return 0;
1012 static inline void apic_restore_lvt(struct kqemu_state *s, int lvt,
1013 int lvt_mask)
1015 if (lvt_mask & (1 << lvt))
1016 s->apic_regs[(APIC_LVTT >> 2) + lvt * 4] &= ~APIC_LVT_MASKED;
1019 static int apic_save_and_disable_nmi(struct kqemu_state *s)
1021 int lvt_mask;
1023 lvt_mask = 0;
1024 switch(s->apic_lvt_max) {
1025 case 5:
1026 default:
1027 lvt_mask |= apic_check_lvt(s, 1); /* APIC_LVTTHMR */
1028 /* fall thru */
1029 case 4:
1030 lvt_mask |= apic_check_lvt(s, 2); /* APIC_LVTPC */
1031         lvt_mask |= apic_check_lvt(s, 2); /* APIC_LVTPC (twice because it
1032                                              could be masked by hardware) */
1033 /* fall thru */
1034 case 3:
1035 lvt_mask |= apic_check_lvt(s, 0); /* APIC_LVTT */
1036 lvt_mask |= apic_check_lvt(s, 3); /* APIC_LVT0 */
1037 lvt_mask |= apic_check_lvt(s, 4); /* APIC_LVT1 */
1038 lvt_mask |= apic_check_lvt(s, 5); /* APIC_LVTERR */
1039 break;
1041 return lvt_mask;
1044 static void apic_restore_nmi(struct kqemu_state *s, int lvt_mask)
1046 if (lvt_mask) {
1047 apic_restore_lvt(s, 0, lvt_mask);
1048 apic_restore_lvt(s, 1, lvt_mask);
1049 apic_restore_lvt(s, 2, lvt_mask);
1050 apic_restore_lvt(s, 3, lvt_mask);
1051 apic_restore_lvt(s, 4, lvt_mask);
1052 apic_restore_lvt(s, 5, lvt_mask);
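/* Load one guest debug register, dropping any breakpoint that falls inside
   the monitor's own address range (its enable bits are cleared in the DR7
   value loaded by the monitor). */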
1056 #define LOAD_DR(n)\
1058 if ((s->cpu_state.dr ## n - s->monitor_vaddr) < MONITOR_MEM_SIZE) {\
1059 /* cannot set breakpoint */\
1060 s->monitor_dr7 &= ~(3 << (2 * n));\
1061 } else {\
1062 asm volatile ("mov %0, %%dr" #n : : "r" (s->cpu_state.dr ## n));\
1066 long kqemu_exec(struct kqemu_state *s)
1068 const struct monitor_code_header *m = (void *)monitor_code;
1069 void (*kernel2monitor)(struct kqemu_state *s) =
1070 (void *)(m->kernel2monitor + s->nexus_kaddr);
1071 unsigned long *ptr;
1072 int ret, apic_nmi_mask, cpl;
1073 uint32_t cs_val;
1074 unsigned long flags;
1075 uint32_t efer_low, efer_high, efer_low1;
1076 int is_user;
1077 uint16_t saved_fs, saved_gs;
1078 #ifdef __x86_64__
1079 uint16_t saved_ds, saved_es;
1080 unsigned long fs_base, gs_base;
1081 #endif
1083 #ifdef PROFILE
1084 s->nb_profile_ts = 0;
1085 #endif
1086 profile_record(s);
1087 profile_record(s);
1089 cs_val = 0; /* avoid warning */
1090 efer_low = 0; /* avoid warning */
1091 efer_high = 0; /* avoid warning */
1092 apic_nmi_mask = 0; /* avoid warning */
1094     /* NOTE: we do not abort here because we need to execute the
1095        various page commands first */
1096 if ((s->cpu_state.tr.selector & 0xfffc) == 0 ||
1097 (s->cpu_state.tr.selector & 4) != 0) {
1098 s->monitor_tr_sel = s->monitor_selector_base + (4 << 3);
1099 } else {
1100 s->monitor_tr_sel = s->cpu_state.tr.selector & 0xfff8;
1103 /* init the initial cr3 */
1104 cpl = s->cpu_state.cpl;
1105 cpl &= 3;
1106 s->cpu_state.cpl = cpl;
1107 s->monitor_cr3 = s->pgds_cr3[(cpl == 3)];
1108 /* init the initial GDT */
1109 #ifdef USE_SEG_GP
1110 s->monitor_gdt.base = s->monitor_data_vaddr +
1111 offsetof(struct kqemu_state, dt_table) + 0x20000 * (cpl == 3);
1112 #else
1113 s->monitor_gdt.base = s->monitor_data_vaddr +
1114 offsetof(struct kqemu_state, dt_table) + 0x20000 * cpl;
1115 #endif
1117 /* push stack frame to call monitor_exec() */
1118 /* reserve space for the registers */
1119 ptr = (void *)(s->stack_end - sizeof(struct kqemu_exception_regs));
1120 #ifdef __x86_64__
1121 *--ptr = 0; /* no return addr */
1122 *--ptr = m->monitor_exec + s->monitor_vaddr;
1123 *--ptr = 0; /* rbp */
1124 *--ptr = 0; /* rbx */
1125 *--ptr = 0; /* r12 */
1126 *--ptr = 0; /* r13 */
1127 *--ptr = 0; /* r14 */
1128 *--ptr = 0; /* r15 */
1129 #else
1130 *--ptr = s->monitor_data_vaddr; /* parameter = kqemu_state */
1131 *--ptr = 0; /* no return addr */
1132 *--ptr = m->monitor_exec + s->monitor_vaddr;
1133 *--ptr = 0; /* ebp */
1134 *--ptr = 0; /* ebx */
1135 *--ptr = 0; /* esi */
1136 *--ptr = 0; /* edi */
1137 #endif
1138 s->monitor_esp = s->monitor_data_vaddr + (unsigned long)ptr -
1139 (unsigned long)s;
1140 profile_record(s);
1141 for(;;) {
1142         /* currently we execute all the monitor code with interrupts
1143            masked. It is not optimal, but it is simpler */
1144 save_flags(flags);
1145 cli();
1146 profile_record(s);
1148 if (s->use_apic) {
1149 apic_nmi_mask = apic_save_and_disable_nmi(s);
1152 /* load breakpoint registers and avoid setting them if in the
1153            monitor address space. We assume that no breakpoints are
1154 set by the host OS for this process */
1155 if (s->cpu_state.dr7 & 0xff) {
1156 s->monitor_dr7 = s->cpu_state.dr7;
1157 LOAD_DR(0);
1158 LOAD_DR(1);
1159 LOAD_DR(2);
1160 LOAD_DR(3);
1161 asm volatile ("mov %0, %%dr6" : : "r" (s->cpu_state.dr6));
1162 } else {
1163 s->monitor_dr7 = 0;
1166 profile_record(s);
1167 if (s->use_sep) {
1168 uint32_t dummy;
1169 /* disable SEP */
1170 rdmsr(MSR_IA32_SYSENTER_CS, cs_val, dummy);
1171 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
1173 profile_record(s);
1174 if (s->use_syscall) {
1175 rdmsr(MSR_EFER, efer_low, efer_high);
1176 efer_low1 = efer_low & ~MSR_EFER_SCE;
1177 wrmsr(MSR_EFER, efer_low1, efer_high);
1179 profile_record(s);
1180 #ifdef __x86_64__
1181         /* disable syscall/sysret (will generate ILLOP exception) */
1182 /* save segment registers */
1183 asm volatile ("movw %%ds, %0" : "=m" (saved_ds));
1184 asm volatile ("movw %%es, %0" : "=m" (saved_es));
1185 rdmsrl(MSR_FSBASE, fs_base);
1186 rdmsrl(MSR_GSBASE, gs_base);
1187 #endif
1188 asm volatile ("movw %%fs, %0" : "=m" (saved_fs));
1189 asm volatile ("movw %%gs, %0" : "=m" (saved_gs));
1190 profile_record(s);
1192 /* write the nexus PTE - we assume the pointer does not change */
1193 is_user = (s->cpu_state.cpl == 3);
1194 if (USE_PAE(s)) {
1195 uint64_t *ptep;
1196 ptep = s->nexus_kaddr_ptep[is_user];
1197 s->nexus_orig_pte = *ptep;
1198 *ptep = s->nexus_pte;
1199 } else {
1200 uint32_t *ptep;
1201 ptep = s->nexus_kaddr_ptep[is_user];
1202 s->nexus_orig_pte = *ptep;
1203 *ptep = s->nexus_pte;
1206 kernel2monitor(s);
1208 /* restore the original PTE (note that the CPL can change) */
1209 is_user = (s->cpu_state.cpl == 3);
1210 if (USE_PAE(s)) {
1211 uint64_t *ptep;
1212 ptep = s->nexus_kaddr_ptep[is_user];
1213 *ptep = s->nexus_orig_pte;
1214 } else {
1215 uint32_t *ptep;
1216 ptep = s->nexus_kaddr_ptep[is_user];
1217 *ptep = s->nexus_orig_pte;
1220 profile_record(s);
1221 /* restore segments */
1222 asm volatile ("movw %0, %%fs" : : "m" (saved_fs));
1223 asm volatile ("movw %0, %%gs" : : "m" (saved_gs));
1224 #ifdef __x86_64__
1225 wrmsrl(MSR_FSBASE, fs_base);
1226 wrmsrl(MSR_GSBASE, gs_base);
1227 asm volatile ("movw %0, %%ds" : : "m" (saved_ds));
1228 asm volatile ("movw %0, %%es" : : "m" (saved_es));
1229 #endif
1230 profile_record(s);
1231 if (s->use_syscall) {
1232 /* restore syscall/sysret */
1233 wrmsr(MSR_EFER, efer_low, efer_high);
1235 profile_record(s);
1236 if (s->use_sep) {
1237 wrmsr(MSR_IA32_SYSENTER_CS, cs_val, 0);
1239 profile_record(s);
1240 if (s->use_apic) {
1241 apic_restore_nmi(s, apic_nmi_mask);
1243 profile_record(s);
1245 if (s->mon_req == MON_REQ_IRQ) {
1246 struct kqemu_exception_regs *r;
1247 /* execute the requested host interrupt and then schedule
1248 in the host OS */
1249 exec_irq(s->arg0); /* side effect: restore the IRQs */
1250 r = (void *)((unsigned long)s->regs - s->monitor_data_vaddr + (unsigned long)s);
1251 if ((r->cs_sel & 3) == 3) {
1252 /* if interrupting user code, we schedule to give time
1253 to the other processes. We can be interrupted by a
1254                    signal in that case. */
1255 if (kqemu_schedule()) {
1256 restore_cpu_state_from_regs(s, r);
1257 ret = KQEMU_RET_INTR;
1258 break;
1261 } else {
1262 unsigned long page_index;
1264 restore_flags(flags);
1265 switch(s->mon_req) {
1266 case MON_REQ_ABORT:
1267 kqemu_log("aborting: %s", s->log_buf);
1268 ret = KQEMU_RET_ABORT;
1269 goto the_end;
1270 case MON_REQ_EXIT:
1271 ret = s->arg0;
1272 if (s->regs) {
1273 struct kqemu_exception_regs *r;
1274 r = (void *)((unsigned long)s->regs - s->monitor_data_vaddr + (unsigned long)s);
1275 restore_cpu_state_from_regs(s, r);
1277 goto the_end;
1278 case MON_REQ_LOG:
1279 kqemu_log("%s", s->log_buf);
1280 break;
1281 case MON_REQ_ALLOC_PAGE:
1282 s->ret = (unsigned long)kqemu_alloc_zeroed_page(&page_index);
1283 s->ret2 = page_index;
1284 break;
1285 case MON_REQ_LOCK_USER_PAGE:
1286 s->ret = (unsigned long)kqemu_lock_user_page(&page_index,
1287 s->arg0);
1288 s->ret2 = page_index;
1289 break;
1290 case MON_REQ_UNLOCK_USER_PAGE:
1291 kqemu_unlock_user_page((struct kqemu_user_page *)s->arg0);
1292 break;
1293 case MON_REQ_EXCEPTION:
1294 exec_exception(s->arg0);
1295 break;
1296 default:
1297 kqemu_log("invalid mon request: %d\n", s->mon_req);
1298 break;
1302 the_end:
1304 profile_record(s);
1305 #ifdef PROFILE
1307 int i, last, first, overhead;
1308 first = s->profile_ts[0];
1309 last = first;
1310 overhead = s->profile_ts[1] - s->profile_ts[0];
1311 kqemu_log("profile (overhead=%d):\n", overhead);
1312 for(i = 1; i < s->nb_profile_ts; i++) {
1313 kqemu_log("%3d@%4d: %6d %6d\n",
1314 i, s->profile_line[i],
1315 s->profile_ts[i] - first - i * overhead,
1316 s->profile_ts[i] - last - overhead);
1317 last = s->profile_ts[i];
1320 #endif
1321 s->cpu_state.retval = ret;
1322 return 0;