4 * Copyright (C) 2004-2008 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 #include "kqemu_int.h"
23 //#define DEBUG_PHYS_LOAD_STORE
26 //#define DEBUG_SOFT_TLB
27 //#define DEBUG_INVALIDATE
29 //#define PROFILE_SOFTMMU
30 //#define DEBUG_DT_CACHE
32 static void mon_set_pte(struct kqemu_state
*s
,
33 int as_index
, unsigned long vaddr
,
34 unsigned long paddr
, int pte_flags
);
35 static void unmap_ram_page(struct kqemu_state
*s
,
36 struct kqemu_ram_page
*rp
);
37 static void unlock_ram_page(struct kqemu_state
*s
,
38 struct kqemu_ram_page
*rp
);
39 static void *mon_alloc_page(struct kqemu_state
*s
,
40 unsigned long *ppage_index
);
46 * Segment state in monitor code:
48 * If CPL = 3 or not USE_SEG_GP:
49 * FS, GS are stored in %fs, %gs.
50 * CS, SS, DS, ES are stored in s->reg1.xx_sel
51 * the content of the CPU seg desc caches are consistent with the dt_table
53 * If CPL != 3 and USE_SEG_GP:
55 * FS, GS are stored in %fs, %gs. If not null and different from
56 * s->reg1.cs_sel and s->reg1.ss_sel, then the content of the CPU
57 * seg desc caches are consistent with s->seg_desc_cache[R_xx]
59 * DS, ES are stored in s1->reg1.xx_sel. Same remark as FS and FS
60 * for CPU seg desc cache consistency.
62 * CS, SS are stored in s1->reg1.xx_sel. The content of the CPU seg
63 * desc caches are consistent with the dt_table
65 * If seg_cache_loaded is true, then s->cpu_state.segs[].base is
66 * updated. For CS and SS, s->cpu_state.segs[].flags is updated too.
70 static inline void save_segs(struct kqemu_state
*s
)
72 struct kqemu_cpu_state
*env
= &s
->cpu_state
;
74 asm volatile ("movw %%fs, %0" : "=m" (env
->segs
[R_FS
].selector
));
75 asm volatile ("movw %%gs, %0" : "=m" (env
->segs
[R_GS
].selector
));
77 rdmsrl(MSR_FSBASE
, env
->segs
[R_FS
].base
);
78 rdmsrl(MSR_GSBASE
, env
->segs
[R_GS
].base
);
80 asm volatile ("movw %%ds, %0" : "=m" (env
->segs
[R_DS
].selector
));
81 asm volatile ("movw %%es, %0" : "=m" (env
->segs
[R_ES
].selector
));
85 static inline void reload_segs(struct kqemu_state
*s
)
87 struct kqemu_cpu_state
*env
= &s
->cpu_state
;
90 if (s
->cpu_state
.cpl
!= 3) {
91 set_cpu_seg_cache(s
, R_FS
, env
->segs
[R_FS
].selector
);
92 set_cpu_seg_cache(s
, R_GS
, env
->segs
[R_GS
].selector
);
94 set_cpu_seg_cache(s
, R_DS
, env
->segs
[R_DS
].selector
);
95 set_cpu_seg_cache(s
, R_ES
, env
->segs
[R_ES
].selector
);
100 LOAD_SEG(fs
, env
->segs
[R_FS
].selector
);
101 LOAD_SEG(gs
, env
->segs
[R_GS
].selector
);
103 LOAD_SEG(ds
, env
->segs
[R_DS
].selector
);
104 LOAD_SEG(es
, env
->segs
[R_ES
].selector
);
108 wrmsrl(MSR_FSBASE
, env
->segs
[R_FS
].base
);
109 wrmsrl(MSR_GSBASE
, env
->segs
[R_GS
].base
);
113 void update_host_cr0(struct kqemu_state
*s
)
115 unsigned long guest_cr0
, host_cr0
;
117 guest_cr0
= s
->cpu_state
.cr0
;
118 host_cr0
= s
->kernel_cr0
;
119 if (guest_cr0
& (CR0_TS_MASK
| CR0_EM_MASK
)) {
120 host_cr0
|= CR0_TS_MASK
;
122 host_cr0
= (host_cr0
& ~(CR0_MP_MASK
)) | (guest_cr0
& CR0_MP_MASK
);
123 host_cr0
&= ~CR0_AM_MASK
;
124 if ((guest_cr0
& CR0_AM_MASK
) && s
->cpu_state
.cpl
== 3)
125 host_cr0
|= CR0_AM_MASK
;
126 asm volatile ("mov %0, %%cr0" : : "r" (host_cr0
));
129 void update_host_cr4(struct kqemu_state
*s
)
131 unsigned long guest_cr4
, host_cr4
, mask
;
132 asm volatile("mov %%cr4, %0" : "=r" (host_cr4
));
134 if (s
->cpuid_features
& CPUID_FXSR
)
135 mask
|= CR4_OSFXSR_MASK
;
136 if (s
->cpuid_features
& CPUID_SSE
)
137 mask
|= CR4_OSXMMEXCPT_MASK
;
138 guest_cr4
= s
->cpu_state
.cr4
;
139 host_cr4
= (guest_cr4
& mask
) | (host_cr4
& ~mask
);
140 if (s
->cpu_state
.cpl
== 0) {
141 host_cr4
&= ~CR4_TSD_MASK
; /* rdtsc is enabled */
143 host_cr4
= (guest_cr4
& CR4_TSD_MASK
) | (host_cr4
& ~CR4_TSD_MASK
);
145 asm volatile ("mov %0, %%cr4" : : "r" (host_cr4
));
148 static inline void restore_monitor_nexus_mapping(struct kqemu_state
*s
)
151 /* restore the original mapping */
152 is_user
= (s
->cpu_state
.cpl
== 3);
155 ptep
= s
->nexus_kaddr_vptep
[is_user
];
156 *ptep
= s
->nexus_orig_pte
;
159 ptep
= s
->nexus_kaddr_vptep
[is_user
];
160 *ptep
= s
->nexus_orig_pte
;
162 asm volatile ("invlpg (%0)" : : "r" (s
->nexus_kaddr
));
165 static void monitor2kernel1(struct kqemu_state
*s
)
167 struct kqemu_exception_regs
*r
;
175 /* map the nexus page to its kernel address */
176 is_user
= (s
->cpu_state
.cpl
== 3);
179 ptep
= s
->nexus_kaddr_vptep
[is_user
];
180 s
->nexus_orig_pte
= *ptep
;
181 *ptep
= s
->nexus_pte
;
184 ptep
= s
->nexus_kaddr_vptep
[is_user
];
185 s
->nexus_orig_pte
= *ptep
;
186 *ptep
= s
->nexus_pte
;
188 asm volatile ("invlpg (%0)" : : "r" (s
->nexus_kaddr
));
196 restore_monitor_nexus_mapping(s
);
203 void monitor_log(struct kqemu_state
*s
, const char *fmt
, ...)
207 mon_vsnprintf(s
->log_buf
, sizeof(s
->log_buf
), fmt
, ap
);
208 s
->mon_req
= MON_REQ_LOG
;
213 void monitor_panic(struct kqemu_state
*s
, const char *fmt
, ...)
217 mon_vsnprintf(s
->log_buf
, sizeof(s
->log_buf
), fmt
, ap
);
218 s
->mon_req
= MON_REQ_ABORT
;
220 /* should never come here */
224 void __attribute__((noreturn
, format (printf
, 3, 4)))
225 monitor_panic_regs(struct kqemu_state
*s
, struct kqemu_exception_regs
*r
,
226 const char *fmt
, ...)
231 mon_vsnprintf(s
->log_buf
, sizeof(s
->log_buf
), fmt
, ap
);
232 len
= strlen(s
->log_buf
);
233 mon_snprintf(s
->log_buf
+ len
, sizeof(s
->log_buf
) - len
,
234 "err=%04x CS:EIP=%04x:" FMT_lx
" SS:SP=%04x:" FMT_lx
"\n",
235 (int)r
->error_code
, r
->cs_sel
, (long)r
->eip
,
236 r
->ss_sel
, (long)r
->esp
);
237 s
->mon_req
= MON_REQ_ABORT
;
239 /* should never come here */
243 struct kqemu_page
*monitor_alloc_page(struct kqemu_state
*s
,
244 unsigned long *ppage_index
)
246 s
->mon_req
= MON_REQ_ALLOC_PAGE
;
248 *ppage_index
= s
->ret2
;
249 return (void *)s
->ret
;
252 static struct kqemu_user_page
*monitor_lock_user_page(struct kqemu_state
*s
,
253 unsigned long *ppage_index
,
256 s
->mon_req
= MON_REQ_LOCK_USER_PAGE
;
259 *ppage_index
= s
->ret2
;
260 return (void *)s
->ret
;
263 static void monitor_unlock_user_page(struct kqemu_state
*s
,
264 struct kqemu_user_page
*page
)
266 s
->mon_req
= MON_REQ_UNLOCK_USER_PAGE
;
267 s
->arg0
= (long)page
;
271 /* return NULL if error */
272 static void *mon_alloc_page(struct kqemu_state
*s
,
273 unsigned long *ppage_index
)
275 unsigned long vaddr
, page_index
;
276 struct kqemu_page
*host_page
;
278 host_page
= monitor_alloc_page(s
, &page_index
);
282 vaddr
= get_vaddr(s
);
283 /* XXX: check error */
284 set_vaddr_page_index(s
, vaddr
, page_index
, host_page
, 0);
285 mon_set_pte(s
, 0, vaddr
, page_index
,
286 PG_PRESENT_MASK
| PG_GLOBAL(s
) | PG_RW_MASK
);
288 *ppage_index
= page_index
;
289 return (void *)vaddr
;
292 static void mon_set_pte(struct kqemu_state
*s
,
293 int as_index
, unsigned long vaddr
,
294 unsigned long page_index
, int pte_flags
)
298 ptep
= mon_get_ptep_l3(s
, as_index
, vaddr
, 1);
299 *ptep
= ((uint64_t)page_index
<< PAGE_SHIFT
) | pte_flags
;
302 ptep
= mon_get_ptep_l2(s
, as_index
, vaddr
, 1);
303 *ptep
= (page_index
<< PAGE_SHIFT
) | pte_flags
;
305 asm volatile("invlpg %0" : : "m" (*(uint8_t *)vaddr
));
308 static uint32_t phys_page_find(struct kqemu_state
*s
,
309 unsigned long page_index
)
313 ptr
= phys_page_findp(s
, page_index
, 0);
315 return KQEMU_IO_MEM_UNASSIGNED
;
318 monitor_log(s
, "pd=%08x\n", pd
);
323 /* return the ram page only if it is already locked */
324 static struct kqemu_ram_page
*get_locked_ram_page(struct kqemu_state
*s
,
325 unsigned long ram_addr
)
328 struct kqemu_ram_page
*rp
;
329 ram_page_index
= ram_addr
>> PAGE_SHIFT
;
330 rp
= &s
->ram_pages
[ram_page_index
];
336 /* unlock some pages to be able to allocate at least one page */
337 static void unlock_pages(struct kqemu_state
*s
)
339 while (s
->nb_locked_ram_pages
>= s
->max_locked_ram_pages
) {
340 /* unlock the least recently used pages */
341 unlock_ram_page(s
, s
->locked_page_head
.lock_prev
);
345 static struct kqemu_ram_page
*lock_ram_page(struct kqemu_state
*s
,
346 unsigned long ram_addr
)
349 struct kqemu_ram_page
*rp
, **p
, *rp_prev
, *rp_next
;
350 unsigned long uaddr
, page_index
;
351 struct kqemu_user_page
*host_page
;
353 ram_page_index
= ram_addr
>> PAGE_SHIFT
;
354 rp
= &s
->ram_pages
[ram_page_index
];
355 if (rp
->paddr
== -1) {
359 uaddr
= ram_addr
+ s
->ram_base_uaddr
;
360 host_page
= monitor_lock_user_page(s
, &page_index
, uaddr
);
362 monitor_panic(s
, "Could not lock user page %p", (void *)uaddr
);
363 rp
->paddr
= page_index
;
364 rp
->host_page
= host_page
;
366 /* insert in hash table */
367 p
= &s
->ram_page_hash
[ram_page_hash_func(page_index
)];
371 /* insert at lock list head */
372 rp_prev
= &s
->locked_page_head
;
373 rp_next
= s
->locked_page_head
.lock_next
;
374 rp_next
->lock_prev
= rp
;
375 rp
->lock_next
= rp_next
;
376 rp_prev
->lock_next
= rp
;
377 rp
->lock_prev
= rp_prev
;
378 s
->nb_locked_ram_pages
++;
380 monitor_log(s
, "lock_ram_page: %p rp=%p\n", (void *)ram_addr
, rp
);
386 static void unlock_ram_page(struct kqemu_state
*s
,
387 struct kqemu_ram_page
*rp
)
389 struct kqemu_ram_page
**prp
;
394 monitor_log(s
, "unlock_ram_page: rp=%p\n", rp
);
396 unmap_ram_page(s
, rp
);
398 /* remove it from the hash list */
399 prp
= &s
->ram_page_hash
[ram_page_hash_func(rp
->paddr
)];
404 *prp
= rp
->hash_next
;
407 prp
= &(*prp
)->hash_next
;
410 /* unlock it in the kernel */
411 monitor_unlock_user_page(s
, rp
->host_page
);
415 /* remove from lock list */
416 rp
->lock_prev
->lock_next
= rp
->lock_next
;
417 rp
->lock_next
->lock_prev
= rp
->lock_prev
;
418 s
->nb_locked_ram_pages
--;
421 static void map_ram_page(struct kqemu_state
*s
,
422 int as_index
, unsigned long vaddr
,
423 struct kqemu_ram_page
*rp
, int pte_flags
)
425 unsigned long *rptep
;
426 struct kqemu_ram_page
*rp_prev
, *rp_next
;
429 monitor_log(s
, "map_ram_page: vaddr=%p rp=%p pte_flags=0x%x\n",
430 (void *)vaddr
, rp
, pte_flags
);
432 unmap_virtual_ram_page(s
, as_index
, vaddr
);
434 mon_set_pte(s
, as_index
, vaddr
, rp
->paddr
, pte_flags
);
436 if (rp
->vaddr
== -1) {
437 /* most common case */
438 rp
->vaddr
= vaddr
| (as_index
<< 1);
440 /* add in mapping list */
441 rp_prev
= s
->mapped_page_head
.map_prev
;
442 rp_next
= &s
->mapped_page_head
;
443 rp_next
->map_prev
= rp
;
444 rp
->map_next
= rp_next
;
445 rp_prev
->map_next
= rp
;
446 rp
->map_prev
= rp_prev
;
448 /* add a new mapping (there is already at least one mapping) */
449 rptep
= get_ram_page_next_mapping_alloc(s
, as_index
, vaddr
, 1);
451 monitor_panic(s
, "next_mapping: could not alloc page");
453 rp
->vaddr
= vaddr
| (as_index
<< 1) | 1;
456 /* move to head in locked list */
457 rp_prev
= &s
->locked_page_head
;
458 if (rp
!= rp_prev
->lock_next
) {
460 rp
->lock_prev
->lock_next
= rp
->lock_next
;
461 rp
->lock_next
->lock_prev
= rp
->lock_prev
;
464 rp_next
= s
->locked_page_head
.lock_next
;
465 rp_next
->lock_prev
= rp
;
466 rp
->lock_next
= rp_next
;
467 rp_prev
->lock_next
= rp
;
468 rp
->lock_prev
= rp_prev
;
472 static unsigned long ram_ptr_to_ram_addr(struct kqemu_state
*s
, void *ptr
)
475 slot
= ((unsigned long)ptr
- s
->ram_page_cache_base
) >> PAGE_SHIFT
;
476 return s
->slot_to_ram_addr
[slot
];
479 static void *get_ram_ptr_slow(struct kqemu_state
*s
, int slot
,
480 unsigned long ram_addr
)
482 struct kqemu_ram_page
*rp
;
486 #ifdef PROFILE_INTERP2
487 s
->ram_map_miss_count
++;
489 rp
= lock_ram_page(s
, ram_addr
);
490 vaddr
= (slot
<< PAGE_SHIFT
) + s
->ram_page_cache_base
;
491 /* map the ram page */
492 map_ram_page(s
, 0, vaddr
, rp
,
493 PG_PRESENT_MASK
| PG_GLOBAL(s
) |
494 PG_ACCESSED_MASK
| PG_DIRTY_MASK
|
496 s
->slot_to_ram_addr
[slot
] = ram_addr
;
498 #if defined(DEBUG_SOFT_TLB)
499 monitor_log(s
, "get_ram_ptr: slot=%d ram_addr=%p ptr=%p\n",
500 slot
, (void *)ram_addr
, ptr
);
505 static inline void *get_ram_ptr(struct kqemu_state
*s
, int slot
,
506 unsigned long ram_addr
)
509 #ifdef PROFILE_INTERP2
512 if (likely(s
->slot_to_ram_addr
[slot
] == ram_addr
)) {
513 vaddr
= (slot
<< PAGE_SHIFT
) + s
->ram_page_cache_base
;
514 return (void *)vaddr
;
516 return get_ram_ptr_slow(s
, slot
, ram_addr
);
520 static inline int ram_is_dirty(struct kqemu_state
*s
, unsigned long ram_addr
)
522 return s
->ram_dirty
[ram_addr
>> PAGE_SHIFT
] == 0xff;
525 static inline int ram_get_dirty(struct kqemu_state
*s
, unsigned long ram_addr
,
528 return s
->ram_dirty
[ram_addr
>> PAGE_SHIFT
] & dirty_flags
;
531 static void ram_set_read_only(struct kqemu_state
*s
,
532 unsigned long ram_addr
)
534 struct kqemu_ram_page
*rp
;
535 unsigned long addr
, vaddr
;
536 unsigned long *nptep
;
539 rp
= get_locked_ram_page(s
, ram_addr
);
545 addr
= vaddr
& ~0xfff;
546 if ((addr
- s
->ram_page_cache_base
) < SOFT_TLB_SIZE
* PAGE_SIZE
) {
550 ptep
= (uint32_t *)mon_get_ptep_l3(s
,
551 GET_AS(vaddr
), addr
, 0);
553 ptep
= mon_get_ptep_l2(s
, GET_AS(vaddr
), addr
, 0);
554 *ptep
&= ~PG_RW_MASK
;
555 asm volatile("invlpg %0" : : "m" (*(uint8_t *)addr
));
557 if (IS_LAST_VADDR(vaddr
))
559 nptep
= get_ram_page_next_mapping(s
, GET_AS(vaddr
), addr
);
565 /* XXX: need to reset user space structures too */
566 static void ram_reset_dirty(struct kqemu_state
*s
,
567 unsigned long ram_addr
, int dirty_flag
)
570 /* we must modify the protection of all the user pages if it is
572 if (ram_is_dirty(s
, ram_addr
)) {
573 ram_set_read_only(s
, ram_addr
);
574 /* signal QEMU that it needs to update its TLB info */
575 s
->cpu_state
.nb_ram_pages_to_update
= 1;
577 s
->ram_dirty
[ram_addr
>> PAGE_SHIFT
] &= ~dirty_flag
;
580 static inline void *get_phys_mem_ptr(struct kqemu_state
*s
,
581 unsigned long paddr
, int write
)
584 unsigned long pd
, ram_addr
;
587 pd
= phys_page_find(s
, paddr
>> PAGE_SHIFT
);
588 io_index
= (pd
& ~PAGE_MASK
);
589 if (unlikely(io_index
!= KQEMU_IO_MEM_RAM
)) {
590 if (io_index
!= KQEMU_IO_MEM_ROM
)
595 ram_addr
= pd
& PAGE_MASK
;
596 slot
= (ram_addr
>> PAGE_SHIFT
);
597 slot
= slot
^ (slot
>> PHYS_SLOT_BITS
) ^ (slot
>> (2 * PHYS_SLOT_BITS
));
598 slot
= (slot
& (PHYS_NB_SLOTS
- 1)) + SOFT_TLB_SIZE
;
599 ptr
= get_ram_ptr(s
, slot
, ram_addr
);
600 #if defined(DEBUG_TLB)
601 monitor_log(s
, "get_phys_mem_ptr: paddr=%p ram_addr=%p ptr=%p\n",
606 return ptr
+ (paddr
& ~PAGE_MASK
);
609 static uint32_t ldl_phys_mmu(struct kqemu_state
*s
, unsigned long addr
)
613 ptr
= get_phys_mem_ptr(s
, addr
, 0);
618 #ifdef DEBUG_PHYS_LOAD_STORE
619 monitor_log(s
, "ldl_phys_mmu: %p = 0x%08x\n", (void *)addr
, val
);
624 /* NOTE: we do not update the dirty bits. This function is only used
625 to update the D and A bits, so it is not critical */
626 static void stl_phys_mmu(struct kqemu_state
*s
, unsigned long addr
,
630 #ifdef DEBUG_PHYS_LOAD_STORE
631 monitor_log(s
, "st_phys_mmu: %p = 0x%08x\n", (void *)addr
, val
);
633 ptr
= get_phys_mem_ptr(s
, addr
, 1);
638 /* return 0 if OK, 2 if the mapping could not be done because I/O
639 memory region or monitor memory area */
640 static long tlb_set_page(struct kqemu_state
*s
,
641 unsigned long vaddr
, unsigned long paddr
,
642 int prot
, int is_softmmu
)
645 int pte_flags
, mask
, is_user
;
647 struct kqemu_ram_page
*rp
;
650 monitor_log(s
, "tlb_set_page: vaddr=%p paddr=%p prot=0x%02x s=%d\n",
651 (void *)vaddr
, (void *)paddr
, prot
, is_softmmu
);
653 pd
= phys_page_find(s
, paddr
>> PAGE_SHIFT
);
655 if ((pd
& ~PAGE_MASK
) > KQEMU_IO_MEM_ROM
) {
656 if ((pd
& ~PAGE_MASK
) == KQEMU_IO_MEM_COMM
) {
657 /* special case: mapping of the kqemu communication page */
658 pte_flags
= PG_PRESENT_MASK
| PG_USER_MASK
|
659 PG_ACCESSED_MASK
| PG_DIRTY_MASK
;
660 is_user
= (s
->cpu_state
.cpl
== 3);
666 pte_flags
|= PG_ORIG_RW_MASK
| PG_RW_MASK
;
667 mon_set_pte(s
, is_user
, vaddr
, s
->comm_page_index
, pte_flags
);
670 /* IO access: no mapping is done as it will be handled by the
676 /* XXX: dirty ram support */
677 /* XXX: rom support */
679 unsigned long vaddr1
;
682 slot
= (vaddr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1);
683 e
= &s
->soft_tlb
[slot
];
684 vaddr1
= vaddr
& PAGE_MASK
;
685 if (prot
& PAGE_KREAD
)
686 e
->vaddr
[0] = vaddr1
;
689 if (prot
& PAGE_KWRITE
)
690 e
->vaddr
[1] = vaddr1
;
693 if (prot
& PAGE_UREAD
)
694 e
->vaddr
[2] = vaddr1
;
697 if (prot
& PAGE_UWRITE
)
698 e
->vaddr
[3] = vaddr1
;
701 ptr
= get_ram_ptr(s
, slot
, pd
& PAGE_MASK
);
702 e
->addend
= (unsigned long)ptr
- vaddr1
;
703 #ifdef DEBUG_SOFT_TLB
704 monitor_log(s
, "tlb_set_page: vaddr=%p paddr=%p prot=0x%02x s=%d\n",
705 (void *)vaddr
, (void *)paddr
, prot
, is_softmmu
);
708 } else if ((vaddr
- s
->monitor_vaddr
) < MONITOR_MEM_SIZE
) {
711 pte_flags
= PG_PRESENT_MASK
| PG_USER_MASK
|
712 PG_ACCESSED_MASK
| PG_DIRTY_MASK
;
713 #ifdef USE_USER_PG_GLOBAL
714 /* user pages are marked as global to stay in TLB when
715 switching to kernel mode */
716 /* XXX: check WP bit or ensure once that WP is set in
718 if (prot
& PAGE_UREAD
)
719 pte_flags
|= PG_GLOBAL(s
);
721 is_user
= (s
->cpu_state
.cpl
== 3);
727 pte_flags
|= PG_ORIG_RW_MASK
| PG_RW_MASK
;
728 if ((pd
& ~PAGE_MASK
) == KQEMU_IO_MEM_ROM
||
729 ((pd
& ~PAGE_MASK
) == KQEMU_IO_MEM_RAM
&&
730 !ram_is_dirty(s
, pd
))) {
731 pte_flags
&= ~PG_RW_MASK
;
734 rp
= lock_ram_page(s
, pd
& PAGE_MASK
);
735 map_ram_page(s
, is_user
, vaddr
, rp
, pte_flags
);
743 0 = nothing more to do
744 1 = generate PF fault
745 2 = soft MMU activation required for this block
747 long cpu_x86_handle_mmu_fault(struct kqemu_state
*s
, unsigned long addr
,
748 int is_write
, int is_user
, int is_softmmu
)
750 struct kqemu_cpu_state
*env
= &s
->cpu_state
;
751 uint32_t pdpe_addr
, pde_addr
, pte_addr
;
752 uint32_t pde
, pte
, ptep
, pdpe
;
753 int error_code
, is_dirty
, prot
, page_size
;
754 unsigned long paddr
, page_offset
;
755 unsigned long vaddr
, virt_addr
;
759 monitor_log(s
, "mmu_fault: addr=%08lx w=%d u=%d s=%d\n",
760 addr
, is_write
, is_user
, is_softmmu
);
765 if (!(env
->cr0
& CR0_PG_MASK
)) {
767 virt_addr
= addr
& PAGE_MASK
;
768 prot
= PAGE_KREAD
| PAGE_KWRITE
| PAGE_UREAD
| PAGE_UWRITE
;
774 if (env
->cr4
& CR4_PAE_MASK
) {
775 /* XXX: we only use 32 bit physical addresses */
777 if (env
->efer
& MSR_EFER_LMA
) {
778 uint32_t pml4e_addr
, pml4e
;
781 /* XXX: handle user + rw rights */
782 /* XXX: handle NX flag */
783 /* test virtual address sign extension */
784 sext
= (int64_t)addr
>> 47;
785 if (sext
!= 0 && sext
!= -1) {
790 pml4e_addr
= ((env
->cr3
& ~0xfff) + (((addr
>> 39) & 0x1ff) << 3)) &
792 pml4e
= ldl_phys_mmu(s
, pml4e_addr
);
793 if (!(pml4e
& PG_PRESENT_MASK
)) {
797 if (!(pml4e
& PG_ACCESSED_MASK
)) {
798 pml4e
|= PG_ACCESSED_MASK
;
799 stl_phys_mmu(s
, pml4e_addr
, pml4e
);
802 pdpe_addr
= ((pml4e
& ~0xfff) + (((addr
>> 30) & 0x1ff) << 3)) &
804 pdpe
= ldl_phys_mmu(s
, pdpe_addr
);
805 if (!(pdpe
& PG_PRESENT_MASK
)) {
809 if (!(pdpe
& PG_ACCESSED_MASK
)) {
810 pdpe
|= PG_ACCESSED_MASK
;
811 stl_phys_mmu(s
, pdpe_addr
, pdpe
);
816 pdpe_addr
= ((env
->cr3
& ~0x1f) + ((addr
>> 30) << 3)) &
818 pdpe
= ldl_phys_mmu(s
, pdpe_addr
);
819 if (!(pdpe
& PG_PRESENT_MASK
)) {
825 pde_addr
= ((pdpe
& ~0xfff) + (((addr
>> 21) & 0x1ff) << 3)) &
827 pde
= ldl_phys_mmu(s
, pde_addr
);
828 if (!(pde
& PG_PRESENT_MASK
)) {
832 if (pde
& PG_PSE_MASK
) {
834 page_size
= 2048 * 1024;
835 goto handle_big_page
;
838 if (!(pde
& PG_ACCESSED_MASK
)) {
839 pde
|= PG_ACCESSED_MASK
;
840 stl_phys_mmu(s
, pde_addr
, pde
);
842 pte_addr
= ((pde
& ~0xfff) + (((addr
>> 12) & 0x1ff) << 3)) &
847 /* page directory entry */
848 pde_addr
= ((env
->cr3
& ~0xfff) + ((addr
>> 20) & ~3)) &
850 pde
= ldl_phys_mmu(s
, pde_addr
);
851 if (!(pde
& PG_PRESENT_MASK
)) {
855 /* if PSE bit is set, then we use a 4MB page */
856 if ((pde
& PG_PSE_MASK
) && (env
->cr4
& CR4_PSE_MASK
)) {
857 page_size
= 4096 * 1024;
860 if (!(pde
& PG_USER_MASK
))
861 goto do_fault_protect
;
862 if (is_write
&& !(pde
& PG_RW_MASK
))
863 goto do_fault_protect
;
865 if ((env
->cr0
& CR0_WP_MASK
) &&
866 is_write
&& !(pde
& PG_RW_MASK
))
867 goto do_fault_protect
;
869 is_dirty
= is_write
&& !(pde
& PG_DIRTY_MASK
);
870 if (!(pde
& PG_ACCESSED_MASK
) || is_dirty
) {
871 pde
|= PG_ACCESSED_MASK
;
873 pde
|= PG_DIRTY_MASK
;
874 stl_phys_mmu(s
, pde_addr
, pde
);
877 pte
= pde
& ~( (page_size
- 1) & ~0xfff); /* align to page_size */
879 virt_addr
= addr
& ~(page_size
- 1);
881 if (!(pde
& PG_ACCESSED_MASK
)) {
882 pde
|= PG_ACCESSED_MASK
;
883 stl_phys_mmu(s
, pde_addr
, pde
);
886 /* page directory entry */
887 pte_addr
= ((pde
& ~0xfff) + ((addr
>> 10) & 0xffc)) &
890 pte
= ldl_phys_mmu(s
, pte_addr
);
891 if (!(pte
& PG_PRESENT_MASK
)) {
895 /* combine pde and pte user and rw protections */
898 if (!(ptep
& PG_USER_MASK
))
899 goto do_fault_protect
;
900 if (is_write
&& !(ptep
& PG_RW_MASK
))
901 goto do_fault_protect
;
903 if ((env
->cr0
& CR0_WP_MASK
) &&
904 is_write
&& !(ptep
& PG_RW_MASK
))
905 goto do_fault_protect
;
907 is_dirty
= is_write
&& !(pte
& PG_DIRTY_MASK
);
908 if (!(pte
& PG_ACCESSED_MASK
) || is_dirty
) {
909 pte
|= PG_ACCESSED_MASK
;
911 pte
|= PG_DIRTY_MASK
;
912 stl_phys_mmu(s
, pte_addr
, pte
);
915 virt_addr
= addr
& ~0xfff;
918 /* the page can be put in the TLB */
920 if (ptep
& PG_USER_MASK
)
922 if (pte
& PG_DIRTY_MASK
) {
923 /* only set write access if already dirty... otherwise wait
925 if (ptep
& PG_USER_MASK
) {
926 if (ptep
& PG_RW_MASK
)
929 if (!(env
->cr0
& CR0_WP_MASK
) ||
935 pte
= pte
& env
->a20_mask
;
937 /* Even if 4MB pages, we map only one 4KB page in the cache to
938 avoid filling it too fast */
939 page_offset
= (addr
& PAGE_MASK
) & (page_size
- 1);
940 paddr
= (pte
& PAGE_MASK
) + page_offset
;
941 vaddr
= virt_addr
+ page_offset
;
943 ret
= tlb_set_page(s
, vaddr
, paddr
, prot
, is_softmmu
);
947 error_code
= PG_ERROR_P_MASK
;
950 env
->error_code
= (is_write
<< PG_ERROR_W_BIT
) | error_code
;
952 env
->error_code
|= PG_ERROR_U_MASK
;
956 static void soft_tlb_fill(struct kqemu_state
*s
, unsigned long vaddr
,
957 int is_write
, int is_user
)
960 #ifdef PROFILE_SOFTMMU
964 ret
= cpu_x86_handle_mmu_fault(s
, vaddr
, is_write
, is_user
, 1);
965 #ifdef PROFILE_SOFTMMU
966 ti
= getclock() - ti
;
967 monitor_log(s
, "soft_tlb_fill: w=%d u=%d addr=%p cycle=%d\n",
968 is_write
, is_user
, (void *)vaddr
, ti
);
971 raise_exception(s
, EXCP0E_PAGE
);
973 raise_exception(s
, KQEMU_RET_SOFTMMU
);
976 static void *map_vaddr(struct kqemu_state
*s
, unsigned long addr
,
977 int is_write
, int is_user
)
982 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
984 if (e
->vaddr
[(is_user
<< 1) + is_write
] != (addr
& PAGE_MASK
)) {
985 soft_tlb_fill(s
, addr
, is_write
, is_user
);
988 taddr
= e
->addend
+ addr
;
990 return (void *)taddr
;
993 uint32_t ldub_slow(struct kqemu_state
*s
, unsigned long addr
,
1000 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1002 if (unlikely(e
->vaddr
[(is_user
<< 1)] != (addr
& PAGE_MASK
))) {
1003 soft_tlb_fill(s
, addr
, 0, is_user
);
1006 taddr
= e
->addend
+ addr
;
1007 val
= *(uint8_t *)taddr
;
1012 uint32_t lduw_slow(struct kqemu_state
*s
, unsigned long addr
,
1017 unsigned long taddr
;
1019 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1021 if (unlikely(e
->vaddr
[(is_user
<< 1)] != (addr
& (PAGE_MASK
| 1)))) {
1022 if (e
->vaddr
[(is_user
<< 1)] == (addr
& PAGE_MASK
)) {
1023 /* unaligned access */
1024 if (((addr
+ 1) & PAGE_MASK
) == (addr
& PAGE_MASK
)) {
1028 /* access spans two pages (rare case) */
1029 v0
= ldub_slow(s
, addr
, is_user
);
1030 v1
= ldub_slow(s
, addr
+ 1, is_user
);
1031 val
= v0
| (v1
<< 8);
1034 soft_tlb_fill(s
, addr
, 0, is_user
);
1039 taddr
= e
->addend
+ addr
;
1040 val
= *(uint16_t *)taddr
;
1045 uint32_t ldl_slow(struct kqemu_state
*s
, unsigned long addr
,
1050 unsigned long taddr
;
1052 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1054 if (unlikely(e
->vaddr
[(is_user
<< 1)] != (addr
& (PAGE_MASK
| 3)))) {
1055 if (e
->vaddr
[(is_user
<< 1)] == (addr
& PAGE_MASK
)) {
1056 /* unaligned access */
1057 if (((addr
+ 3) & PAGE_MASK
) == (addr
& PAGE_MASK
)) {
1062 /* access spans two pages (rare case) */
1063 shift
= (addr
& 3) * 8;
1065 v0
= ldl_slow(s
, addr
, is_user
);
1066 v1
= ldl_slow(s
, addr
+ 4, is_user
);
1067 val
= (v0
>> shift
) | (v1
<< (32 - shift
));
1070 soft_tlb_fill(s
, addr
, 0, is_user
);
1075 taddr
= e
->addend
+ addr
;
1076 val
= *(uint32_t *)taddr
;
1081 uint64_t ldq_slow(struct kqemu_state
*s
, unsigned long addr
,
1086 unsigned long taddr
;
1088 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1090 if (unlikely(e
->vaddr
[(is_user
<< 1)] != (addr
& (PAGE_MASK
| 7)))) {
1091 if (e
->vaddr
[(is_user
<< 1)] == (addr
& PAGE_MASK
)) {
1092 /* unaligned access */
1093 if (((addr
+ 7) & PAGE_MASK
) == (addr
& PAGE_MASK
)) {
1098 /* access spans two pages (rare case) */
1099 shift
= (addr
& 7) * 8;
1101 v0
= ldq_slow(s
, addr
, is_user
);
1102 v1
= ldq_slow(s
, addr
+ 8, is_user
);
1103 val
= (v0
>> shift
) | (v1
<< (64 - shift
));
1106 soft_tlb_fill(s
, addr
, 0, is_user
);
1111 taddr
= e
->addend
+ addr
;
1112 val
= *(uint64_t *)taddr
;
1117 void stb_slow(struct kqemu_state
*s
, unsigned long addr
,
1118 uint32_t val
, int is_user
)
1121 unsigned long taddr
;
1123 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1125 if (unlikely(e
->vaddr
[(is_user
<< 1) + 1] != (addr
& PAGE_MASK
))) {
1126 soft_tlb_fill(s
, addr
, 1, is_user
);
1129 taddr
= e
->addend
+ addr
;
1130 *(uint8_t *)taddr
= val
;
1134 void stw_slow(struct kqemu_state
*s
, unsigned long addr
,
1135 uint32_t val
, int is_user
)
1138 unsigned long taddr
;
1140 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1142 if (unlikely(e
->vaddr
[(is_user
<< 1) + 1] != (addr
& (PAGE_MASK
| 1)))) {
1143 if (e
->vaddr
[(is_user
<< 1) + 1] == (addr
& PAGE_MASK
)) {
1144 /* unaligned access */
1145 if (((addr
+ 1) & PAGE_MASK
) == (addr
& PAGE_MASK
)) {
1148 /* access spans two pages (rare case) */
1149 stb_slow(s
, addr
, val
, is_user
);
1150 stb_slow(s
, addr
+ 1, val
>> 8, is_user
);
1153 soft_tlb_fill(s
, addr
, 1, is_user
);
1158 taddr
= e
->addend
+ addr
;
1159 *(uint16_t *)taddr
= val
;
1163 void stl_slow(struct kqemu_state
*s
, unsigned long addr
,
1164 uint32_t val
, int is_user
)
1167 unsigned long taddr
;
1169 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1171 if (unlikely(e
->vaddr
[(is_user
<< 1) + 1] != (addr
& (PAGE_MASK
| 3)))) {
1172 if (e
->vaddr
[(is_user
<< 1) + 1] == (addr
& PAGE_MASK
)) {
1173 /* unaligned access */
1174 if (((addr
+ 3) & PAGE_MASK
) == (addr
& PAGE_MASK
)) {
1177 /* access spans two pages (rare case) */
1178 stb_slow(s
, addr
, val
, is_user
);
1179 stb_slow(s
, addr
+ 1, val
>> 8, is_user
);
1180 stb_slow(s
, addr
+ 2, val
>> 16, is_user
);
1181 stb_slow(s
, addr
+ 3, val
>> 24, is_user
);
1184 soft_tlb_fill(s
, addr
, 1, is_user
);
1189 taddr
= e
->addend
+ addr
;
1190 *(uint32_t *)taddr
= val
;
1194 void stq_slow(struct kqemu_state
*s
, unsigned long addr
,
1195 uint64_t val
, int is_user
)
1198 unsigned long taddr
;
1200 e
= &s
->soft_tlb
[(addr
>> PAGE_SHIFT
) & (SOFT_TLB_SIZE
- 1)];
1202 if (unlikely(e
->vaddr
[(is_user
<< 1) + 1] != (addr
& (PAGE_MASK
| 7)))) {
1203 if (e
->vaddr
[(is_user
<< 1) + 1] == (addr
& PAGE_MASK
)) {
1204 /* unaligned access */
1205 if (((addr
+ 7) & PAGE_MASK
) == (addr
& PAGE_MASK
)) {
1208 /* access spans two pages (rare case) */
1209 stb_slow(s
, addr
, val
, is_user
);
1210 stb_slow(s
, addr
+ 1, val
>> 8, is_user
);
1211 stb_slow(s
, addr
+ 2, val
>> 16, is_user
);
1212 stb_slow(s
, addr
+ 3, val
>> 24, is_user
);
1213 stb_slow(s
, addr
+ 4, val
>> 32, is_user
);
1214 stb_slow(s
, addr
+ 5, val
>> 40, is_user
);
1215 stb_slow(s
, addr
+ 6, val
>> 48, is_user
);
1216 stb_slow(s
, addr
+ 7, val
>> 56, is_user
);
1219 soft_tlb_fill(s
, addr
, 1, is_user
);
1224 taddr
= e
->addend
+ addr
;
1225 *(uint64_t *)taddr
= val
;
1229 extern unsigned long __start_mmu_ex_table
;
1230 extern unsigned long __stop_mmu_ex_table
;
1233 void lsort(unsigned long *tab
, int n
)
1238 for(i
= 0; i
< n
- 1; i
++) {
1239 for(j
= i
+ 1; j
< n
;j
++) {
1240 if (tab
[i
] > tab
[j
]) {
1248 for(i
= 0; i
< n
- 1; i
++) {
1249 if (tab
[i
] > tab
[i
+ 1])
1250 asm volatile("ud2");
1255 static int expected_monitor_exception(unsigned long pc
)
1257 unsigned long *tab
, v
;
1259 if (unlikely(!sorted
)) {
1260 lsort(&__start_mmu_ex_table
,
1261 &__stop_mmu_ex_table
- &__start_mmu_ex_table
);
1265 tab
= &__start_mmu_ex_table
;
1267 b
= &__stop_mmu_ex_table
- &__start_mmu_ex_table
- 1;
1283 void kqemu_exception_0e(struct kqemu_state
*s
,
1284 struct kqemu_exception_regs regs
)
1286 unsigned long address
;
1287 int is_write
, is_user
;
1289 #ifdef PROFILE_INTERP2
1292 asm volatile ("mov %%cr2, %0" : "=r" (address
));
1293 #ifdef PROFILE_INTERP2
1297 if ((regs
.cs_sel
& 3) != 3) {
1298 if (!expected_monitor_exception(regs
.eip
)) {
1299 /* exception in monitor space - we may accept it someday if it
1300 is a user access indicated as such */
1301 monitor_panic_regs(s
, ®s
,
1302 "Paging exception in monitor address space. CR2=%p\n",
1305 /* do not reload s->regs because we are already in interpreter */
1306 s
->seg_cache_loaded
= 1;
1309 s
->seg_cache_loaded
= 0;
1311 is_write
= (regs
.error_code
>> 1) & 1;
1312 #ifdef PROFILE_INTERP2
1313 s
->total_page_fault_count
++;
1315 /* see if the page is write protected -> mark it dirty if needed */
1316 is_user
= (s
->cpu_state
.cpl
== 3);
1317 if (is_write
&& (regs
.error_code
& 1)) {
1318 uint32_t ram_index
, *ptep
;
1319 struct kqemu_ram_page
*rp
;
1322 /* get the original writable flag */
1325 ptep
= (uint32_t *)mon_get_ptep_l3(s
, is_user
, address
, 0);
1328 pte
= *(uint64_t *)ptep
;
1329 if (!(pte
& PG_PRESENT_MASK
))
1331 if (!(pte
& PG_ORIG_RW_MASK
))
1333 rp
= find_ram_page_from_paddr(s
, pte
>> PAGE_SHIFT
);
1336 ptep
= mon_get_ptep_l2(s
, is_user
, address
, 0);
1340 if (!(pte
& PG_PRESENT_MASK
))
1342 if (!(pte
& PG_ORIG_RW_MASK
))
1344 rp
= find_ram_page_from_paddr(s
, pte
>> PAGE_SHIFT
);
1348 ram_index
= rp
- s
->ram_pages
;
1349 /* cannot write directly on GDT/LDT pages or in pages where
1350 code was translated */
1351 /* XXX: should revalidate or interpret the code to go faster */
1354 if (s
->cpu_state
.cpl
== 3)
1355 dirty_mask
|= DT_DIRTY_FLAG
;
1357 dirty_mask
= DT_DIRTY_FLAG
;
1359 if ((s
->ram_dirty
[ram_index
] & dirty_mask
) != dirty_mask
) {
1360 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1362 /* code updates need to be signaled */
1363 if ((s
->ram_dirty
[ram_index
] & CODE_DIRTY_FLAG
) !=
1365 s
->modified_ram_pages
[s
->cpu_state
.nb_modified_ram_pages
++] =
1366 ram_index
<< PAGE_SHIFT
;
1367 /* too many modified pages: exit */
1368 if (s
->cpu_state
.nb_modified_ram_pages
>=
1369 KQEMU_MAX_MODIFIED_RAM_PAGES
)
1370 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1373 /* set the page as RW and mark the corresponding ram page as
1375 s
->ram_dirty
[ram_index
] = 0xff;
1376 *ptep
|= PG_RW_MASK
;
1377 asm volatile("invlpg %0" : : "m" (*(uint8_t *)address
));
1382 #ifdef PROFILE_INTERP2
1383 s
->mmu_page_fault_count
++;
1385 /* see if it is an MMU fault */
1386 ret
= cpu_x86_handle_mmu_fault(s
, address
, is_write
, is_user
, 0);
1389 #ifdef PROFILE_INTERP2
1390 if ((regs
.cs_sel
& 3) != 3)
1391 s
->tlb_interp_page_fault_count
++;
1392 s
->tlb_page_fault_count
++;
1393 s
->tlb_page_fault_cycles
+= (getclock() - ti
);
1397 #ifdef PROFILE_INTERP2
1398 s
->mmu_page_fault_cycles
+= (getclock() - ti
);
1400 /* real MMU fault */
1401 raise_exception(s
, EXCP0E_PAGE
);
1404 #ifdef PROFILE_INTERP2
1405 s
->mmu_page_fault_cycles
+= (getclock() - ti
);
1407 /* cannot map: I/O */
1408 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1412 /* exit the virtual cpu by raising an exception */
/* raise_exception(): leave guest execution and report 'intno' to the
   caller of the monitor.
   NOTE(review): this listing is a lossy extraction -- original lines
   1414, 1418, 1420, 1422-1426 and 1429-1433 (braces, the rest of the
   'if' condition and part of the exit path) are missing; do not
   treat this text as compilable. */
1413 void raise_exception(struct kqemu_state
*s
, int intno
)
1415 /* XXX: the exclusion of exception GPF is needed for correct
1416 Windows XP boot. I don't know the precise explanation yet. */
/* user_only guests, and interrupt vectors >= 0x20, exit the monitor
   entirely (MON_REQ_EXIT) rather than being interpreted. */
1417 if (s
->cpu_state
.user_only
|| (unsigned int)intno
>= 0x20 ||
1419 /* exit the monitor if user only */
1421 s
->mon_req
= MON_REQ_EXIT
;
/* otherwise run raise_exception_interp on the monitor stack; the
   exception regs frame sits at the top of that stack (stack_end -
   sizeof(struct kqemu_exception_regs)).  Presumably start_func does
   not return -- TODO confirm against its definition. */
1427 start_func(raise_exception_interp
, s
,
1428 s
->stack_end
- sizeof(struct kqemu_exception_regs
));
/* __raise_exception_err(): record 'error_code' in the saved CPU state,
   then raise exception 'intno' via raise_exception().
   NOTE(review): lossy extraction -- original lines 1436 and 1439/1440
   (braces) are missing. */
1434 void __raise_exception_err(struct kqemu_state
*s
,
1435 int intno
, int error_code
)
1437 s
->cpu_state
.error_code
= error_code
;
1438 raise_exception(s
, intno
);
/* do_update_cr3(): emulate a guest write to CR3.  When paging is
   enabled (CR0.PG set), request that user space flush all pages
   (KQEMU_FLUSH_ALL), then latch the new CR3 value in cpu_state.
   NOTE(review): lossy extraction -- original lines 1442, 1444, 1447
   and 1449-1450 are missing (braces and possibly a monitor-side
   flush call); verify against the full source. */
1441 void do_update_cr3(struct kqemu_state
*s
, unsigned long new_cr3
)
1443 if (s
->cpu_state
.cr0
& CR0_PG_MASK
) {
1445 /* indicate that all the pages must be flushed in user space */
1446 s
->cpu_state
.nb_pages_to_flush
= KQEMU_FLUSH_ALL
;
1448 s
->cpu_state
.cr3
= new_cr3
;
/* Bits of CR0 that the monitor allows the guest to change directly:
   TS (task switched), MP, EM (FPU emulation), AM (alignment mask). */
1451 #define CR0_UPDATE_MASK (CR0_TS_MASK | CR0_MP_MASK | CR0_EM_MASK | CR0_AM_MASK)
/* do_update_cr0(): emulate a guest write to CR0.  Any change outside
   CR0_UPDATE_MASK cannot be handled here and falls back to soft-MMU
   emulation (raise_exception(KQEMU_RET_SOFTMMU), which does not
   return).  Otherwise the new value is stored in cpu_state.
   NOTE(review): lossy extraction -- original lines 1454 and
   1461-1464 are missing (brace, likely the hardware-side update and
   closing braces). */
1453 void do_update_cr0(struct kqemu_state
*s
, unsigned long new_cr0
)
1455 if ((new_cr0
& ~CR0_UPDATE_MASK
) !=
1456 (s
->cpu_state
.cr0
& ~CR0_UPDATE_MASK
))
1457 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1458 if ((new_cr0
& CR0_UPDATE_MASK
) !=
1459 (s
->cpu_state
.cr0
& CR0_UPDATE_MASK
)) {
1460 s
->cpu_state
.cr0
= new_cr0
;
/* Bits of CR4 the guest may change directly: TSD (RDTSC restriction),
   OSFXSR and OSXMMEXCPT (SSE enables). */
1465 #define CR4_UPDATE_MASK (CR4_TSD_MASK | CR4_OSFXSR_MASK | CR4_OSXMMEXCPT_MASK)
/* do_update_cr4(): emulate a guest write to CR4, mirroring the CR0
   handler above: changes outside CR4_UPDATE_MASK fall back to the
   soft MMU; allowed changes are latched into cpu_state.
   NOTE(review): lossy extraction -- original lines 1468 and
   1475-1478 are missing (brace, likely hardware-side update and
   closing braces). */
1467 void do_update_cr4(struct kqemu_state
*s
, unsigned long new_cr4
)
1469 if ((new_cr4
& ~CR4_UPDATE_MASK
) !=
1470 (s
->cpu_state
.cr4
& ~CR4_UPDATE_MASK
))
1471 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1472 if ((new_cr4
& CR4_UPDATE_MASK
) !=
1473 (s
->cpu_state
.cr4
& CR4_UPDATE_MASK
)) {
1474 s
->cpu_state
.cr4
= new_cr4
;
/* do_invlpg(): emulate the INVLPG instruction for 'vaddr'.
   Flushes the page from the monitor's TLB structures, then queues the
   address in pages_to_flush[] for user space; if the queue is already
   full (>= KQEMU_MAX_PAGES_TO_FLUSH) it degrades to a full flush.
   NOTE(review): lossy extraction -- original lines 1480, 1484 and
   1486-1487 are missing (brace and the 'else' that guards the queue
   append); as written the append would also run in the overflow case,
   which is presumably not the real control flow. */
1479 void do_invlpg(struct kqemu_state
*s
, unsigned long vaddr
)
1481 tlb_flush_page(s
, vaddr
);
1482 if (s
->cpu_state
.nb_pages_to_flush
>= KQEMU_MAX_PAGES_TO_FLUSH
) {
1483 s
->cpu_state
.nb_pages_to_flush
= KQEMU_FLUSH_ALL
;
1485 s
->pages_to_flush
[s
->cpu_state
.nb_pages_to_flush
++] = vaddr
;
/* Linker-provided bounds of the table of PCs at which an exception in
   monitor code is expected (segment-load fixup sites). */
1489 extern unsigned long __start_seg_ex_table
;
1490 extern unsigned long __stop_seg_ex_table
;
/* handle_mon_exception(): called when an exception was taken while
   executing monitor code (CPL != 3).  Scans the seg_ex_table for the
   faulting pc; an unexpected pc is a fatal monitor bug
   (monitor_panic_regs).  A divide error (intno 0x00) coming from the
   interpreter restores s->regs before re-raising toward the guest.
   NOTE(review): lossy extraction -- original lines 1494-1495 (rest of
   the parameter list, including 'intno' and presumably where 'pc' is
   taken from regs), 1501, 1504-1505, 1510 and 1512-1514 are missing. */
1492 static void handle_mon_exception(struct kqemu_state
*s
,
1493 struct kqemu_exception_regs
*regs
,
1496 unsigned long pc
, *p
;
1499 for(p
= &__start_seg_ex_table
; p
!= &__stop_seg_ex_table
; p
++) {
1500 if (*p
== pc
) goto found
;
1502 monitor_panic_regs(s
, regs
,
1503 "Unexpected exception 0x%02x in monitor space\n",
1506 if (intno
== 0x00) {
1507 /* division exception from interp */
1508 /* XXX: verify for fxsave/fxrstor */
1509 s
->regs
= &s
->regs1
;
1511 /* Note: the exception state is reliable only for goto_user
1515 raise_exception_err(s
, intno
, regs
->error_code
);
1518 #ifdef PROFILE_INTERP_PC
/* profile_interp_add(): accumulate interpreter profiling data keyed by
   guest eip.  The eip is hashed into profile_interp_hash_table; the
   matching (or newly allocated) ProfileInterpEntry gets 'cycles' and
   'insn_count' added.  When the entry pool is exhausted, everything
   collapses into the last entry.  Hash-table indices are stored
   1-based (0 means empty), hence the 'idx - 1' below.
   NOTE(review): lossy extraction -- original lines 1520-1524 (rest of
   parameters: eip, cycles, insn_count and locals h/idx), 1531,
   1533-1537, 1543 (the 'else'), and 1548-1551 are missing. */
1519 static void profile_interp_add(struct kqemu_state
*s
,
1525 ProfileInterpEntry
*pe
;
1527 h
= (eip
^ (eip
>> PROFILE_INTERP_PC_HASH_BITS
) ^
1528 (eip
>> (2 * PROFILE_INTERP_PC_HASH_BITS
))) &
1529 (PROFILE_INTERP_PC_HASH_SIZE
- 1);
1530 idx
= s
->profile_interp_hash_table
[h
];
1532 pe
= &s
->profile_interp_entries
[idx
- 1];
1538 if (s
->nb_profile_interp_entries
>= (PROFILE_INTERP_PC_NB_ENTRIES
- 1)) {
1539 /* too many entries : use last entry */
1540 if (s
->nb_profile_interp_entries
< PROFILE_INTERP_PC_NB_ENTRIES
)
1541 s
->nb_profile_interp_entries
++;
1542 pe
= &s
->profile_interp_entries
[PROFILE_INTERP_PC_NB_ENTRIES
- 1];
1544 /* add one more entry */
1545 pe
= &s
->profile_interp_entries
[s
->nb_profile_interp_entries
++];
/* link the new entry at the head of the hash chain (1-based index) */
1546 pe
->next
= s
->profile_interp_hash_table
[h
];
1547 s
->profile_interp_hash_table
[h
] = s
->nb_profile_interp_entries
;
1552 pe
->cycles
+= cycles
;
1553 pe
->insn_count
+= insn_count
;
/* kqemu_exception_interp(): common path for exceptions that must be
   resolved by interpreting guest code (e.g. #GP, #UD).  Exceptions
   raised from monitor code (CPL of the saved cs_sel != 3) are routed
   to handle_mon_exception() instead.  Otherwise the segment cache is
   refreshed and (under PROFILE_INTERP2 / PROFILE_INTERP_PC) timing and
   instruction-count statistics are gathered.
   NOTE(review): lossy extraction -- original lines 1559, 1562,
   1564-1565, 1568-1573, 1576-1581 and 1583 are missing, including the
   actual call into the interpreter and the reads of ti0/ti1/ti2/c1;
   only the profiling bookkeeping survives below. */
1557 static inline void kqemu_exception_interp(struct kqemu_state
*s
, int intno
,
1558 struct kqemu_exception_regs
*regs
)
1560 #ifdef PROFILE_INTERP2
1561 int64_t ti0
, ti1
, ti2
;
1563 unsigned long start_eip
;
1566 if ((regs
->cs_sel
& 3) != 3)
1567 handle_mon_exception(s
, regs
, intno
);
1574 update_seg_cache(s
);
1575 #ifdef PROFILE_INTERP2
1578 start_eip
= s
->regs1
.eip
;
1582 #ifdef PROFILE_INTERP2
1584 s
->exc_interp_count
++;
1585 s
->exc_seg_cycles
+= ti1
- ti0
;
1586 s
->exc_interp_cycles
+= ti2
- ti1
;
/* c1 = instructions actually executed (counter counts down) */
1587 c1
-= s
->insn_count
;
1588 s
->exc_insn_count
+= c1
;
1589 if (c1
> s
->exc_insn_count_max
) {
1590 s
->exc_insn_count_max
= c1
;
1591 s
->exc_start_eip_max
= start_eip
;
1593 #ifdef PROFILE_INTERP_PC
1594 profile_interp_add(s
, start_eip
, ti2
- ti0
, c1
+ 1);
1599 /* XXX: remove L bit on x86_64 in legacy emulation ? */
/* check_dt_entries(): copy 'n' 8-byte descriptors from the guest
   table 's' into the monitor's shadow descriptor tables at 'd',
   writing one patched copy per CPL view (stride 32768 uint32_t per
   table; NB_DT_TABLES - 1 is the CPL 3 view).  Three cases per
   descriptor (e1 = low dword, e2 = high dword):
     - system descriptor (S bit clear): DPL forced to 0 so the guest
       cannot load it;
     - conforming code segment (CS + C bits): copied unmodified;
     - standard segment: DPL patched per-CPL ("if DPL >= CPL then
       DPL = 3") so guest privilege checks still work under the
       monitor's ring remapping.
   NOTE(review): lossy extraction -- original lines 1601-1603 (locals,
   opening brace), 1609, 1611, 1618, 1624, 1631, 1634-1636, 1638,
   1642, 1646-1649 and 1652-1655 (the e2tmp computations for the
   CPL 1 and CPL 2 views) and 1658/1661+ are missing; the e2tmp
   values written below are computed in those missing lines. */
1600 static void check_dt_entries(uint8_t *d
, const uint8_t *s
, int n
)
1604 for(i
= 0; i
< n
; i
++) {
1605 e1
= ((uint32_t *)s
)[0];
1606 e2
= ((uint32_t *)s
)[1];
1607 if (!(e2
& DESC_S_MASK
)) {
1608 /* not a segment: reset DPL to ensure it cannot be used
1610 e2
&= ~(3 << DESC_DPL_SHIFT
);
1612 ((uint32_t *)d
)[32768 * 0 + 0] = e1
; /* CPL = 0 */
1613 ((uint32_t *)d
)[32768 * 0 + 1] = e2
;
1614 ((uint32_t *)d
)[32768 * 1 + 0] = e1
; /* CPL = 1 */
1615 ((uint32_t *)d
)[32768 * 1 + 1] = e2
;
1616 ((uint32_t *)d
)[32768 * 2 + 0] = e1
; /* CPL = 2 */
1617 ((uint32_t *)d
)[32768 * 2 + 1] = e2
;
1619 ((uint32_t *)d
)[32768 * (NB_DT_TABLES
- 1) + 0] = e1
; /* CPL = 3 */
1620 ((uint32_t *)d
)[32768 * (NB_DT_TABLES
- 1) + 1] = e2
;
1621 } else if (unlikely(((e2
& (DESC_CS_MASK
| DESC_C_MASK
)) ==
1622 (DESC_CS_MASK
| DESC_C_MASK
)))) {
1623 /* conforming segment : no need to modify */
1625 ((uint32_t *)d
)[32768 * 0 + 0] = e1
; /* CPL = 0 */
1626 ((uint32_t *)d
)[32768 * 0 + 1] = e2
;
1627 ((uint32_t *)d
)[32768 * 1 + 0] = e1
; /* CPL = 1 */
1628 ((uint32_t *)d
)[32768 * 1 + 1] = e2
;
1629 ((uint32_t *)d
)[32768 * 2 + 0] = e1
; /* CPL = 2 */
1630 ((uint32_t *)d
)[32768 * 2 + 1] = e2
;
1632 ((uint32_t *)d
)[32768 * (NB_DT_TABLES
- 1) + 0] = e1
; /* CPL = 3 */
1633 ((uint32_t *)d
)[32768 * (NB_DT_TABLES
- 1) + 1] = e2
;
1637 uint32_t e2tmp
, e2dpl3
;
1639 dpl
= (e2
>> DESC_DPL_SHIFT
) & 3;
1640 /* standard segment: need to patch the DPL so that
1641 if (DPL >= CPL) then DPL = 3
1643 e2dpl3
= e2
| (3 << DESC_DPL_SHIFT
);
1644 ((uint32_t *)d
)[32768 * 0 + 0] = e1
; /* CPL = 0 */
1645 ((uint32_t *)d
)[32768 * 0 + 1] = e2dpl3
;
1650 ((uint32_t *)d
)[32768 * 1 + 0] = e1
; /* CPL = 1 */
1651 ((uint32_t *)d
)[32768 * 1 + 1] = e2tmp
;
1656 ((uint32_t *)d
)[32768 * 2 + 0] = e1
; /* CPL = 2 */
1657 ((uint32_t *)d
)[32768 * 2 + 1] = e2tmp
;
1659 ((uint32_t *)d
)[32768 * (NB_DT_TABLES
- 1) + 0] = e1
; /* CPL = 3 */
1660 ((uint32_t *)d
)[32768 * (NB_DT_TABLES
- 1) + 1] = e2
;
/* check_dt_entries_page(): refresh the shadow descriptor table for
   selector range [sel, sel_end) of table 'dt_type' (0 = GDT,
   1 = LDT; each table occupies 8192 entries in dt_table), copying
   descriptors from the guest memory at 'src' via check_dt_entries().
   The selector range reserved for the monitor itself
   ([monitor_selector_base, +MONITOR_SEL_RANGE)) is skipped so guest
   descriptors can never overwrite monitor descriptors.
   NOTE(review): lossy extraction -- original lines 1670-1671 (locals
   incl. 'dt'), 1673, 1675, 1678 (init of sel1), 1681, 1684-1690,
   1694 and 1697-1699 are missing, including the loop advance and the
   branch joining the split-range and whole-range paths. */
1668 static void check_dt_entries_page(struct kqemu_state
*s
, int dt_type
,
1669 int sel
, int sel_end
, const uint8_t *src
)
1672 int mon_sel_start
, mon_sel_end
, sel1
, sel2
;
1674 dt
= (uint8_t *)(s
->dt_table
+ (dt_type
* 8192));
1676 mon_sel_start
= s
->monitor_selector_base
;
1677 mon_sel_end
= s
->monitor_selector_base
+ MONITOR_SEL_RANGE
;
1679 while (sel1
< sel_end
) {
1680 if (sel1
>= mon_sel_start
&& sel1
< mon_sel_end
)
1682 if (sel1
< mon_sel_start
) {
1683 sel2
= mon_sel_start
;
1691 #ifdef DEBUG_DT_CACHE
1692 monitor_log(s
, "check_dt: type=%d sel=%d-%d\n",
1693 dt_type
, sel1
, sel2
);
/* selectors are 8 bytes each, hence the >> 3 entry count */
1695 check_dt_entries(dt
+ sel1
,
1696 src
+ sel1
- sel
, (sel2
- sel1
) >> 3);
1700 #ifdef DEBUG_DT_CACHE
1701 monitor_log(s
, "check_dt: type=%d sel=%d-%d\n",
1702 dt_type
, sel
, sel_end
);
1704 check_dt_entries(dt
+ sel
, src
, (sel_end
- sel
) >> 3);
/* reset_dt_entries2(): zero 'n' bytes of descriptors in each of the
   per-CPL shadow tables (CPL 0, 1, 2 and the CPL 3 view at index
   NB_DT_TABLES - 1; stride 32768 bytes per table).
   NOTE(review): lossy extraction -- original lines 1709-1711 are
   missing; presumably the opening brace and a 'uint8_t *dt = dt1;'
   local that the memset calls use, plus 1715/1717+ closing lines. */
1708 static void reset_dt_entries2(void *dt1
, int n
)
1712 memset(dt
+ 32768 * 0, 0, n
);
1713 memset(dt
+ 32768 * 1, 0, n
);
1714 memset(dt
+ 32768 * 2, 0, n
);
1716 memset(dt
+ 32768 * (NB_DT_TABLES
- 1), 0, n
);
/* reset_dt_entries(): zero the shadow descriptors for selector range
   [sel, sel_end) of table 'dt_type', skipping the monitor's own
   selector range -- the structural twin of check_dt_entries_page()
   above, but clearing instead of copying.
   NOTE(review): lossy extraction -- original lines 1721-1722, 1724,
   1726, 1729 (init of sel1), 1732, 1735-1741, 1745 and 1747-1749
   are missing (locals, loop advance, and the branch joining the
   split-range and whole-range paths). */
1719 static void reset_dt_entries(struct kqemu_state
*s
, int dt_type
,
1720 int sel
, int sel_end
)
1723 int mon_sel_start
, mon_sel_end
, sel1
, sel2
;
1725 dt
= (uint8_t *)(s
->dt_table
+ (dt_type
* 8192));
1727 mon_sel_start
= s
->monitor_selector_base
;
1728 mon_sel_end
= s
->monitor_selector_base
+ MONITOR_SEL_RANGE
;
1730 while (sel1
< sel_end
) {
1731 if (sel1
>= mon_sel_start
&& sel1
< mon_sel_end
)
1733 if (sel1
< mon_sel_start
) {
1734 sel2
= mon_sel_start
;
1742 #ifdef DEBUG_DT_CACHE
1743 monitor_log(s
, "reset_dt: type=%d sel=%d-%d\n",
1744 dt_type
, sel1
, sel2
);
1746 reset_dt_entries2(dt
+ sel1
, sel2
- sel1
);
1750 #ifdef DEBUG_DT_CACHE
1751 monitor_log(s
, "reset_dt: type=%d sel=%d-%d\n",
1752 dt_type
, sel
, sel_end
);
1754 reset_dt_entries2(dt
+ sel
, sel_end
- sel
);
1758 /* Note: this function can raise an exception in case of MMU fault or
1759 unaligned DT table */
/* update_dt_cache(): synchronize the monitor's shadow copy of one
   guest descriptor table (dt_type selects GDT or LDT; a null LDT
   selector is special-cased).  Walks the guest table page by page;
   a page is re-checked (check_dt_entries_page) only when its backing
   ram address changed or its DT_DIRTY_FLAG is set, then the dirty
   flag is cleared.  Entries between the new and the previously cached
   limit are zeroed via reset_dt_entries().  Unaligned bases (base & 7)
   and wrapping tables fall back to the soft MMU.
   NOTE(review): lossy extraction -- original lines 1761, 1763-1764,
   1766-1767, 1770-1772, 1775-1776, 1779, 1782, 1787-1789 (init of
   sel/pindex), 1794, 1798 (start of the dirty-check condition),
   1804-1808 (loop advance) and 1811/1813 are missing.  Which branch
   assigns LDT vs GDT base/limit is therefore not fully visible
   here -- confirm against the full source. */
1760 static void update_dt_cache(struct kqemu_state
*s
, int dt_type
)
1762 unsigned long base
, dt_end
, page_end
, dt_ptr
, ram_addr
;
1765 int pindex
, sel
, sel_end
, dt_changed
, sel2
;
1768 /* XXX: check the exact behaviour of zero LDT */
1769 if ((s
->cpu_state
.ldt
.selector
& 0xfffc) == 0) {
1773 base
= s
->cpu_state
.ldt
.base
;
1774 limit
= s
->cpu_state
.ldt
.limit
;
1777 base
= s
->cpu_state
.gdt
.base
;
1778 limit
= s
->cpu_state
.gdt
.limit
;
1780 dt_changed
= (base
!= s
->dt_base
[dt_type
] ||
1781 limit
!= s
->dt_limit
[dt_type
]);
/* round the limit down to whole 8-byte descriptors */
1783 sel_end
= (limit
+ 1) & ~7;
1784 dt_end
= base
+ sel_end
;
1785 if (dt_end
< base
|| (base
& 7) != 0)
1786 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1790 while (sel
< sel_end
) {
1791 dt_ptr
= base
+ sel
;
1792 page_end
= (dt_ptr
& PAGE_MASK
) + PAGE_SIZE
;
1793 if (page_end
> dt_end
)
1795 sel2
= sel
+ (page_end
- dt_ptr
);
/* map_vaddr can raise an MMU fault (see header comment) */
1796 ptr
= map_vaddr(s
, dt_ptr
, 0, 0);
1797 ram_addr
= ram_ptr_to_ram_addr(s
, ptr
);
1799 s
->dt_ram_addr
[dt_type
][pindex
] != ram_addr
||
1800 ram_get_dirty(s
, ram_addr
, DT_DIRTY_FLAG
)) {
1801 s
->dt_ram_addr
[dt_type
][pindex
] = ram_addr
;
1802 check_dt_entries_page(s
, dt_type
, sel
, sel2
, ptr
);
1803 ram_reset_dirty(s
, ram_addr
, DT_DIRTY_FLAG
);
1809 /* reset the remaining DT entries up to the last limit */
1810 sel_end
= (s
->dt_limit
[dt_type
] + 1) & ~7;
1812 reset_dt_entries(s
, dt_type
, sel
, sel_end
);
1814 s
->dt_base
[dt_type
] = base
;
1815 s
->dt_limit
[dt_type
] = limit
;
/* update_gdt_ldt_cache(): refresh both shadow descriptor tables --
   dt_type 0 then dt_type 1 (GDT and LDT; see update_dt_cache()).
   NOTE(review): lossy extraction -- the surrounding braces (original
   lines 1819/1822) are missing. */
1818 void update_gdt_ldt_cache(struct kqemu_state
*s
)
1820 update_dt_cache(s
, 0);
1821 update_dt_cache(s
, 1);
/* monitor_exec(): entry point that prepares the monitor to run guest
   code.  Visible phases in this (lossy) extract:
     1. restore the monitor/nexus mapping;
     2. apply pending TLB flushes requested by user space
        (pages_to_flush / KQEMU_FLUSH_ALL) and clear the modified-ram
        page list;
     3. re-mark not-dirty ram pages read-only (full scan when the
        update list overflowed);
     4. refresh the GDT/LDT shadow caches;
     5. sanity-check the CPU state (protected mode required, VM86
        rejected -> soft MMU fallback);
     6. build the exception-regs frame 'r' at the top of the monitor
        stack from cpu_state: eflags, cs/ss forced to RPL 3,
        general-purpose registers (r8-r15 presumably under an x86_64
        conditional whose #ifdef lines are missing here -- confirm),
        ds/es selectors; then update the segment descriptor caches.
   NOTE(review): many original lines are missing throughout
   (1825, 1831-1835, 1837-1839, 1842-1843, 1847-1849, 1852-1853,
   1855-1856, 1859, 1863-1864, 1869-1871, 1877-1879, 1881-1883,
   1886, 1888-1889, 1893-1894, 1900-1902, 1907, 1916, 1925,
   1928-1929, 1931, 1933-1934, 1936, 1938, 1941-1946), including the
   branch structure around the flush paths and whatever follows the
   insn_count initialisation; do not infer control flow from this
   text alone. */
1824 void monitor_exec(struct kqemu_state
*s
)
1826 struct kqemu_cpu_state
*env
= &s
->cpu_state
;
1827 struct kqemu_exception_regs
*r
=
1828 (void *)(s
->stack_end
- sizeof(struct kqemu_exception_regs
));
1829 #ifdef PROFILE_INTERP2
1830 int64_t ti
= getclock();
1836 restore_monitor_nexus_mapping(s
);
1840 /* if max_locked_ram_pages was modified because some instances
1841 were added, we unlock some pages here */
1844 /* first we flush the pages if needed */
1845 if (env
->nb_pages_to_flush
!= 0) {
1846 if (env
->nb_pages_to_flush
> KQEMU_MAX_PAGES_TO_FLUSH
) {
1850 for(i
= 0; i
< env
->nb_pages_to_flush
; i
++) {
1851 tlb_flush_page(s
, s
->pages_to_flush
[i
]);
1854 env
->nb_pages_to_flush
= 0;
1857 /* XXX: invalidate modified ram pages */
1858 env
->nb_modified_ram_pages
= 0;
1860 /* unmap pages corresponding to notdirty ram pages */
1861 if (env
->nb_ram_pages_to_update
!= 0) {
1862 unsigned long ram_addr
;
1865 if (env
->nb_ram_pages_to_update
> KQEMU_MAX_RAM_PAGES_TO_UPDATE
) {
1866 for(ram_addr
= 0; ram_addr
< s
->ram_size
; ram_addr
+= PAGE_SIZE
) {
1867 if (!ram_is_dirty(s
, ram_addr
)) {
1868 ram_set_read_only(s
, ram_addr
);
1872 for(i
= 0; i
< env
->nb_ram_pages_to_update
; i
++) {
1873 ram_addr
= s
->ram_pages_to_update
[i
];
1874 if (ram_addr
< s
->ram_size
&&
1875 !ram_is_dirty(s
, ram_addr
)) {
1876 ram_set_read_only(s
, ram_addr
);
1880 env
->nb_ram_pages_to_update
= 0;
1884 if (s
->cpu_state
.cpl
== 3)
1885 update_gdt_ldt_cache(s
);
1887 update_gdt_ldt_cache(s
);
1890 #ifdef PROFILE_INTERP2
1891 s
->exec_init_cycles
+= (getclock() - ti
);
1892 s
->exec_init_count
++;
1895 /* since this is not costly, we ensure here that the CPU state is
1896 consistent with what we can handle */
1897 if (!(env
->cr0
& CR0_PE_MASK
) ||
1898 (env
->eflags
& VM_MASK
)) {
1899 raise_exception(s
, KQEMU_RET_SOFTMMU
);
1903 r
->eflags
= compute_eflags_user(s
, env
->eflags
);
1904 s
->comm_page
.virt_eflags
= env
->eflags
& EFLAGS_MASK
;
/* guest segments run with RPL forced to 3 under the monitor */
1905 r
->cs_sel
= env
->segs
[R_CS
].selector
| 3;
1906 r
->ss_sel
= env
->segs
[R_SS
].selector
| 3;
1908 r
->eax
= env
->regs
[R_EAX
];
1909 r
->ecx
= env
->regs
[R_ECX
];
1910 r
->edx
= env
->regs
[R_EDX
];
1911 r
->ebx
= env
->regs
[R_EBX
];
1912 r
->esp
= env
->regs
[R_ESP
];
1913 r
->ebp
= env
->regs
[R_EBP
];
1914 r
->esi
= env
->regs
[R_ESI
];
1915 r
->edi
= env
->regs
[R_EDI
];
1917 r
->r8
= env
->regs
[8];
1918 r
->r9
= env
->regs
[9];
1919 r
->r10
= env
->regs
[10];
1920 r
->r11
= env
->regs
[11];
1921 r
->r12
= env
->regs
[12];
1922 r
->r13
= env
->regs
[13];
1923 r
->r14
= env
->regs
[14];
1924 r
->r15
= env
->regs
[15];
1926 r
->ds_sel
= env
->segs
[R_DS
].selector
;
1927 r
->es_sel
= env
->segs
[R_ES
].selector
;
1930 update_seg_desc_caches(s
);
1932 /* NOTE: exceptions can occur here */
1935 /* for consistency, we accept to start the interpreter here if
1937 if (!(s
->comm_page
.virt_eflags
& IF_MASK
)) {
1939 s
->seg_cache_loaded
= 1;
1940 s
->insn_count
= MAX_INSN_COUNT
;
1947 /* General Protection Fault. In all cases we need to interpret the
1948 code to know more */
/* kqemu_exception_0d(): #GP handler stub -- forwards to the common
   interpreter path with vector 0x0d.
   NOTE(review): '®s' below is mojibake for '&regs' (extraction
   artifact); braces (original lines 1951/1953) are also missing.
   Left byte-identical here. */
1949 void kqemu_exception_0d(struct kqemu_state
*s
,
1950 struct kqemu_exception_regs regs
)
1952 kqemu_exception_interp(s
, 0x0d, &regs
);
1955 /* illegal intruction. We need to interpret just for the syscall case */
/* kqemu_exception_06(): #UD handler stub -- forwards to the common
   interpreter path with vector 0x06 (needed so the interpreter can
   emulate syscall-style instructions).
   NOTE(review): '®s' below is mojibake for '&regs'; surrounding
   braces are missing from the extraction.  Left byte-identical. */
1956 void kqemu_exception_06(struct kqemu_state
*s
,
1957 struct kqemu_exception_regs regs
)
1959 kqemu_exception_interp(s
, 0x06, &regs
);
1962 /* Coproprocessor emulation fault. We handle here the fact that the
1963 FPU state can be temporarily stored in the host OS */
/* kqemu_exception_07(): #NM (device-not-available) handler.
   From monitor space the fault is only tolerated at an expected pc
   (fxsave/fxrstor fixup sites); otherwise it is a monitor panic.
   If the guest's CR0.TS/CR0.EM is set the fault genuinely belongs to
   the guest and is re-raised (EXCP07_PREX); otherwise the host is
   asked to restore the FPU state (MON_REQ_EXCEPTION).
   NOTE(review): lossy extraction -- original lines 1966, 1970,
   1972, 1974 (the 'else' pairing the two seg_cache_loaded stores),
   1976, 1981 and 1984-1988 are missing; '®s' is mojibake for
   '&regs'.  Left byte-identical. */
1964 void kqemu_exception_07(struct kqemu_state
*s
,
1965 struct kqemu_exception_regs regs
)
1967 if ((regs
.cs_sel
& 3) != 3) {
1968 if (!expected_monitor_exception(regs
.eip
)) {
1969 monitor_panic_regs(s
, &regs
, "Unexpected exception 0x%02x in monitor space\n", 0x07);
1971 /* this can happen for fxsave/fxrstor instructions in the
1973 s
->seg_cache_loaded
= 1;
1975 s
->seg_cache_loaded
= 0;
1977 s
->regs
= &s
->regs1
;
1978 if (s
->cpu_state
.cr0
& (CR0_TS_MASK
| CR0_EM_MASK
)) {
1979 /* real FPU fault needed */
1980 raise_exception_err(s
, EXCP07_PREX
, 0);
1982 /* the host needs to restore the FPU state for us */
1983 s
->mon_req
= MON_REQ_EXCEPTION
;
1989 /* single step/debug */
/* kqemu_exception_01(): #DB handler.  Reads DR6 to classify the
   fault; hardware-breakpoint hits (low 4 bits of DR6) with the
   monitor's dr7 shadow clear are attributed to lazy host dr7 handling
   (Linux) rather than the guest.  A debug fault in monitor space is a
   panic; otherwise DR6 is propagated into cpu_state and EXCP01_SSTP
   is raised toward the guest.
   NOTE(review): lossy extraction -- original lines 1992, 1994, 1997,
   2000-2001, 2004-2005 and 2010-2011 (including whatever sets 'val'
   before the dr7 write at the end) are missing.  The panic message
   passes 0x07 as the exception number -- looks like a copy-paste from
   kqemu_exception_07; should presumably be 0x01 (left unchanged
   here).  '®s' is mojibake for '&regs'. */
1990 void kqemu_exception_01(struct kqemu_state
*s
,
1991 struct kqemu_exception_regs regs
)
1993 unsigned long dr6
, val
;
1995 asm volatile ("mov %%dr6, %0" : "=r" (dr6
));
1996 /* Linux uses lazy dr7 clearing, so we must verify we are in this
1998 /* XXX: check that because TF should have the priority */
1999 if ((dr6
& 0xf) != 0 && !s
->monitor_dr7
)
2002 if ((regs
.cs_sel
& 3) != 3)
2003 monitor_panic_regs(s
, &regs
, "Unexpected exception 0x%02x in monitor space\n", 0x07);
2006 s
->seg_cache_loaded
= 0;
2007 /* update DR6 register */
2008 s
->cpu_state
.dr6
= dr6
;
2009 raise_exception_err(s
, EXCP01_SSTP
, 0);
2012 asm volatile ("mov %0, %%dr7" : : "r" (val
));
/* DEFAULT_EXCEPTION(n): generate the default handler
   kqemu_exception_<n> -- panic via handle_mon_exception() if the
   fault came from monitor space, otherwise record the error code and
   re-raise vector 0x<n> toward the guest.  Instantiated below for all
   vectors without a dedicated handler (00 is instantiated even though
   divide errors from the interpreter are special-cased in
   handle_mon_exception()).
   NOTE(review): lossy extraction -- original lines 2018, 2021, 2025-2026
   (braces / 'else') are missing from the macro body, and '®s' is
   mojibake for '&regs'.  No comments are inserted between the
   backslash-continued macro lines to avoid changing where the
   (already damaged) macro terminates. */
2015 #define DEFAULT_EXCEPTION(n) \
2016 void kqemu_exception_ ## n (struct kqemu_state *s, \
2017 struct kqemu_exception_regs regs) \
2019 if ((regs.cs_sel & 3) != 3)\
2020 handle_mon_exception(s, &regs, 0x ## n);\
2022 s->seg_cache_loaded = 0;\
2023 s->cpu_state.error_code = regs.error_code;\
2024 raise_exception(s, 0x ## n);\
2027 DEFAULT_EXCEPTION(00)
2028 DEFAULT_EXCEPTION(02)
2029 DEFAULT_EXCEPTION(03)
2030 DEFAULT_EXCEPTION(04)
2031 DEFAULT_EXCEPTION(05)
2032 DEFAULT_EXCEPTION(08)
2033 DEFAULT_EXCEPTION(09)
2034 DEFAULT_EXCEPTION(0a
)
2035 DEFAULT_EXCEPTION(0b
)
2036 DEFAULT_EXCEPTION(0c
)
2037 DEFAULT_EXCEPTION(0f
)
2038 DEFAULT_EXCEPTION(10)
2039 DEFAULT_EXCEPTION(11)
2040 DEFAULT_EXCEPTION(12)
2041 DEFAULT_EXCEPTION(13)
2043 void monitor_interrupt(struct kqemu_state
*s
, struct kqemu_exception_regs regs
)
2046 #ifdef PROFILE_INTERP2
2047 int64_t ti
= getclock();
2048 s
->hw_interrupt_start_count
++;
2051 intno
= regs
.error_code
;
2053 if ((regs
.cs_sel
& 3) != 3) {
2054 monitor_panic_regs(s
, ®s
, "Interrupt 0x%02x in monitor space\n",
2059 s
->seg_cache_loaded
= 0;
2060 /* execute the irq code in kernel space */
2061 s
->mon_req
= MON_REQ_IRQ
;
2063 /* NOTE: if interrupting user code, the host kernel will schedule
2064 and eventually exit from the monitor_exec loop */
2066 /* ... and come back to monitor space */
2068 #ifdef PROFILE_INTERP2
2069 s
->hw_interrupt_count
++;
2070 s
->hw_interrupt_cycles
+= (getclock() - ti
);