TCG: fix negative frame offset calculations
[qemu/mdroth.git] / exec.c
blob21507fb4a9e4084454dd9a95649f3c6344a34003
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #if defined(CONFIG_USER_ONLY)
37 #include <qemu.h>
38 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
39 #include <sys/param.h>
40 #if __FreeBSD_version >= 700104
41 #define HAVE_KINFO_GETVMMAP
42 #define sigqueue sigqueue_freebsd /* avoid redefinition */
43 #include <sys/time.h>
44 #include <sys/proc.h>
45 #include <machine/profile.h>
46 #define _KERNEL
47 #include <sys/user.h>
48 #undef _KERNEL
49 #undef sigqueue
50 #include <libutil.h>
51 #endif
52 #endif
53 #else /* !CONFIG_USER_ONLY */
54 #include "xen-mapcache.h"
55 #include "trace.h"
56 #endif
58 //#define DEBUG_TB_INVALIDATE
59 //#define DEBUG_FLUSH
60 //#define DEBUG_TLB
61 //#define DEBUG_UNASSIGNED
63 /* make various TB consistency checks */
64 //#define DEBUG_TB_CHECK
65 //#define DEBUG_TLB_CHECK
67 //#define DEBUG_IOPORT
68 //#define DEBUG_SUBPAGE
70 #if !defined(CONFIG_USER_ONLY)
71 /* TB consistency checks only implemented for usermode emulation. */
72 #undef DEBUG_TB_CHECK
73 #endif
75 #define SMC_BITMAP_USE_THRESHOLD 10
77 static TranslationBlock *tbs;
78 static int code_gen_max_blocks;
79 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
80 static int nb_tbs;
81 /* any access to the tbs or the page table must use this lock */
82 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
84 #if defined(__arm__) || defined(__sparc_v9__)
85 /* The prologue must be reachable with a direct jump. ARM and Sparc64
86 have limited branch ranges (possibly also PPC) so place it in a
87 section close to code segment. */
88 #define code_gen_section \
89 __attribute__((__section__(".gen_code"))) \
90 __attribute__((aligned (32)))
91 #elif defined(_WIN32)
92 /* Maximum alignment for Win32 is 16. */
93 #define code_gen_section \
94 __attribute__((aligned (16)))
95 #else
96 #define code_gen_section \
97 __attribute__((aligned (32)))
98 #endif
100 uint8_t code_gen_prologue[1024] code_gen_section;
101 static uint8_t *code_gen_buffer;
102 static unsigned long code_gen_buffer_size;
103 /* threshold to flush the translated code buffer */
104 static unsigned long code_gen_buffer_max_size;
105 static uint8_t *code_gen_ptr;
107 #if !defined(CONFIG_USER_ONLY)
108 int phys_ram_fd;
109 static int in_migration;
111 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
112 #endif
114 CPUState *first_cpu;
115 /* current CPU in the current thread. It is only valid inside
116 cpu_exec() */
117 CPUState *cpu_single_env;
118 /* 0 = Do not count executed instructions.
119 1 = Precise instruction counting.
120 2 = Adaptive rate instruction counting. */
121 int use_icount = 0;
122 /* Current instruction counter. While executing translated code this may
123 include some instructions that have not yet been executed. */
124 int64_t qemu_icount;
/* Per-target-page bookkeeping for translated code.  Leaf tables of these
   are reached through l1_map by page_find_alloc(). */
126 typedef struct PageDesc {
127 /* list of TBs intersecting this ram page */
/* The low two bits of each link carry a tag: the code that walks this list
   masks them off with "& ~3" and uses them as the page_next[] index. */
128 TranslationBlock *first_tb;
129 /* in order to optimize self modifying code, we count the number
130 of lookups we do to a given page to use a bitmap */
131 unsigned int code_write_count;
/* Allocated by build_page_bitmap() (TARGET_PAGE_SIZE / 8 bytes) and
   released by invalidate_page_bitmap(); NULL when no bitmap exists. */
132 uint8_t *code_bitmap;
133 #if defined(CONFIG_USER_ONLY)
/* User-mode only.  NOTE(review): presumably the PAGE_* flags stored by
   page_set_flags(); that function is not visible here - confirm. */
134 unsigned long flags;
135 #endif
136 } PageDesc;
138 /* In system mode we want L1_MAP to be based on ram offsets,
139 while in user mode we want it to be based on virtual addresses. */
140 #if !defined(CONFIG_USER_ONLY)
141 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
142 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
143 #else
144 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
145 #endif
146 #else
147 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
148 #endif
150 /* Size of the L2 (and L3, etc) page tables. */
151 #define L2_BITS 10
152 #define L2_SIZE (1 << L2_BITS)
154 /* The bits remaining after N lower levels of page tables. */
155 #define P_L1_BITS_REM \
156 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
157 #define V_L1_BITS_REM \
158 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
160 /* Size of the L1 page table. Avoid silly small sizes. */
161 #if P_L1_BITS_REM < 4
162 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
163 #else
164 #define P_L1_BITS P_L1_BITS_REM
165 #endif
167 #if V_L1_BITS_REM < 4
168 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
169 #else
170 #define V_L1_BITS V_L1_BITS_REM
171 #endif
173 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
174 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
179 unsigned long qemu_real_host_page_size;
180 unsigned long qemu_host_page_bits;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
/* Per-physical-page descriptor (system emulation only).  New leaf tables
   are filled in by phys_page_find_alloc() with phys_offset set to
   IO_MEM_UNASSIGNED and region_offset set to a page-derived address. */
189 typedef struct PhysPageDesc {
190 /* offset in host memory of the page + io_index in the low bits */
191 ram_addr_t phys_offset;
192 ram_addr_t region_offset;
193 } PhysPageDesc;
195 /* This is a multi-level map on the physical address space.
196 The bottom level has pointers to PhysPageDesc. */
197 static void *l1_phys_map[P_L1_SIZE];
199 static void io_mem_init(void);
201 /* io memory support */
202 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
203 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
204 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
205 static char io_mem_used[IO_MEM_NB_ENTRIES];
206 static int io_mem_watch;
207 #endif
209 /* log support */
210 #ifdef WIN32
211 static const char *logfilename = "qemu.log";
212 #else
213 static const char *logfilename = "/tmp/qemu.log";
214 #endif
215 FILE *logfile;
216 int loglevel;
217 static int log_append = 0;
219 /* statistics */
220 #if !defined(CONFIG_USER_ONLY)
221 static int tlb_flush_count;
222 #endif
223 static int tb_flush_count;
224 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Request read/write/execute protection for [addr, addr + size).
   The result of VirtualProtect() is not checked. */
static void map_exec(void *addr, long size)
{
    DWORD previous;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous);
}
#else
/* Request read/write/execute protection for every host page touched by
   [addr, addr + size).  The range is widened to page boundaries because
   mprotect() works on whole pages; its result is not checked. */
static void map_exec(void *addr, long size)
{
    const unsigned long page_size = getpagesize();
    const unsigned long page_mask = ~(page_size - 1);
    const unsigned long first = (unsigned long)addr & page_mask;
    const unsigned long limit =
        ((unsigned long)addr + size + page_size - 1) & page_mask;

    mprotect((void *)first, limit - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
252 static void page_init(void)
254 /* NOTE: we can always suppose that qemu_host_page_size >=
255 TARGET_PAGE_SIZE */
256 #ifdef _WIN32
258 SYSTEM_INFO system_info;
260 GetSystemInfo(&system_info);
261 qemu_real_host_page_size = system_info.dwPageSize;
263 #else
264 qemu_real_host_page_size = getpagesize();
265 #endif
266 if (qemu_host_page_size == 0)
267 qemu_host_page_size = qemu_real_host_page_size;
268 if (qemu_host_page_size < TARGET_PAGE_SIZE)
269 qemu_host_page_size = TARGET_PAGE_SIZE;
270 qemu_host_page_bits = 0;
271 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
272 qemu_host_page_bits++;
273 qemu_host_page_mask = ~(qemu_host_page_size - 1);
275 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
277 #ifdef HAVE_KINFO_GETVMMAP
278 struct kinfo_vmentry *freep;
279 int i, cnt;
281 freep = kinfo_getvmmap(getpid(), &cnt);
282 if (freep) {
283 mmap_lock();
284 for (i = 0; i < cnt; i++) {
285 unsigned long startaddr, endaddr;
287 startaddr = freep[i].kve_start;
288 endaddr = freep[i].kve_end;
289 if (h2g_valid(startaddr)) {
290 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
292 if (h2g_valid(endaddr)) {
293 endaddr = h2g(endaddr);
294 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
295 } else {
296 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
297 endaddr = ~0ul;
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 #endif
303 free(freep);
304 mmap_unlock();
306 #else
307 FILE *f;
309 last_brk = (unsigned long)sbrk(0);
311 f = fopen("/compat/linux/proc/self/maps", "r");
312 if (f) {
313 mmap_lock();
315 do {
316 unsigned long startaddr, endaddr;
317 int n;
319 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
321 if (n == 2 && h2g_valid(startaddr)) {
322 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
324 if (h2g_valid(endaddr)) {
325 endaddr = h2g(endaddr);
326 } else {
327 endaddr = ~0ul;
329 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
331 } while (!feof(f));
333 fclose(f);
334 mmap_unlock();
336 #endif
338 #endif
341 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
343 PageDesc *pd;
344 void **lp;
345 int i;
347 #if defined(CONFIG_USER_ONLY)
348 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
349 # define ALLOC(P, SIZE) \
350 do { \
351 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
352 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
353 } while (0)
354 #else
355 # define ALLOC(P, SIZE) \
356 do { P = qemu_mallocz(SIZE); } while (0)
357 #endif
359 /* Level 1. Always allocated. */
360 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
362 /* Level 2..N-1. */
363 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
364 void **p = *lp;
366 if (p == NULL) {
367 if (!alloc) {
368 return NULL;
370 ALLOC(p, sizeof(void *) * L2_SIZE);
371 *lp = p;
374 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
377 pd = *lp;
378 if (pd == NULL) {
379 if (!alloc) {
380 return NULL;
382 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
383 *lp = pd;
386 #undef ALLOC
388 return pd + (index & (L2_SIZE - 1));
391 static inline PageDesc *page_find(tb_page_addr_t index)
393 return page_find_alloc(index, 0);
396 #if !defined(CONFIG_USER_ONLY)
397 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
399 PhysPageDesc *pd;
400 void **lp;
401 int i;
403 /* Level 1. Always allocated. */
404 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
406 /* Level 2..N-1. */
407 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
408 void **p = *lp;
409 if (p == NULL) {
410 if (!alloc) {
411 return NULL;
413 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
415 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
418 pd = *lp;
419 if (pd == NULL) {
420 int i;
422 if (!alloc) {
423 return NULL;
426 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
428 for (i = 0; i < L2_SIZE; i++) {
429 pd[i].phys_offset = IO_MEM_UNASSIGNED;
430 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
434 return pd + (index & (L2_SIZE - 1));
437 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
439 return phys_page_find_alloc(index, 0);
442 static void tlb_protect_code(ram_addr_t ram_addr);
443 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
444 target_ulong vaddr);
445 #define mmap_lock() do { } while(0)
446 #define mmap_unlock() do { } while(0)
447 #endif
449 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
451 #if defined(CONFIG_USER_ONLY)
452 /* Currently it is not recommended to allocate big chunks of data in
453 user mode. It will change when a dedicated libc will be used */
454 #define USE_STATIC_CODE_GEN_BUFFER
455 #endif
457 #ifdef USE_STATIC_CODE_GEN_BUFFER
458 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
459 __attribute__((aligned (CODE_GEN_ALIGN)));
460 #endif
/* Choose and map the buffer that receives generated host code, then derive
   the flush threshold and allocate the TranslationBlock array from its size.
   'tb_size' is the requested buffer size in bytes; 0 selects a default
   (DEFAULT_CODE_GEN_BUFFER_SIZE in user mode, ram_size / 4 otherwise).
   With USE_STATIC_CODE_GEN_BUFFER the static array is used and 'tb_size'
   is ignored. */
462 static void code_gen_alloc(unsigned long tb_size)
464 #ifdef USE_STATIC_CODE_GEN_BUFFER
465 code_gen_buffer = static_code_gen_buffer;
466 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
467 map_exec(code_gen_buffer, code_gen_buffer_size);
468 #else
469 code_gen_buffer_size = tb_size;
470 if (code_gen_buffer_size == 0) {
471 #if defined(CONFIG_USER_ONLY)
472 /* in user mode, phys_ram_size is not meaningful */
473 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
474 #else
475 /* XXX: needs adjustments */
476 code_gen_buffer_size = (unsigned long)(ram_size / 4);
477 #endif
/* Enforce the lower bound regardless of how the size was chosen. */
479 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
480 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
481 /* The code gen buffer location may have constraints depending on
482 the host cpu and OS */
483 #if defined(__linux__)
485 int flags;
486 void *start = NULL;
488 flags = MAP_PRIVATE | MAP_ANONYMOUS;
489 #if defined(__x86_64__)
490 flags |= MAP_32BIT;
491 /* Cannot map more than that */
492 if (code_gen_buffer_size > (800 * 1024 * 1024))
493 code_gen_buffer_size = (800 * 1024 * 1024);
494 #elif defined(__sparc_v9__)
495 // Map the buffer below 2G, so we can use direct calls and branches
/* NOTE(review): MAP_FIXED at a hard-coded address assumes nothing else is
   mapped there - confirm this holds for the supported hosts. */
496 flags |= MAP_FIXED;
497 start = (void *) 0x60000000UL;
498 if (code_gen_buffer_size > (512 * 1024 * 1024))
499 code_gen_buffer_size = (512 * 1024 * 1024);
500 #elif defined(__arm__)
501 /* Map the buffer below 32M, so we can use direct calls and branches */
502 flags |= MAP_FIXED;
503 start = (void *) 0x01000000UL;
504 if (code_gen_buffer_size > 16 * 1024 * 1024)
505 code_gen_buffer_size = 16 * 1024 * 1024;
506 #elif defined(__s390x__)
507 /* Map the buffer so that we can use direct calls and branches. */
508 /* We have a +- 4GB range on the branches; leave some slop. */
509 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
510 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
/* No MAP_FIXED on s390x: 'start' is passed to mmap() only as a hint. */
512 start = (void *)0x90000000UL;
513 #endif
514 code_gen_buffer = mmap(start, code_gen_buffer_size,
515 PROT_WRITE | PROT_READ | PROT_EXEC,
516 flags, -1, 0);
/* Failing to obtain the buffer is fatal: report and exit. */
517 if (code_gen_buffer == MAP_FAILED) {
518 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
519 exit(1);
522 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
523 || defined(__DragonFly__) || defined(__OpenBSD__)
525 int flags;
526 void *addr = NULL;
527 flags = MAP_PRIVATE | MAP_ANONYMOUS;
528 #if defined(__x86_64__)
529 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
530 * 0x40000000 is free */
531 flags |= MAP_FIXED;
532 addr = (void *)0x40000000;
533 /* Cannot map more than that */
534 if (code_gen_buffer_size > (800 * 1024 * 1024))
535 code_gen_buffer_size = (800 * 1024 * 1024);
536 #elif defined(__sparc_v9__)
537 // Map the buffer below 2G, so we can use direct calls and branches
538 flags |= MAP_FIXED;
539 addr = (void *) 0x60000000UL;
540 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
541 code_gen_buffer_size = (512 * 1024 * 1024);
543 #endif
544 code_gen_buffer = mmap(addr, code_gen_buffer_size,
545 PROT_WRITE | PROT_READ | PROT_EXEC,
546 flags, -1, 0);
547 if (code_gen_buffer == MAP_FAILED) {
548 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
549 exit(1);
552 #else
/* Generic hosts: heap-allocate the buffer and then make it executable. */
553 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
554 map_exec(code_gen_buffer, code_gen_buffer_size);
555 #endif
556 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
/* The prologue is a separate static array and needs its own protection
   change. */
557 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
/* Flush threshold: keep TCG_MAX_OP_SIZE * OPC_MAX_SIZE bytes of headroom
   below the end of the buffer.  NOTE(review): confirm OPC_MAX_SIZE is the
   true upper bound on the ops of a single TB; if the op buffer can hold
   more, this headroom is too small. */
558 code_gen_buffer_max_size = code_gen_buffer_size -
559 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
/* Size the TB array from the average expected block size. */
560 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
561 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
564 /* Must be called before using the QEMU cpus. 'tb_size' is the size
565 (in bytes) allocated to the translation buffer. Zero means default
566 size. */
567 void cpu_exec_init_all(unsigned long tb_size)
569 cpu_gen_init();
570 code_gen_alloc(tb_size);
571 code_gen_ptr = code_gen_buffer;
572 page_init();
573 #if !defined(CONFIG_USER_ONLY)
574 io_mem_init();
575 #endif
576 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
577 /* There's no guest base to take into account, so go ahead and
578 initialize the prologue now. */
579 tcg_prologue_init(&tcg_ctx);
580 #endif
583 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
585 static int cpu_common_post_load(void *opaque, int version_id)
587 CPUState *env = opaque;
589 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
590 version_id is increased. */
591 env->interrupt_request &= ~0x01;
592 tlb_flush(env, 1);
594 return 0;
/* Migration description of the CPU state shared by all targets: the
   'halted' and 'interrupt_request' fields of CPUState.  It is registered
   per CPU by cpu_exec_init(), and cpu_common_post_load() runs after the
   fields are loaded. */
597 static const VMStateDescription vmstate_cpu_common = {
598 .name = "cpu_common",
599 .version_id = 1,
600 .minimum_version_id = 1,
601 .minimum_version_id_old = 1,
602 .post_load = cpu_common_post_load,
603 .fields = (VMStateField []) {
604 VMSTATE_UINT32(halted, CPUState),
605 VMSTATE_UINT32(interrupt_request, CPUState),
606 VMSTATE_END_OF_LIST()
609 #endif
611 CPUState *qemu_get_cpu(int cpu)
613 CPUState *env = first_cpu;
615 while (env) {
616 if (env->cpu_index == cpu)
617 break;
618 env = env->next_cpu;
621 return env;
624 void cpu_exec_init(CPUState *env)
626 CPUState **penv;
627 int cpu_index;
629 #if defined(CONFIG_USER_ONLY)
630 cpu_list_lock();
631 #endif
632 env->next_cpu = NULL;
633 penv = &first_cpu;
634 cpu_index = 0;
635 while (*penv != NULL) {
636 penv = &(*penv)->next_cpu;
637 cpu_index++;
639 env->cpu_index = cpu_index;
640 env->numa_node = 0;
641 QTAILQ_INIT(&env->breakpoints);
642 QTAILQ_INIT(&env->watchpoints);
643 #ifndef CONFIG_USER_ONLY
644 env->thread_id = qemu_get_thread_id();
645 #endif
646 *penv = env;
647 #if defined(CONFIG_USER_ONLY)
648 cpu_list_unlock();
649 #endif
650 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
651 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
652 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
653 cpu_save, cpu_load, env);
654 #endif
657 /* Allocate a new translation block. Flush the translation buffer if
658 too many translation blocks or too much generated code. */
659 static TranslationBlock *tb_alloc(target_ulong pc)
661 TranslationBlock *tb;
663 if (nb_tbs >= code_gen_max_blocks ||
664 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
665 return NULL;
666 tb = &tbs[nb_tbs++];
667 tb->pc = pc;
668 tb->cflags = 0;
669 return tb;
672 void tb_free(TranslationBlock *tb)
674 /* In practice this is mostly used for single use temporary TB
675 Ignore the hard cases and just back up if this TB happens to
676 be the last one generated. */
677 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
678 code_gen_ptr = tb->tc_ptr;
679 nb_tbs--;
683 static inline void invalidate_page_bitmap(PageDesc *p)
685 if (p->code_bitmap) {
686 qemu_free(p->code_bitmap);
687 p->code_bitmap = NULL;
689 p->code_write_count = 0;
692 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
694 static void page_flush_tb_1 (int level, void **lp)
696 int i;
698 if (*lp == NULL) {
699 return;
701 if (level == 0) {
702 PageDesc *pd = *lp;
703 for (i = 0; i < L2_SIZE; ++i) {
704 pd[i].first_tb = NULL;
705 invalidate_page_bitmap(pd + i);
707 } else {
708 void **pp = *lp;
709 for (i = 0; i < L2_SIZE; ++i) {
710 page_flush_tb_1 (level - 1, pp + i);
715 static void page_flush_tb(void)
717 int i;
718 for (i = 0; i < V_L1_SIZE; i++) {
719 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
723 /* flush all the translation blocks */
724 /* XXX: tb_flush is currently not thread safe */
725 void tb_flush(CPUState *env1)
727 CPUState *env;
728 #if defined(DEBUG_FLUSH)
729 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
730 (unsigned long)(code_gen_ptr - code_gen_buffer),
731 nb_tbs, nb_tbs > 0 ?
732 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
733 #endif
734 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
735 cpu_abort(env1, "Internal error: code buffer overflow\n");
737 nb_tbs = 0;
739 for(env = first_cpu; env != NULL; env = env->next_cpu) {
740 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
743 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
744 page_flush_tb();
746 code_gen_ptr = code_gen_buffer;
747 /* XXX: flush processor icache at this point if cache flush is
748 expensive */
749 tb_flush_count++;
752 #ifdef DEBUG_TB_CHECK
754 static void tb_invalidate_check(target_ulong address)
756 TranslationBlock *tb;
757 int i;
758 address &= TARGET_PAGE_MASK;
759 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
760 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
761 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
762 address >= tb->pc + tb->size)) {
763 printf("ERROR invalidate: address=" TARGET_FMT_lx
764 " PC=%08lx size=%04x\n",
765 address, (long)tb->pc, tb->size);
771 /* verify that all the pages have correct rights for code */
772 static void tb_page_check(void)
774 TranslationBlock *tb;
775 int i, flags1, flags2;
777 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
778 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
779 flags1 = page_get_flags(tb->pc);
780 flags2 = page_get_flags(tb->pc + tb->size - 1);
781 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
782 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
783 (long)tb->pc, tb->size, flags1, flags2);
789 #endif
791 /* invalidate one TB */
792 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
793 int next_offset)
795 TranslationBlock *tb1;
796 for(;;) {
797 tb1 = *ptb;
798 if (tb1 == tb) {
799 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
800 break;
802 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
806 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
808 TranslationBlock *tb1;
809 unsigned int n1;
811 for(;;) {
812 tb1 = *ptb;
813 n1 = (long)tb1 & 3;
814 tb1 = (TranslationBlock *)((long)tb1 & ~3);
815 if (tb1 == tb) {
816 *ptb = tb1->page_next[n1];
817 break;
819 ptb = &tb1->page_next[n1];
823 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
825 TranslationBlock *tb1, **ptb;
826 unsigned int n1;
828 ptb = &tb->jmp_next[n];
829 tb1 = *ptb;
830 if (tb1) {
831 /* find tb(n) in circular list */
832 for(;;) {
833 tb1 = *ptb;
834 n1 = (long)tb1 & 3;
835 tb1 = (TranslationBlock *)((long)tb1 & ~3);
836 if (n1 == n && tb1 == tb)
837 break;
838 if (n1 == 2) {
839 ptb = &tb1->jmp_first;
840 } else {
841 ptb = &tb1->jmp_next[n1];
844 /* now we can suppress tb(n) from the list */
845 *ptb = tb->jmp_next[n];
847 tb->jmp_next[n] = NULL;
851 /* reset the jump entry 'n' of a TB so that it is not chained to
852 another TB */
853 static inline void tb_reset_jump(TranslationBlock *tb, int n)
855 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
858 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
860 CPUState *env;
861 PageDesc *p;
862 unsigned int h, n1;
863 tb_page_addr_t phys_pc;
864 TranslationBlock *tb1, *tb2;
866 /* remove the TB from the hash list */
867 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
868 h = tb_phys_hash_func(phys_pc);
869 tb_remove(&tb_phys_hash[h], tb,
870 offsetof(TranslationBlock, phys_hash_next));
872 /* remove the TB from the page list */
873 if (tb->page_addr[0] != page_addr) {
874 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
875 tb_page_remove(&p->first_tb, tb);
876 invalidate_page_bitmap(p);
878 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
879 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
880 tb_page_remove(&p->first_tb, tb);
881 invalidate_page_bitmap(p);
884 tb_invalidated_flag = 1;
886 /* remove the TB from the hash list */
887 h = tb_jmp_cache_hash_func(tb->pc);
888 for(env = first_cpu; env != NULL; env = env->next_cpu) {
889 if (env->tb_jmp_cache[h] == tb)
890 env->tb_jmp_cache[h] = NULL;
893 /* suppress this TB from the two jump lists */
894 tb_jmp_remove(tb, 0);
895 tb_jmp_remove(tb, 1);
897 /* suppress any remaining jumps to this TB */
898 tb1 = tb->jmp_first;
899 for(;;) {
900 n1 = (long)tb1 & 3;
901 if (n1 == 2)
902 break;
903 tb1 = (TranslationBlock *)((long)tb1 & ~3);
904 tb2 = tb1->jmp_next[n1];
905 tb_reset_jump(tb1, n1);
906 tb1->jmp_next[n1] = NULL;
907 tb1 = tb2;
909 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
911 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab'.  Bit k lives in byte
   k / 8 at position k % 8, least significant bit first.  A non-positive
   'len' sets nothing. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    /* Without this guard a negative length that crosses a byte boundary
       would fall into the multi-byte path and write a stray mask. */
    if (len <= 0) {
        return;
    }

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* Range starts and ends inside the same byte. */
        mask &= ~(0xff << (end & 7));
        *tab |= mask;
    } else {
        /* Partial first byte, whole middle bytes, partial last byte. */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
941 static void build_page_bitmap(PageDesc *p)
943 int n, tb_start, tb_end;
944 TranslationBlock *tb;
946 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
948 tb = p->first_tb;
949 while (tb != NULL) {
950 n = (long)tb & 3;
951 tb = (TranslationBlock *)((long)tb & ~3);
952 /* NOTE: this is subtle as a TB may span two physical pages */
953 if (n == 0) {
954 /* NOTE: tb_end may be after the end of the page, but
955 it is not a problem */
956 tb_start = tb->pc & ~TARGET_PAGE_MASK;
957 tb_end = tb_start + tb->size;
958 if (tb_end > TARGET_PAGE_SIZE)
959 tb_end = TARGET_PAGE_SIZE;
960 } else {
961 tb_start = 0;
962 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
964 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
965 tb = tb->page_next[n];
969 TranslationBlock *tb_gen_code(CPUState *env,
970 target_ulong pc, target_ulong cs_base,
971 int flags, int cflags)
973 TranslationBlock *tb;
974 uint8_t *tc_ptr;
975 tb_page_addr_t phys_pc, phys_page2;
976 target_ulong virt_page2;
977 int code_gen_size;
979 phys_pc = get_page_addr_code(env, pc);
980 tb = tb_alloc(pc);
981 if (!tb) {
982 /* flush must be done */
983 tb_flush(env);
984 /* cannot fail at this point */
985 tb = tb_alloc(pc);
986 /* Don't forget to invalidate previous TB info. */
987 tb_invalidated_flag = 1;
989 tc_ptr = code_gen_ptr;
990 tb->tc_ptr = tc_ptr;
991 tb->cs_base = cs_base;
992 tb->flags = flags;
993 tb->cflags = cflags;
994 cpu_gen_code(env, tb, &code_gen_size);
995 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
997 /* check next page if needed */
998 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
999 phys_page2 = -1;
1000 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1001 phys_page2 = get_page_addr_code(env, virt_page2);
1003 tb_link_page(tb, phys_pc, phys_page2);
1004 return tb;
1007 /* invalidate all TBs which intersect with the target physical page
1008 starting in range [start;end[. NOTE: start and end must refer to
1009 the same physical page. 'is_cpu_write_access' should be true if called
1010 from a real cpu write access: the virtual CPU will exit the current
1011 TB if code is modified inside this TB. */
1012 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1013 int is_cpu_write_access)
1015 TranslationBlock *tb, *tb_next, *saved_tb;
1016 CPUState *env = cpu_single_env;
1017 tb_page_addr_t tb_start, tb_end;
1018 PageDesc *p;
1019 int n;
1020 #ifdef TARGET_HAS_PRECISE_SMC
1021 int current_tb_not_found = is_cpu_write_access;
1022 TranslationBlock *current_tb = NULL;
1023 int current_tb_modified = 0;
1024 target_ulong current_pc = 0;
1025 target_ulong current_cs_base = 0;
1026 int current_flags = 0;
1027 #endif /* TARGET_HAS_PRECISE_SMC */
1029 p = page_find(start >> TARGET_PAGE_BITS);
1030 if (!p)
1031 return;
1032 if (!p->code_bitmap &&
1033 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1034 is_cpu_write_access) {
1035 /* build code bitmap */
1036 build_page_bitmap(p);
1039 /* we remove all the TBs in the range [start, end[ */
1040 /* XXX: see if in some cases it could be faster to invalidate all the code */
1041 tb = p->first_tb;
1042 while (tb != NULL) {
1043 n = (long)tb & 3;
1044 tb = (TranslationBlock *)((long)tb & ~3);
1045 tb_next = tb->page_next[n];
1046 /* NOTE: this is subtle as a TB may span two physical pages */
1047 if (n == 0) {
1048 /* NOTE: tb_end may be after the end of the page, but
1049 it is not a problem */
1050 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1051 tb_end = tb_start + tb->size;
1052 } else {
1053 tb_start = tb->page_addr[1];
1054 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1056 if (!(tb_end <= start || tb_start >= end)) {
1057 #ifdef TARGET_HAS_PRECISE_SMC
1058 if (current_tb_not_found) {
1059 current_tb_not_found = 0;
1060 current_tb = NULL;
1061 if (env->mem_io_pc) {
1062 /* now we have a real cpu fault */
1063 current_tb = tb_find_pc(env->mem_io_pc);
1066 if (current_tb == tb &&
1067 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1068 /* If we are modifying the current TB, we must stop
1069 its execution. We could be more precise by checking
1070 that the modification is after the current PC, but it
1071 would require a specialized function to partially
1072 restore the CPU state */
1074 current_tb_modified = 1;
1075 cpu_restore_state(current_tb, env, env->mem_io_pc);
1076 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1077 &current_flags);
1079 #endif /* TARGET_HAS_PRECISE_SMC */
1080 /* we need to do that to handle the case where a signal
1081 occurs while doing tb_phys_invalidate() */
1082 saved_tb = NULL;
1083 if (env) {
1084 saved_tb = env->current_tb;
1085 env->current_tb = NULL;
1087 tb_phys_invalidate(tb, -1);
1088 if (env) {
1089 env->current_tb = saved_tb;
1090 if (env->interrupt_request && env->current_tb)
1091 cpu_interrupt(env, env->interrupt_request);
1094 tb = tb_next;
1096 #if !defined(CONFIG_USER_ONLY)
1097 /* if no code remaining, no need to continue to use slow writes */
1098 if (!p->first_tb) {
1099 invalidate_page_bitmap(p);
1100 if (is_cpu_write_access) {
1101 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1104 #endif
1105 #ifdef TARGET_HAS_PRECISE_SMC
1106 if (current_tb_modified) {
1107 /* we generate a block containing just the instruction
1108 modifying the memory. It will ensure that it cannot modify
1109 itself */
1110 env->current_tb = NULL;
1111 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1112 cpu_resume_from_signal(env, NULL);
1114 #endif
1117 /* len must be <= 8 and start must be a multiple of len */
1118 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1120 PageDesc *p;
1121 int offset, b;
1122 #if 0
1123 if (1) {
1124 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1125 cpu_single_env->mem_io_vaddr, len,
1126 cpu_single_env->eip,
1127 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1129 #endif
1130 p = page_find(start >> TARGET_PAGE_BITS);
1131 if (!p)
1132 return;
1133 if (p->code_bitmap) {
1134 offset = start & ~TARGET_PAGE_MASK;
1135 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1136 if (b & ((1 << len) - 1))
1137 goto do_invalidate;
1138 } else {
1139 do_invalidate:
1140 tb_invalidate_phys_page_range(start, start + len, 1);
1144 #if !defined(CONFIG_SOFTMMU)
1145 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1146 unsigned long pc, void *puc)
1148 TranslationBlock *tb;
1149 PageDesc *p;
1150 int n;
1151 #ifdef TARGET_HAS_PRECISE_SMC
1152 TranslationBlock *current_tb = NULL;
1153 CPUState *env = cpu_single_env;
1154 int current_tb_modified = 0;
1155 target_ulong current_pc = 0;
1156 target_ulong current_cs_base = 0;
1157 int current_flags = 0;
1158 #endif
1160 addr &= TARGET_PAGE_MASK;
1161 p = page_find(addr >> TARGET_PAGE_BITS);
1162 if (!p)
1163 return;
1164 tb = p->first_tb;
1165 #ifdef TARGET_HAS_PRECISE_SMC
1166 if (tb && pc != 0) {
1167 current_tb = tb_find_pc(pc);
1169 #endif
1170 while (tb != NULL) {
1171 n = (long)tb & 3;
1172 tb = (TranslationBlock *)((long)tb & ~3);
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 if (current_tb == tb &&
1175 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1176 /* If we are modifying the current TB, we must stop
1177 its execution. We could be more precise by checking
1178 that the modification is after the current PC, but it
1179 would require a specialized function to partially
1180 restore the CPU state */
1182 current_tb_modified = 1;
1183 cpu_restore_state(current_tb, env, pc);
1184 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1185 &current_flags);
1187 #endif /* TARGET_HAS_PRECISE_SMC */
1188 tb_phys_invalidate(tb, addr);
1189 tb = tb->page_next[n];
1191 p->first_tb = NULL;
1192 #ifdef TARGET_HAS_PRECISE_SMC
1193 if (current_tb_modified) {
1194 /* we generate a block containing just the instruction
1195 modifying the memory. It will ensure that it cannot modify
1196 itself */
1197 env->current_tb = NULL;
1198 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1199 cpu_resume_from_signal(env, puc);
1201 #endif
1203 #endif
1205 /* add the tb in the target page and protect it if necessary */
1206 static inline void tb_alloc_page(TranslationBlock *tb,
1207 unsigned int n, tb_page_addr_t page_addr)
1209 PageDesc *p;
1210 TranslationBlock *last_first_tb;
1212 tb->page_addr[n] = page_addr;
1213 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1214 tb->page_next[n] = p->first_tb;
1215 last_first_tb = p->first_tb;
1216 p->first_tb = (TranslationBlock *)((long)tb | n);
1217 invalidate_page_bitmap(p);
1219 #if defined(TARGET_HAS_SMC) || 1
1221 #if defined(CONFIG_USER_ONLY)
1222 if (p->flags & PAGE_WRITE) {
1223 target_ulong addr;
1224 PageDesc *p2;
1225 int prot;
1227 /* force the host page as non writable (writes will have a
1228 page fault + mprotect overhead) */
1229 page_addr &= qemu_host_page_mask;
1230 prot = 0;
1231 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1232 addr += TARGET_PAGE_SIZE) {
1234 p2 = page_find (addr >> TARGET_PAGE_BITS);
1235 if (!p2)
1236 continue;
1237 prot |= p2->flags;
1238 p2->flags &= ~PAGE_WRITE;
1240 mprotect(g2h(page_addr), qemu_host_page_size,
1241 (prot & PAGE_BITS) & ~PAGE_WRITE);
1242 #ifdef DEBUG_TB_INVALIDATE
1243 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1244 page_addr);
1245 #endif
1247 #else
1248 /* if some code is already present, then the pages are already
1249 protected. So we handle the case where only the first TB is
1250 allocated in a physical page */
1251 if (!last_first_tb) {
1252 tlb_protect_code(page_addr);
1254 #endif
1256 #endif /* TARGET_HAS_SMC */
1259 /* add a new TB and link it to the physical page tables. phys_page2 is
1260 (-1) to indicate that only one page contains the TB. */
1261 void tb_link_page(TranslationBlock *tb,
1262 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1264 unsigned int h;
1265 TranslationBlock **ptb;
1267 /* Grab the mmap lock to stop another thread invalidating this TB
1268 before we are done. */
1269 mmap_lock();
1270 /* add in the physical hash table */
1271 h = tb_phys_hash_func(phys_pc);
1272 ptb = &tb_phys_hash[h];
1273 tb->phys_hash_next = *ptb;
1274 *ptb = tb;
1276 /* add in the page list */
1277 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1278 if (phys_page2 != -1)
1279 tb_alloc_page(tb, 1, phys_page2);
1280 else
1281 tb->page_addr[1] = -1;
1283 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1284 tb->jmp_next[0] = NULL;
1285 tb->jmp_next[1] = NULL;
1287 /* init original jump addresses */
1288 if (tb->tb_next_offset[0] != 0xffff)
1289 tb_reset_jump(tb, 0);
1290 if (tb->tb_next_offset[1] != 0xffff)
1291 tb_reset_jump(tb, 1);
1293 #ifdef DEBUG_TB_CHECK
1294 tb_page_check();
1295 #endif
1296 mmap_unlock();
1299 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1300 tb[1].tc_ptr. Return NULL if not found */
1301 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1303 int m_min, m_max, m;
1304 unsigned long v;
1305 TranslationBlock *tb;
1307 if (nb_tbs <= 0)
1308 return NULL;
1309 if (tc_ptr < (unsigned long)code_gen_buffer ||
1310 tc_ptr >= (unsigned long)code_gen_ptr)
1311 return NULL;
1312 /* binary search (cf Knuth) */
1313 m_min = 0;
1314 m_max = nb_tbs - 1;
1315 while (m_min <= m_max) {
1316 m = (m_min + m_max) >> 1;
1317 tb = &tbs[m];
1318 v = (unsigned long)tb->tc_ptr;
1319 if (v == tc_ptr)
1320 return tb;
1321 else if (tc_ptr < v) {
1322 m_max = m - 1;
1323 } else {
1324 m_min = m + 1;
1327 return &tbs[m_max];
1330 static void tb_reset_jump_recursive(TranslationBlock *tb);
1332 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1334 TranslationBlock *tb1, *tb_next, **ptb;
1335 unsigned int n1;
1337 tb1 = tb->jmp_next[n];
1338 if (tb1 != NULL) {
1339 /* find head of list */
1340 for(;;) {
1341 n1 = (long)tb1 & 3;
1342 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1343 if (n1 == 2)
1344 break;
1345 tb1 = tb1->jmp_next[n1];
1347 /* we are now sure now that tb jumps to tb1 */
1348 tb_next = tb1;
1350 /* remove tb from the jmp_first list */
1351 ptb = &tb_next->jmp_first;
1352 for(;;) {
1353 tb1 = *ptb;
1354 n1 = (long)tb1 & 3;
1355 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1356 if (n1 == n && tb1 == tb)
1357 break;
1358 ptb = &tb1->jmp_next[n1];
1360 *ptb = tb->jmp_next[n];
1361 tb->jmp_next[n] = NULL;
1363 /* suppress the jump to next tb in generated code */
1364 tb_reset_jump(tb, n);
1366 /* suppress jumps in the tb on which we could have jumped */
1367 tb_reset_jump_recursive(tb_next);
1371 static void tb_reset_jump_recursive(TranslationBlock *tb)
1373 tb_reset_jump_recursive2(tb, 0);
1374 tb_reset_jump_recursive2(tb, 1);
1377 #if defined(TARGET_HAS_ICE)
1378 #if defined(CONFIG_USER_ONLY)
1379 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1381 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1383 #else
1384 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1386 target_phys_addr_t addr;
1387 target_ulong pd;
1388 ram_addr_t ram_addr;
1389 PhysPageDesc *p;
1391 addr = cpu_get_phys_page_debug(env, pc);
1392 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1393 if (!p) {
1394 pd = IO_MEM_UNASSIGNED;
1395 } else {
1396 pd = p->phys_offset;
1398 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1399 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1401 #endif
1402 #endif /* TARGET_HAS_ICE */
1404 #if defined(CONFIG_USER_ONLY)
1405 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1410 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1411 int flags, CPUWatchpoint **watchpoint)
1413 return -ENOSYS;
1415 #else
1416 /* Add a watchpoint. */
1417 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1418 int flags, CPUWatchpoint **watchpoint)
1420 target_ulong len_mask = ~(len - 1);
1421 CPUWatchpoint *wp;
1423 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1424 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1425 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1426 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1427 return -EINVAL;
1429 wp = qemu_malloc(sizeof(*wp));
1431 wp->vaddr = addr;
1432 wp->len_mask = len_mask;
1433 wp->flags = flags;
1435 /* keep all GDB-injected watchpoints in front */
1436 if (flags & BP_GDB)
1437 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1438 else
1439 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1441 tlb_flush_page(env, addr);
1443 if (watchpoint)
1444 *watchpoint = wp;
1445 return 0;
1448 /* Remove a specific watchpoint. */
1449 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1450 int flags)
1452 target_ulong len_mask = ~(len - 1);
1453 CPUWatchpoint *wp;
1455 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1456 if (addr == wp->vaddr && len_mask == wp->len_mask
1457 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1458 cpu_watchpoint_remove_by_ref(env, wp);
1459 return 0;
1462 return -ENOENT;
1465 /* Remove a specific watchpoint by reference. */
1466 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1468 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1470 tlb_flush_page(env, watchpoint->vaddr);
1472 qemu_free(watchpoint);
1475 /* Remove all matching watchpoints. */
1476 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1478 CPUWatchpoint *wp, *next;
1480 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1481 if (wp->flags & mask)
1482 cpu_watchpoint_remove_by_ref(env, wp);
1485 #endif
1487 /* Add a breakpoint. */
1488 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1489 CPUBreakpoint **breakpoint)
1491 #if defined(TARGET_HAS_ICE)
1492 CPUBreakpoint *bp;
1494 bp = qemu_malloc(sizeof(*bp));
1496 bp->pc = pc;
1497 bp->flags = flags;
1499 /* keep all GDB-injected breakpoints in front */
1500 if (flags & BP_GDB)
1501 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1502 else
1503 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1505 breakpoint_invalidate(env, pc);
1507 if (breakpoint)
1508 *breakpoint = bp;
1509 return 0;
1510 #else
1511 return -ENOSYS;
1512 #endif
1515 /* Remove a specific breakpoint. */
1516 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1518 #if defined(TARGET_HAS_ICE)
1519 CPUBreakpoint *bp;
1521 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1522 if (bp->pc == pc && bp->flags == flags) {
1523 cpu_breakpoint_remove_by_ref(env, bp);
1524 return 0;
1527 return -ENOENT;
1528 #else
1529 return -ENOSYS;
1530 #endif
1533 /* Remove a specific breakpoint by reference. */
1534 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1536 #if defined(TARGET_HAS_ICE)
1537 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1539 breakpoint_invalidate(env, breakpoint->pc);
1541 qemu_free(breakpoint);
1542 #endif
1545 /* Remove all matching breakpoints. */
1546 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1548 #if defined(TARGET_HAS_ICE)
1549 CPUBreakpoint *bp, *next;
1551 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1552 if (bp->flags & mask)
1553 cpu_breakpoint_remove_by_ref(env, bp);
1555 #endif
1558 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1559 CPU loop after each instruction */
1560 void cpu_single_step(CPUState *env, int enabled)
1562 #if defined(TARGET_HAS_ICE)
1563 if (env->singlestep_enabled != enabled) {
1564 env->singlestep_enabled = enabled;
1565 if (kvm_enabled())
1566 kvm_update_guest_debug(env, 0);
1567 else {
1568 /* must flush all the translated code to avoid inconsistencies */
1569 /* XXX: only flush what is necessary */
1570 tb_flush(env);
1573 #endif
1576 /* enable or disable low levels log */
1577 void cpu_set_log(int log_flags)
1579 loglevel = log_flags;
1580 if (loglevel && !logfile) {
1581 logfile = fopen(logfilename, log_append ? "a" : "w");
1582 if (!logfile) {
1583 perror(logfilename);
1584 _exit(1);
1586 #if !defined(CONFIG_SOFTMMU)
1587 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1589 static char logfile_buf[4096];
1590 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1592 #elif !defined(_WIN32)
1593 /* Win32 doesn't support line-buffering and requires size >= 2 */
1594 setvbuf(logfile, NULL, _IOLBF, 0);
1595 #endif
1596 log_append = 1;
1598 if (!loglevel && logfile) {
1599 fclose(logfile);
1600 logfile = NULL;
1604 void cpu_set_log_filename(const char *filename)
1606 logfilename = strdup(filename);
1607 if (logfile) {
1608 fclose(logfile);
1609 logfile = NULL;
1611 cpu_set_log(loglevel);
1614 static void cpu_unlink_tb(CPUState *env)
1616 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1617 problem and hope the cpu will stop of its own accord. For userspace
1618 emulation this often isn't actually as bad as it sounds. Often
1619 signals are used primarily to interrupt blocking syscalls. */
1620 TranslationBlock *tb;
1621 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1623 spin_lock(&interrupt_lock);
1624 tb = env->current_tb;
1625 /* if the cpu is currently executing code, we must unlink it and
1626 all the potentially executing TB */
1627 if (tb) {
1628 env->current_tb = NULL;
1629 tb_reset_jump_recursive(tb);
1631 spin_unlock(&interrupt_lock);
1634 #ifndef CONFIG_USER_ONLY
1635 /* mask must never be zero, except for A20 change call */
1636 static void tcg_handle_interrupt(CPUState *env, int mask)
1638 int old_mask;
1640 old_mask = env->interrupt_request;
1641 env->interrupt_request |= mask;
1644 * If called from iothread context, wake the target cpu in
1645 * case its halted.
1647 if (!qemu_cpu_is_self(env)) {
1648 qemu_cpu_kick(env);
1649 return;
1652 if (use_icount) {
1653 env->icount_decr.u16.high = 0xffff;
1654 if (!can_do_io(env)
1655 && (mask & ~old_mask) != 0) {
1656 cpu_abort(env, "Raised interrupt while not in I/O function");
1658 } else {
1659 cpu_unlink_tb(env);
1663 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1665 #else /* CONFIG_USER_ONLY */
1667 void cpu_interrupt(CPUState *env, int mask)
1669 env->interrupt_request |= mask;
1670 cpu_unlink_tb(env);
1672 #endif /* CONFIG_USER_ONLY */
1674 void cpu_reset_interrupt(CPUState *env, int mask)
1676 env->interrupt_request &= ~mask;
1679 void cpu_exit(CPUState *env)
1681 env->exit_request = 1;
1682 cpu_unlink_tb(env);
1685 const CPULogItem cpu_log_items[] = {
1686 { CPU_LOG_TB_OUT_ASM, "out_asm",
1687 "show generated host assembly code for each compiled TB" },
1688 { CPU_LOG_TB_IN_ASM, "in_asm",
1689 "show target assembly code for each compiled TB" },
1690 { CPU_LOG_TB_OP, "op",
1691 "show micro ops for each compiled TB" },
1692 { CPU_LOG_TB_OP_OPT, "op_opt",
1693 "show micro ops "
1694 #ifdef TARGET_I386
1695 "before eflags optimization and "
1696 #endif
1697 "after liveness analysis" },
1698 { CPU_LOG_INT, "int",
1699 "show interrupts/exceptions in short format" },
1700 { CPU_LOG_EXEC, "exec",
1701 "show trace before each executed TB (lots of logs)" },
1702 { CPU_LOG_TB_CPU, "cpu",
1703 "show CPU state before block translation" },
1704 #ifdef TARGET_I386
1705 { CPU_LOG_PCALL, "pcall",
1706 "show protected mode far calls/returns/exceptions" },
1707 { CPU_LOG_RESET, "cpu_reset",
1708 "show CPU state before CPU resets" },
1709 #endif
1710 #ifdef DEBUG_IOPORT
1711 { CPU_LOG_IOPORT, "ioport",
1712 "show all i/o ports accesses" },
1713 #endif
1714 { 0, NULL, NULL },
1717 #ifndef CONFIG_USER_ONLY
1718 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1719 = QLIST_HEAD_INITIALIZER(memory_client_list);
1721 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1722 ram_addr_t size,
1723 ram_addr_t phys_offset,
1724 bool log_dirty)
1726 CPUPhysMemoryClient *client;
1727 QLIST_FOREACH(client, &memory_client_list, list) {
1728 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1732 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1733 target_phys_addr_t end)
1735 CPUPhysMemoryClient *client;
1736 QLIST_FOREACH(client, &memory_client_list, list) {
1737 int r = client->sync_dirty_bitmap(client, start, end);
1738 if (r < 0)
1739 return r;
1741 return 0;
1744 static int cpu_notify_migration_log(int enable)
1746 CPUPhysMemoryClient *client;
1747 QLIST_FOREACH(client, &memory_client_list, list) {
1748 int r = client->migration_log(client, enable);
1749 if (r < 0)
1750 return r;
1752 return 0;
1755 struct last_map {
1756 target_phys_addr_t start_addr;
1757 ram_addr_t size;
1758 ram_addr_t phys_offset;
1761 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1762 * address. Each intermediate table provides the next L2_BITs of guest
1763 * physical address space. The number of levels vary based on host and
1764 * guest configuration, making it efficient to build the final guest
1765 * physical address by seeding the L1 offset and shifting and adding in
1766 * each L2 offset as we recurse through them. */
1767 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1768 void **lp, target_phys_addr_t addr,
1769 struct last_map *map)
1771 int i;
1773 if (*lp == NULL) {
1774 return;
1776 if (level == 0) {
1777 PhysPageDesc *pd = *lp;
1778 addr <<= L2_BITS + TARGET_PAGE_BITS;
1779 for (i = 0; i < L2_SIZE; ++i) {
1780 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1781 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1783 if (map->size &&
1784 start_addr == map->start_addr + map->size &&
1785 pd[i].phys_offset == map->phys_offset + map->size) {
1787 map->size += TARGET_PAGE_SIZE;
1788 continue;
1789 } else if (map->size) {
1790 client->set_memory(client, map->start_addr,
1791 map->size, map->phys_offset, false);
1794 map->start_addr = start_addr;
1795 map->size = TARGET_PAGE_SIZE;
1796 map->phys_offset = pd[i].phys_offset;
1799 } else {
1800 void **pp = *lp;
1801 for (i = 0; i < L2_SIZE; ++i) {
1802 phys_page_for_each_1(client, level - 1, pp + i,
1803 (addr << L2_BITS) | i, map);
1808 static void phys_page_for_each(CPUPhysMemoryClient *client)
1810 int i;
1811 struct last_map map = { };
1813 for (i = 0; i < P_L1_SIZE; ++i) {
1814 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1815 l1_phys_map + i, i, &map);
1817 if (map.size) {
1818 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1819 false);
1823 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1825 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1826 phys_page_for_each(client);
1829 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1831 QLIST_REMOVE(client, list);
1833 #endif
/* Return 1 when the first n bytes of s1 are exactly the string s2
   (i.e. s2 has length n and the bytes match), 0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    int same_len = (strlen(s2) == n);

    return same_len ? (memcmp(s1, s2, n) == 0) : 0;
}
1842 /* takes a comma separated list of log masks. Return 0 if error. */
1843 int cpu_str_to_log_mask(const char *str)
1845 const CPULogItem *item;
1846 int mask;
1847 const char *p, *p1;
1849 p = str;
1850 mask = 0;
1851 for(;;) {
1852 p1 = strchr(p, ',');
1853 if (!p1)
1854 p1 = p + strlen(p);
1855 if(cmp1(p,p1-p,"all")) {
1856 for(item = cpu_log_items; item->mask != 0; item++) {
1857 mask |= item->mask;
1859 } else {
1860 for(item = cpu_log_items; item->mask != 0; item++) {
1861 if (cmp1(p, p1 - p, item->name))
1862 goto found;
1864 return 0;
1866 found:
1867 mask |= item->mask;
1868 if (*p1 != ',')
1869 break;
1870 p = p1 + 1;
1872 return mask;
1875 void cpu_abort(CPUState *env, const char *fmt, ...)
1877 va_list ap;
1878 va_list ap2;
1880 va_start(ap, fmt);
1881 va_copy(ap2, ap);
1882 fprintf(stderr, "qemu: fatal: ");
1883 vfprintf(stderr, fmt, ap);
1884 fprintf(stderr, "\n");
1885 #ifdef TARGET_I386
1886 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1887 #else
1888 cpu_dump_state(env, stderr, fprintf, 0);
1889 #endif
1890 if (qemu_log_enabled()) {
1891 qemu_log("qemu: fatal: ");
1892 qemu_log_vprintf(fmt, ap2);
1893 qemu_log("\n");
1894 #ifdef TARGET_I386
1895 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1896 #else
1897 log_cpu_state(env, 0);
1898 #endif
1899 qemu_log_flush();
1900 qemu_log_close();
1902 va_end(ap2);
1903 va_end(ap);
1904 #if defined(CONFIG_USER_ONLY)
1906 struct sigaction act;
1907 sigfillset(&act.sa_mask);
1908 act.sa_handler = SIG_DFL;
1909 sigaction(SIGABRT, &act, NULL);
1911 #endif
1912 abort();
1915 CPUState *cpu_copy(CPUState *env)
1917 CPUState *new_env = cpu_init(env->cpu_model_str);
1918 CPUState *next_cpu = new_env->next_cpu;
1919 int cpu_index = new_env->cpu_index;
1920 #if defined(TARGET_HAS_ICE)
1921 CPUBreakpoint *bp;
1922 CPUWatchpoint *wp;
1923 #endif
1925 memcpy(new_env, env, sizeof(CPUState));
1927 /* Preserve chaining and index. */
1928 new_env->next_cpu = next_cpu;
1929 new_env->cpu_index = cpu_index;
1931 /* Clone all break/watchpoints.
1932 Note: Once we support ptrace with hw-debug register access, make sure
1933 BP_CPU break/watchpoints are handled correctly on clone. */
1934 QTAILQ_INIT(&env->breakpoints);
1935 QTAILQ_INIT(&env->watchpoints);
1936 #if defined(TARGET_HAS_ICE)
1937 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1938 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1940 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1941 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1942 wp->flags, NULL);
1944 #endif
1946 return new_env;
1949 #if !defined(CONFIG_USER_ONLY)
1951 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1953 unsigned int i;
1955 /* Discard jump cache entries for any tb which might potentially
1956 overlap the flushed page. */
1957 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1958 memset (&env->tb_jmp_cache[i], 0,
1959 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1961 i = tb_jmp_cache_hash_page(addr);
1962 memset (&env->tb_jmp_cache[i], 0,
1963 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1966 static CPUTLBEntry s_cputlb_empty_entry = {
1967 .addr_read = -1,
1968 .addr_write = -1,
1969 .addr_code = -1,
1970 .addend = -1,
1973 /* NOTE: if flush_global is true, also flush global entries (not
1974 implemented yet) */
1975 void tlb_flush(CPUState *env, int flush_global)
1977 int i;
1979 #if defined(DEBUG_TLB)
1980 printf("tlb_flush:\n");
1981 #endif
1982 /* must reset current TB so that interrupts cannot modify the
1983 links while we are modifying them */
1984 env->current_tb = NULL;
1986 for(i = 0; i < CPU_TLB_SIZE; i++) {
1987 int mmu_idx;
1988 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1989 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1993 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1995 env->tlb_flush_addr = -1;
1996 env->tlb_flush_mask = 0;
1997 tlb_flush_count++;
2000 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2002 if (addr == (tlb_entry->addr_read &
2003 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2004 addr == (tlb_entry->addr_write &
2005 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2006 addr == (tlb_entry->addr_code &
2007 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2008 *tlb_entry = s_cputlb_empty_entry;
2012 void tlb_flush_page(CPUState *env, target_ulong addr)
2014 int i;
2015 int mmu_idx;
2017 #if defined(DEBUG_TLB)
2018 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2019 #endif
2020 /* Check if we need to flush due to large pages. */
2021 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2022 #if defined(DEBUG_TLB)
2023 printf("tlb_flush_page: forced full flush ("
2024 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2025 env->tlb_flush_addr, env->tlb_flush_mask);
2026 #endif
2027 tlb_flush(env, 1);
2028 return;
2030 /* must reset current TB so that interrupts cannot modify the
2031 links while we are modifying them */
2032 env->current_tb = NULL;
2034 addr &= TARGET_PAGE_MASK;
2035 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2036 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2037 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2039 tlb_flush_jmp_cache(env, addr);
2042 /* update the TLBs so that writes to code in the virtual page 'addr'
2043 can be detected */
2044 static void tlb_protect_code(ram_addr_t ram_addr)
2046 cpu_physical_memory_reset_dirty(ram_addr,
2047 ram_addr + TARGET_PAGE_SIZE,
2048 CODE_DIRTY_FLAG);
2051 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2052 tested for self modifying code */
2053 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2054 target_ulong vaddr)
2056 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2059 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2060 unsigned long start, unsigned long length)
2062 unsigned long addr;
2063 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2064 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2065 if ((addr - start) < length) {
2066 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2071 /* Note: start and end must be within the same ram block. */
2072 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2073 int dirty_flags)
2075 CPUState *env;
2076 unsigned long length, start1;
2077 int i;
2079 start &= TARGET_PAGE_MASK;
2080 end = TARGET_PAGE_ALIGN(end);
2082 length = end - start;
2083 if (length == 0)
2084 return;
2085 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2087 /* we modify the TLB cache so that the dirty bit will be set again
2088 when accessing the range */
2089 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2090 /* Check that we don't span multiple blocks - this breaks the
2091 address comparisons below. */
2092 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2093 != (end - 1) - start) {
2094 abort();
2097 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2098 int mmu_idx;
2099 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2100 for(i = 0; i < CPU_TLB_SIZE; i++)
2101 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2102 start1, length);
2107 int cpu_physical_memory_set_dirty_tracking(int enable)
2109 int ret = 0;
2110 in_migration = enable;
2111 ret = cpu_notify_migration_log(!!enable);
2112 return ret;
2115 int cpu_physical_memory_get_dirty_tracking(void)
2117 return in_migration;
2120 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2121 target_phys_addr_t end_addr)
2123 int ret;
2125 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2126 return ret;
2129 int cpu_physical_log_start(target_phys_addr_t start_addr,
2130 ram_addr_t size)
2132 CPUPhysMemoryClient *client;
2133 QLIST_FOREACH(client, &memory_client_list, list) {
2134 if (client->log_start) {
2135 int r = client->log_start(client, start_addr, size);
2136 if (r < 0) {
2137 return r;
2141 return 0;
2144 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2145 ram_addr_t size)
2147 CPUPhysMemoryClient *client;
2148 QLIST_FOREACH(client, &memory_client_list, list) {
2149 if (client->log_stop) {
2150 int r = client->log_stop(client, start_addr, size);
2151 if (r < 0) {
2152 return r;
2156 return 0;
2159 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2161 ram_addr_t ram_addr;
2162 void *p;
2164 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2165 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2166 + tlb_entry->addend);
2167 ram_addr = qemu_ram_addr_from_host_nofail(p);
2168 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2169 tlb_entry->addr_write |= TLB_NOTDIRTY;
2174 /* update the TLB according to the current state of the dirty bits */
2175 void cpu_tlb_update_dirty(CPUState *env)
2177 int i;
2178 int mmu_idx;
2179 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2180 for(i = 0; i < CPU_TLB_SIZE; i++)
2181 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2185 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2187 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2188 tlb_entry->addr_write = vaddr;
2191 /* update the TLB corresponding to virtual page vaddr
2192 so that it is no longer dirty */
2193 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2195 int i;
2196 int mmu_idx;
2198 vaddr &= TARGET_PAGE_MASK;
2199 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2200 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2201 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2204 /* Our TLB does not support large pages, so remember the area covered by
2205 large pages and trigger a full TLB flush if these are invalidated. */
2206 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2207 target_ulong size)
2209 target_ulong mask = ~(size - 1);
2211 if (env->tlb_flush_addr == (target_ulong)-1) {
2212 env->tlb_flush_addr = vaddr & mask;
2213 env->tlb_flush_mask = mask;
2214 return;
2216 /* Extend the existing region to include the new page.
2217 This is a compromise between unnecessary flushes and the cost
2218 of maintaining a full variable size TLB. */
2219 mask &= env->tlb_flush_mask;
2220 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2221 mask <<= 1;
2223 env->tlb_flush_addr &= mask;
2224 env->tlb_flush_mask = mask;
2227 /* Add a new TLB entry. At most one entry for a given virtual address
2228 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2229 supplied size is only used by tlb_flush_page. */
2230 void tlb_set_page(CPUState *env, target_ulong vaddr,
2231 target_phys_addr_t paddr, int prot,
2232 int mmu_idx, target_ulong size)
2234 PhysPageDesc *p;
2235 unsigned long pd;
2236 unsigned int index;
2237 target_ulong address;
2238 target_ulong code_address;
2239 unsigned long addend;
2240 CPUTLBEntry *te;
2241 CPUWatchpoint *wp;
2242 target_phys_addr_t iotlb;
2244 assert(size >= TARGET_PAGE_SIZE);
2245 if (size != TARGET_PAGE_SIZE) {
2246 tlb_add_large_page(env, vaddr, size);
2248 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2249 if (!p) {
2250 pd = IO_MEM_UNASSIGNED;
2251 } else {
2252 pd = p->phys_offset;
2254 #if defined(DEBUG_TLB)
2255 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2256 " prot=%x idx=%d pd=0x%08lx\n",
2257 vaddr, paddr, prot, mmu_idx, pd);
2258 #endif
2260 address = vaddr;
2261 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2262 /* IO memory case (romd handled later) */
2263 address |= TLB_MMIO;
2265 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2266 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2267 /* Normal RAM. */
2268 iotlb = pd & TARGET_PAGE_MASK;
2269 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2270 iotlb |= IO_MEM_NOTDIRTY;
2271 else
2272 iotlb |= IO_MEM_ROM;
2273 } else {
2274 /* IO handlers are currently passed a physical address.
2275 It would be nice to pass an offset from the base address
2276 of that region. This would avoid having to special case RAM,
2277 and avoid full address decoding in every device.
2278 We can't use the high bits of pd for this because
2279 IO_MEM_ROMD uses these as a ram address. */
2280 iotlb = (pd & ~TARGET_PAGE_MASK);
2281 if (p) {
2282 iotlb += p->region_offset;
2283 } else {
2284 iotlb += paddr;
2288 code_address = address;
2289 /* Make accesses to pages with watchpoints go via the
2290 watchpoint trap routines. */
2291 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2292 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2293 /* Avoid trapping reads of pages with a write breakpoint. */
2294 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2295 iotlb = io_mem_watch + paddr;
2296 address |= TLB_MMIO;
2297 break;
2302 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2303 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2304 te = &env->tlb_table[mmu_idx][index];
2305 te->addend = addend - vaddr;
2306 if (prot & PAGE_READ) {
2307 te->addr_read = address;
2308 } else {
2309 te->addr_read = -1;
2312 if (prot & PAGE_EXEC) {
2313 te->addr_code = code_address;
2314 } else {
2315 te->addr_code = -1;
2317 if (prot & PAGE_WRITE) {
2318 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2319 (pd & IO_MEM_ROMD)) {
2320 /* Write access calls the I/O callback. */
2321 te->addr_write = address | TLB_MMIO;
2322 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2323 !cpu_physical_memory_is_dirty(pd)) {
2324 te->addr_write = address | TLB_NOTDIRTY;
2325 } else {
2326 te->addr_write = address;
2328 } else {
2329 te->addr_write = -1;
2333 #else
2335 void tlb_flush(CPUState *env, int flush_global)
2339 void tlb_flush_page(CPUState *env, target_ulong addr)
2344 * Walks guest process memory "regions" one by one
2345 * and calls callback function 'fn' for each region.
2348 struct walk_memory_regions_data
2350 walk_memory_regions_fn fn;
2351 void *priv;
2352 unsigned long start;
2353 int prot;
2356 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2357 abi_ulong end, int new_prot)
2359 if (data->start != -1ul) {
2360 int rc = data->fn(data->priv, data->start, end, data->prot);
2361 if (rc != 0) {
2362 return rc;
2366 data->start = (new_prot ? end : -1ul);
2367 data->prot = new_prot;
2369 return 0;
2372 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2373 abi_ulong base, int level, void **lp)
2375 abi_ulong pa;
2376 int i, rc;
2378 if (*lp == NULL) {
2379 return walk_memory_regions_end(data, base, 0);
2382 if (level == 0) {
2383 PageDesc *pd = *lp;
2384 for (i = 0; i < L2_SIZE; ++i) {
2385 int prot = pd[i].flags;
2387 pa = base | (i << TARGET_PAGE_BITS);
2388 if (prot != data->prot) {
2389 rc = walk_memory_regions_end(data, pa, prot);
2390 if (rc != 0) {
2391 return rc;
2395 } else {
2396 void **pp = *lp;
2397 for (i = 0; i < L2_SIZE; ++i) {
2398 pa = base | ((abi_ulong)i <<
2399 (TARGET_PAGE_BITS + L2_BITS * level));
2400 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2401 if (rc != 0) {
2402 return rc;
2407 return 0;
2410 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2412 struct walk_memory_regions_data data;
2413 unsigned long i;
2415 data.fn = fn;
2416 data.priv = priv;
2417 data.start = -1ul;
2418 data.prot = 0;
2420 for (i = 0; i < V_L1_SIZE; i++) {
2421 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2422 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2423 if (rc != 0) {
2424 return rc;
2428 return walk_memory_regions_end(&data, 0, 0);
2431 static int dump_region(void *priv, abi_ulong start,
2432 abi_ulong end, unsigned long prot)
2434 FILE *f = (FILE *)priv;
2436 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2437 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2438 start, end, end - start,
2439 ((prot & PAGE_READ) ? 'r' : '-'),
2440 ((prot & PAGE_WRITE) ? 'w' : '-'),
2441 ((prot & PAGE_EXEC) ? 'x' : '-'));
2443 return (0);
2446 /* dump memory mappings */
2447 void page_dump(FILE *f)
2449 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2450 "start", "end", "size", "prot");
2451 walk_memory_regions(f, dump_region);
2454 int page_get_flags(target_ulong address)
2456 PageDesc *p;
2458 p = page_find(address >> TARGET_PAGE_BITS);
2459 if (!p)
2460 return 0;
2461 return p->flags;
2464 /* Modify the flags of a page and invalidate the code if necessary.
2465 The flag PAGE_WRITE_ORG is positioned automatically depending
2466 on PAGE_WRITE. The mmap_lock should already be held. */
2467 void page_set_flags(target_ulong start, target_ulong end, int flags)
2469 target_ulong addr, len;
2471 /* This function should never be called with addresses outside the
2472 guest address space. If this assert fires, it probably indicates
2473 a missing call to h2g_valid. */
2474 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2475 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2476 #endif
2477 assert(start < end);
2479 start = start & TARGET_PAGE_MASK;
2480 end = TARGET_PAGE_ALIGN(end);
2482 if (flags & PAGE_WRITE) {
2483 flags |= PAGE_WRITE_ORG;
2486 for (addr = start, len = end - start;
2487 len != 0;
2488 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2489 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2491 /* If the write protection bit is set, then we invalidate
2492 the code inside. */
2493 if (!(p->flags & PAGE_WRITE) &&
2494 (flags & PAGE_WRITE) &&
2495 p->first_tb) {
2496 tb_invalidate_phys_page(addr, 0, NULL);
2498 p->flags = flags;
2502 int page_check_range(target_ulong start, target_ulong len, int flags)
2504 PageDesc *p;
2505 target_ulong end;
2506 target_ulong addr;
2508 /* This function should never be called with addresses outside the
2509 guest address space. If this assert fires, it probably indicates
2510 a missing call to h2g_valid. */
2511 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2512 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2513 #endif
2515 if (len == 0) {
2516 return 0;
2518 if (start + len - 1 < start) {
2519 /* We've wrapped around. */
2520 return -1;
2523 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2524 start = start & TARGET_PAGE_MASK;
2526 for (addr = start, len = end - start;
2527 len != 0;
2528 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2529 p = page_find(addr >> TARGET_PAGE_BITS);
2530 if( !p )
2531 return -1;
2532 if( !(p->flags & PAGE_VALID) )
2533 return -1;
2535 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2536 return -1;
2537 if (flags & PAGE_WRITE) {
2538 if (!(p->flags & PAGE_WRITE_ORG))
2539 return -1;
2540 /* unprotect the page if it was put read-only because it
2541 contains translated code */
2542 if (!(p->flags & PAGE_WRITE)) {
2543 if (!page_unprotect(addr, 0, NULL))
2544 return -1;
2546 return 0;
2549 return 0;
2552 /* called from signal handler: invalidate the code and unprotect the
2553 page. Return TRUE if the fault was successfully handled. */
2554 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2556 unsigned int prot;
2557 PageDesc *p;
2558 target_ulong host_start, host_end, addr;
2560 /* Technically this isn't safe inside a signal handler. However we
2561 know this only ever happens in a synchronous SEGV handler, so in
2562 practice it seems to be ok. */
2563 mmap_lock();
2565 p = page_find(address >> TARGET_PAGE_BITS);
2566 if (!p) {
2567 mmap_unlock();
2568 return 0;
2571 /* if the page was really writable, then we change its
2572 protection back to writable */
2573 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2574 host_start = address & qemu_host_page_mask;
2575 host_end = host_start + qemu_host_page_size;
2577 prot = 0;
2578 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2579 p = page_find(addr >> TARGET_PAGE_BITS);
2580 p->flags |= PAGE_WRITE;
2581 prot |= p->flags;
2583 /* and since the content will be modified, we must invalidate
2584 the corresponding translated code. */
2585 tb_invalidate_phys_page(addr, pc, puc);
2586 #ifdef DEBUG_TB_CHECK
2587 tb_invalidate_check(addr);
2588 #endif
2590 mprotect((void *)g2h(host_start), qemu_host_page_size,
2591 prot & PAGE_BITS);
2593 mmap_unlock();
2594 return 1;
2596 mmap_unlock();
2597 return 0;
2600 static inline void tlb_set_dirty(CPUState *env,
2601 unsigned long addr, target_ulong vaddr)
2604 #endif /* defined(CONFIG_USER_ONLY) */
2606 #if !defined(CONFIG_USER_ONLY)
2608 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2609 typedef struct subpage_t {
2610 target_phys_addr_t base;
2611 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2612 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2613 } subpage_t;
2615 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2616 ram_addr_t memory, ram_addr_t region_offset);
2617 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2618 ram_addr_t orig_memory,
2619 ram_addr_t region_offset);
/*
 * For the page at 'addr' inside a registration that starts at
 * 'start_addr', compute the first (start_addr2) and last (end_addr2) byte
 * offsets of the page covered by the registration, and set need_subpage
 * to 1 when the page is only partially covered.
 *
 * NOTE: relies on a variable named 'orig_size' being in scope at the
 * point of use; 'end_addr' is accepted but not referenced.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if (addr > start_addr) {                                        \
            start_addr2 = 0;                                            \
        } else {                                                        \
            start_addr2 = start_addr & ~TARGET_PAGE_MASK;               \
            if (start_addr2 > 0) {                                      \
                need_subpage = 1;                                       \
            }                                                           \
        }                                                               \
                                                                        \
        if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) {      \
            end_addr2 = TARGET_PAGE_SIZE - 1;                           \
        } else {                                                        \
            end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
            if (end_addr2 < TARGET_PAGE_SIZE - 1) {                     \
                need_subpage = 1;                                       \
            }                                                           \
        }                                                               \
    } while (0)
2640 /* register physical memory.
2641 For RAM, 'size' must be a multiple of the target page size.
2642 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2643 io memory page. The address used when calling the IO function is
2644 the offset from the start of the region, plus region_offset. Both
2645 start_addr and region_offset are rounded down to a page boundary
2646 before calculating this offset. This should not be a problem unless
2647 the low bits of start_addr and region_offset differ. */
2648 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2649 ram_addr_t size,
2650 ram_addr_t phys_offset,
2651 ram_addr_t region_offset,
2652 bool log_dirty)
2654 target_phys_addr_t addr, end_addr;
2655 PhysPageDesc *p;
2656 CPUState *env;
2657 ram_addr_t orig_size = size;
2658 subpage_t *subpage;
2660 assert(size);
2661 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2663 if (phys_offset == IO_MEM_UNASSIGNED) {
2664 region_offset = start_addr;
2666 region_offset &= TARGET_PAGE_MASK;
2667 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2668 end_addr = start_addr + (target_phys_addr_t)size;
2670 addr = start_addr;
2671 do {
2672 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2673 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2674 ram_addr_t orig_memory = p->phys_offset;
2675 target_phys_addr_t start_addr2, end_addr2;
2676 int need_subpage = 0;
2678 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2679 need_subpage);
2680 if (need_subpage) {
2681 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2682 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2683 &p->phys_offset, orig_memory,
2684 p->region_offset);
2685 } else {
2686 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2687 >> IO_MEM_SHIFT];
2689 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2690 region_offset);
2691 p->region_offset = 0;
2692 } else {
2693 p->phys_offset = phys_offset;
2694 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2695 (phys_offset & IO_MEM_ROMD))
2696 phys_offset += TARGET_PAGE_SIZE;
2698 } else {
2699 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2700 p->phys_offset = phys_offset;
2701 p->region_offset = region_offset;
2702 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2703 (phys_offset & IO_MEM_ROMD)) {
2704 phys_offset += TARGET_PAGE_SIZE;
2705 } else {
2706 target_phys_addr_t start_addr2, end_addr2;
2707 int need_subpage = 0;
2709 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2710 end_addr2, need_subpage);
2712 if (need_subpage) {
2713 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2714 &p->phys_offset, IO_MEM_UNASSIGNED,
2715 addr & TARGET_PAGE_MASK);
2716 subpage_register(subpage, start_addr2, end_addr2,
2717 phys_offset, region_offset);
2718 p->region_offset = 0;
2722 region_offset += TARGET_PAGE_SIZE;
2723 addr += TARGET_PAGE_SIZE;
2724 } while (addr != end_addr);
2726 /* since each CPU stores ram addresses in its TLB cache, we must
2727 reset the modified entries */
2728 /* XXX: slow ! */
2729 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2730 tlb_flush(env, 1);
2734 /* XXX: temporary until new memory mapping API */
2735 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2737 PhysPageDesc *p;
2739 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2740 if (!p)
2741 return IO_MEM_UNASSIGNED;
2742 return p->phys_offset;
2745 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2747 if (kvm_enabled())
2748 kvm_coalesce_mmio_region(addr, size);
2751 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2753 if (kvm_enabled())
2754 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced-MMIO buffer; does nothing when KVM is
   not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2763 #if defined(__linux__) && !defined(TARGET_S390X)
2765 #include <sys/vfs.h>
2767 #define HUGETLBFS_MAGIC 0x958458f6
2769 static long gethugepagesize(const char *path)
2771 struct statfs fs;
2772 int ret;
2774 do {
2775 ret = statfs(path, &fs);
2776 } while (ret != 0 && errno == EINTR);
2778 if (ret != 0) {
2779 perror(path);
2780 return 0;
2783 if (fs.f_type != HUGETLBFS_MAGIC)
2784 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2786 return fs.f_bsize;
2789 static void *file_ram_alloc(RAMBlock *block,
2790 ram_addr_t memory,
2791 const char *path)
2793 char *filename;
2794 void *area;
2795 int fd;
2796 #ifdef MAP_POPULATE
2797 int flags;
2798 #endif
2799 unsigned long hpagesize;
2801 hpagesize = gethugepagesize(path);
2802 if (!hpagesize) {
2803 return NULL;
2806 if (memory < hpagesize) {
2807 return NULL;
2810 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2811 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2812 return NULL;
2815 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2816 return NULL;
2819 fd = mkstemp(filename);
2820 if (fd < 0) {
2821 perror("unable to create backing store for hugepages");
2822 free(filename);
2823 return NULL;
2825 unlink(filename);
2826 free(filename);
2828 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2831 * ftruncate is not supported by hugetlbfs in older
2832 * hosts, so don't bother bailing out on errors.
2833 * If anything goes wrong with it under other filesystems,
2834 * mmap will fail.
2836 if (ftruncate(fd, memory))
2837 perror("ftruncate");
2839 #ifdef MAP_POPULATE
2840 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2841 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2842 * to sidestep this quirk.
2844 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2845 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2846 #else
2847 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2848 #endif
2849 if (area == MAP_FAILED) {
2850 perror("file_ram_alloc: can't mmap RAM pages");
2851 close(fd);
2852 return (NULL);
2854 block->fd = fd;
2855 return area;
2857 #endif
2859 static ram_addr_t find_ram_offset(ram_addr_t size)
2861 RAMBlock *block, *next_block;
2862 ram_addr_t offset = 0, mingap = ULONG_MAX;
2864 if (QLIST_EMPTY(&ram_list.blocks))
2865 return 0;
2867 QLIST_FOREACH(block, &ram_list.blocks, next) {
2868 ram_addr_t end, next = ULONG_MAX;
2870 end = block->offset + block->length;
2872 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2873 if (next_block->offset >= end) {
2874 next = MIN(next, next_block->offset);
2877 if (next - end >= size && next - end < mingap) {
2878 offset = end;
2879 mingap = next - end;
2882 return offset;
2885 static ram_addr_t last_ram_offset(void)
2887 RAMBlock *block;
2888 ram_addr_t last = 0;
2890 QLIST_FOREACH(block, &ram_list.blocks, next)
2891 last = MAX(last, block->offset + block->length);
2893 return last;
2896 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2897 ram_addr_t size, void *host)
2899 RAMBlock *new_block, *block;
2901 size = TARGET_PAGE_ALIGN(size);
2902 new_block = qemu_mallocz(sizeof(*new_block));
2904 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2905 char *id = dev->parent_bus->info->get_dev_path(dev);
2906 if (id) {
2907 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2908 qemu_free(id);
2911 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2913 QLIST_FOREACH(block, &ram_list.blocks, next) {
2914 if (!strcmp(block->idstr, new_block->idstr)) {
2915 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2916 new_block->idstr);
2917 abort();
2921 new_block->offset = find_ram_offset(size);
2922 if (host) {
2923 new_block->host = host;
2924 new_block->flags |= RAM_PREALLOC_MASK;
2925 } else {
2926 if (mem_path) {
2927 #if defined (__linux__) && !defined(TARGET_S390X)
2928 new_block->host = file_ram_alloc(new_block, size, mem_path);
2929 if (!new_block->host) {
2930 new_block->host = qemu_vmalloc(size);
2931 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2933 #else
2934 fprintf(stderr, "-mem-path option unsupported\n");
2935 exit(1);
2936 #endif
2937 } else {
2938 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2939 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2940 an system defined value, which is at least 256GB. Larger systems
2941 have larger values. We put the guest between the end of data
2942 segment (system break) and this value. We use 32GB as a base to
2943 have enough room for the system break to grow. */
2944 new_block->host = mmap((void*)0x800000000, size,
2945 PROT_EXEC|PROT_READ|PROT_WRITE,
2946 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2947 if (new_block->host == MAP_FAILED) {
2948 fprintf(stderr, "Allocating RAM failed\n");
2949 abort();
2951 #else
2952 if (xen_mapcache_enabled()) {
2953 xen_ram_alloc(new_block->offset, size);
2954 } else {
2955 new_block->host = qemu_vmalloc(size);
2957 #endif
2958 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2961 new_block->length = size;
2963 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2965 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2966 last_ram_offset() >> TARGET_PAGE_BITS);
2967 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2968 0xff, size >> TARGET_PAGE_BITS);
2970 if (kvm_enabled())
2971 kvm_setup_guest_memory(new_block->host, size);
2973 return new_block->offset;
2976 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2978 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2981 void qemu_ram_free_from_ptr(ram_addr_t addr)
2983 RAMBlock *block;
2985 QLIST_FOREACH(block, &ram_list.blocks, next) {
2986 if (addr == block->offset) {
2987 QLIST_REMOVE(block, next);
2988 qemu_free(block);
2989 return;
2994 void qemu_ram_free(ram_addr_t addr)
2996 RAMBlock *block;
2998 QLIST_FOREACH(block, &ram_list.blocks, next) {
2999 if (addr == block->offset) {
3000 QLIST_REMOVE(block, next);
3001 if (block->flags & RAM_PREALLOC_MASK) {
3003 } else if (mem_path) {
3004 #if defined (__linux__) && !defined(TARGET_S390X)
3005 if (block->fd) {
3006 munmap(block->host, block->length);
3007 close(block->fd);
3008 } else {
3009 qemu_vfree(block->host);
3011 #else
3012 abort();
3013 #endif
3014 } else {
3015 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3016 munmap(block->host, block->length);
3017 #else
3018 if (xen_mapcache_enabled()) {
3019 qemu_invalidate_entry(block->host);
3020 } else {
3021 qemu_vfree(block->host);
3023 #endif
3025 qemu_free(block);
3026 return;
3032 #ifndef _WIN32
3033 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3035 RAMBlock *block;
3036 ram_addr_t offset;
3037 int flags;
3038 void *area, *vaddr;
3040 QLIST_FOREACH(block, &ram_list.blocks, next) {
3041 offset = addr - block->offset;
3042 if (offset < block->length) {
3043 vaddr = block->host + offset;
3044 if (block->flags & RAM_PREALLOC_MASK) {
3046 } else {
3047 flags = MAP_FIXED;
3048 munmap(vaddr, length);
3049 if (mem_path) {
3050 #if defined(__linux__) && !defined(TARGET_S390X)
3051 if (block->fd) {
3052 #ifdef MAP_POPULATE
3053 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3054 MAP_PRIVATE;
3055 #else
3056 flags |= MAP_PRIVATE;
3057 #endif
3058 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3059 flags, block->fd, offset);
3060 } else {
3061 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3062 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3063 flags, -1, 0);
3065 #else
3066 abort();
3067 #endif
3068 } else {
3069 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3070 flags |= MAP_SHARED | MAP_ANONYMOUS;
3071 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3072 flags, -1, 0);
3073 #else
3074 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3075 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3076 flags, -1, 0);
3077 #endif
3079 if (area != vaddr) {
3080 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3081 length, addr);
3082 exit(1);
3084 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3086 return;
3090 #endif /* !_WIN32 */
3092 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3093 With the exception of the softmmu code in this file, this should
3094 only be used for local memory (e.g. video ram) that the device owns,
3095 and knows it isn't going to access beyond the end of the block.
3097 It should not be used for general purpose DMA.
3098 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3100 void *qemu_get_ram_ptr(ram_addr_t addr)
3102 RAMBlock *block;
3104 QLIST_FOREACH(block, &ram_list.blocks, next) {
3105 if (addr - block->offset < block->length) {
3106 /* Move this entry to to start of the list. */
3107 if (block != QLIST_FIRST(&ram_list.blocks)) {
3108 QLIST_REMOVE(block, next);
3109 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3111 if (xen_mapcache_enabled()) {
3112 /* We need to check if the requested address is in the RAM
3113 * because we don't want to map the entire memory in QEMU.
3114 * In that case just map until the end of the page.
3116 if (block->offset == 0) {
3117 return qemu_map_cache(addr, 0, 0);
3118 } else if (block->host == NULL) {
3119 block->host = qemu_map_cache(block->offset, block->length, 1);
3122 return block->host + (addr - block->offset);
3126 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3127 abort();
3129 return NULL;
3132 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3133 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3135 void *qemu_safe_ram_ptr(ram_addr_t addr)
3137 RAMBlock *block;
3139 QLIST_FOREACH(block, &ram_list.blocks, next) {
3140 if (addr - block->offset < block->length) {
3141 if (xen_mapcache_enabled()) {
3142 /* We need to check if the requested address is in the RAM
3143 * because we don't want to map the entire memory in QEMU.
3144 * In that case just map until the end of the page.
3146 if (block->offset == 0) {
3147 return qemu_map_cache(addr, 0, 0);
3148 } else if (block->host == NULL) {
3149 block->host = qemu_map_cache(block->offset, block->length, 1);
3152 return block->host + (addr - block->offset);
3156 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3157 abort();
3159 return NULL;
3162 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3163 * but takes a size argument */
3164 void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size)
3166 if (xen_mapcache_enabled())
3167 return qemu_map_cache(addr, *size, 1);
3168 else {
3169 RAMBlock *block;
3171 QLIST_FOREACH(block, &ram_list.blocks, next) {
3172 if (addr - block->offset < block->length) {
3173 if (addr - block->offset + *size > block->length)
3174 *size = block->length - addr + block->offset;
3175 return block->host + (addr - block->offset);
3179 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3180 abort();
3182 *size = 0;
3183 return NULL;
/* Counterpart of the qemu_get_ram_ptr() family; all it does here is emit
   the corresponding trace event for 'addr'. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3192 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3194 RAMBlock *block;
3195 uint8_t *host = ptr;
3197 if (xen_mapcache_enabled()) {
3198 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3199 return 0;
3202 QLIST_FOREACH(block, &ram_list.blocks, next) {
3203 /* This case append when the block is not mapped. */
3204 if (block->host == NULL) {
3205 continue;
3207 if (host - block->host < block->length) {
3208 *ram_addr = block->offset + (host - block->host);
3209 return 0;
3213 return -1;
3216 /* Some of the softmmu routines need to translate from a host pointer
3217 (typically a TLB entry) back to a ram offset. */
3218 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3220 ram_addr_t ram_addr;
3222 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3223 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3224 abort();
3226 return ram_addr;
3229 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3231 #ifdef DEBUG_UNASSIGNED
3232 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3233 #endif
3234 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3235 do_unassigned_access(addr, 0, 0, 0, 1);
3236 #endif
3237 return 0;
3240 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3242 #ifdef DEBUG_UNASSIGNED
3243 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3244 #endif
3245 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3246 do_unassigned_access(addr, 0, 0, 0, 2);
3247 #endif
3248 return 0;
3251 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3253 #ifdef DEBUG_UNASSIGNED
3254 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3255 #endif
3256 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3257 do_unassigned_access(addr, 0, 0, 0, 4);
3258 #endif
3259 return 0;
3262 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3264 #ifdef DEBUG_UNASSIGNED
3265 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3266 #endif
3267 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3268 do_unassigned_access(addr, 1, 0, 0, 1);
3269 #endif
3272 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3274 #ifdef DEBUG_UNASSIGNED
3275 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3276 #endif
3277 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3278 do_unassigned_access(addr, 1, 0, 0, 2);
3279 #endif
3282 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3284 #ifdef DEBUG_UNASSIGNED
3285 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3286 #endif
3287 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3288 do_unassigned_access(addr, 1, 0, 0, 4);
3289 #endif
3292 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3293 unassigned_mem_readb,
3294 unassigned_mem_readw,
3295 unassigned_mem_readl,
3298 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3299 unassigned_mem_writeb,
3300 unassigned_mem_writew,
3301 unassigned_mem_writel,
3304 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3305 uint32_t val)
3307 int dirty_flags;
3308 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3309 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3310 #if !defined(CONFIG_USER_ONLY)
3311 tb_invalidate_phys_page_fast(ram_addr, 1);
3312 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3313 #endif
3315 stb_p(qemu_get_ram_ptr(ram_addr), val);
3316 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3317 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3318 /* we remove the notdirty callback only if the code has been
3319 flushed */
3320 if (dirty_flags == 0xff)
3321 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3324 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3325 uint32_t val)
3327 int dirty_flags;
3328 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3329 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3330 #if !defined(CONFIG_USER_ONLY)
3331 tb_invalidate_phys_page_fast(ram_addr, 2);
3332 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3333 #endif
3335 stw_p(qemu_get_ram_ptr(ram_addr), val);
3336 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3337 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3338 /* we remove the notdirty callback only if the code has been
3339 flushed */
3340 if (dirty_flags == 0xff)
3341 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3344 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3345 uint32_t val)
3347 int dirty_flags;
3348 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3349 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3350 #if !defined(CONFIG_USER_ONLY)
3351 tb_invalidate_phys_page_fast(ram_addr, 4);
3352 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3353 #endif
3355 stl_p(qemu_get_ram_ptr(ram_addr), val);
3356 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3357 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3358 /* we remove the notdirty callback only if the code has been
3359 flushed */
3360 if (dirty_flags == 0xff)
3361 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3364 static CPUReadMemoryFunc * const error_mem_read[3] = {
3365 NULL, /* never used */
3366 NULL, /* never used */
3367 NULL, /* never used */
3370 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3371 notdirty_mem_writeb,
3372 notdirty_mem_writew,
3373 notdirty_mem_writel,
3376 /* Generate a debug exception if a watchpoint has been hit. */
3377 static void check_watchpoint(int offset, int len_mask, int flags)
3379 CPUState *env = cpu_single_env;
3380 target_ulong pc, cs_base;
3381 TranslationBlock *tb;
3382 target_ulong vaddr;
3383 CPUWatchpoint *wp;
3384 int cpu_flags;
3386 if (env->watchpoint_hit) {
3387 /* We re-entered the check after replacing the TB. Now raise
3388 * the debug interrupt so that is will trigger after the
3389 * current instruction. */
3390 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3391 return;
3393 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3394 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3395 if ((vaddr == (wp->vaddr & len_mask) ||
3396 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3397 wp->flags |= BP_WATCHPOINT_HIT;
3398 if (!env->watchpoint_hit) {
3399 env->watchpoint_hit = wp;
3400 tb = tb_find_pc(env->mem_io_pc);
3401 if (!tb) {
3402 cpu_abort(env, "check_watchpoint: could not find TB for "
3403 "pc=%p", (void *)env->mem_io_pc);
3405 cpu_restore_state(tb, env, env->mem_io_pc);
3406 tb_phys_invalidate(tb, -1);
3407 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3408 env->exception_index = EXCP_DEBUG;
3409 } else {
3410 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3411 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3413 cpu_resume_from_signal(env, NULL);
3415 } else {
3416 wp->flags &= ~BP_WATCHPOINT_HIT;
3421 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3422 so these check for a hit then pass through to the normal out-of-line
3423 phys routines. */
3424 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3426 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3427 return ldub_phys(addr);
3430 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3432 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3433 return lduw_phys(addr);
3436 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3438 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3439 return ldl_phys(addr);
3442 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3443 uint32_t val)
3445 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3446 stb_phys(addr, val);
3449 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3450 uint32_t val)
3452 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3453 stw_phys(addr, val);
3456 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3457 uint32_t val)
3459 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3460 stl_phys(addr, val);
3463 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3464 watch_mem_readb,
3465 watch_mem_readw,
3466 watch_mem_readl,
3469 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3470 watch_mem_writeb,
3471 watch_mem_writew,
3472 watch_mem_writel,
3475 static inline uint32_t subpage_readlen (subpage_t *mmio,
3476 target_phys_addr_t addr,
3477 unsigned int len)
3479 unsigned int idx = SUBPAGE_IDX(addr);
3480 #if defined(DEBUG_SUBPAGE)
3481 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3482 mmio, len, addr, idx);
3483 #endif
3485 addr += mmio->region_offset[idx];
3486 idx = mmio->sub_io_index[idx];
3487 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3490 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3491 uint32_t value, unsigned int len)
3493 unsigned int idx = SUBPAGE_IDX(addr);
3494 #if defined(DEBUG_SUBPAGE)
3495 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3496 __func__, mmio, len, addr, idx, value);
3497 #endif
3499 addr += mmio->region_offset[idx];
3500 idx = mmio->sub_io_index[idx];
3501 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3504 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3506 return subpage_readlen(opaque, addr, 0);
3509 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3510 uint32_t value)
3512 subpage_writelen(opaque, addr, value, 0);
3515 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3517 return subpage_readlen(opaque, addr, 1);
3520 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3521 uint32_t value)
3523 subpage_writelen(opaque, addr, value, 1);
3526 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3528 return subpage_readlen(opaque, addr, 2);
3531 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3532 uint32_t value)
3534 subpage_writelen(opaque, addr, value, 2);
3537 static CPUReadMemoryFunc * const subpage_read[] = {
3538 &subpage_readb,
3539 &subpage_readw,
3540 &subpage_readl,
3543 static CPUWriteMemoryFunc * const subpage_write[] = {
3544 &subpage_writeb,
3545 &subpage_writew,
3546 &subpage_writel,
3549 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3550 ram_addr_t memory, ram_addr_t region_offset)
3552 int idx, eidx;
3554 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3555 return -1;
3556 idx = SUBPAGE_IDX(start);
3557 eidx = SUBPAGE_IDX(end);
3558 #if defined(DEBUG_SUBPAGE)
3559 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3560 mmio, start, end, idx, eidx, memory);
3561 #endif
3562 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3563 memory = IO_MEM_UNASSIGNED;
3564 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3565 for (; idx <= eidx; idx++) {
3566 mmio->sub_io_index[idx] = memory;
3567 mmio->region_offset[idx] = region_offset;
3570 return 0;
3573 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3574 ram_addr_t orig_memory,
3575 ram_addr_t region_offset)
3577 subpage_t *mmio;
3578 int subpage_memory;
3580 mmio = qemu_mallocz(sizeof(subpage_t));
3582 mmio->base = base;
3583 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3584 DEVICE_NATIVE_ENDIAN);
3585 #if defined(DEBUG_SUBPAGE)
3586 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3587 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3588 #endif
3589 *phys = subpage_memory | IO_MEM_SUBPAGE;
3590 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3592 return mmio;
3595 static int get_free_io_mem_idx(void)
3597 int i;
3599 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3600 if (!io_mem_used[i]) {
3601 io_mem_used[i] = 1;
3602 return i;
3604 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3605 return -1;
3609 * Usually, devices operate in little endian mode. There are devices out
3610 * there that operate in big endian too. Each device gets byte swapped
3611 * mmio if plugged onto a CPU that does the other endianness.
3613 * CPU Device swap?
3615 * little little no
3616 * little big yes
3617 * big little yes
3618 * big big no
3621 typedef struct SwapEndianContainer {
3622 CPUReadMemoryFunc *read[3];
3623 CPUWriteMemoryFunc *write[3];
3624 void *opaque;
3625 } SwapEndianContainer;
3627 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3629 uint32_t val;
3630 SwapEndianContainer *c = opaque;
3631 val = c->read[0](c->opaque, addr);
3632 return val;
3635 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3637 uint32_t val;
3638 SwapEndianContainer *c = opaque;
3639 val = bswap16(c->read[1](c->opaque, addr));
3640 return val;
3643 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3645 uint32_t val;
3646 SwapEndianContainer *c = opaque;
3647 val = bswap32(c->read[2](c->opaque, addr));
3648 return val;
3651 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3652 swapendian_mem_readb,
3653 swapendian_mem_readw,
3654 swapendian_mem_readl
3657 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3658 uint32_t val)
3660 SwapEndianContainer *c = opaque;
3661 c->write[0](c->opaque, addr, val);
3664 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3665 uint32_t val)
3667 SwapEndianContainer *c = opaque;
3668 c->write[1](c->opaque, addr, bswap16(val));
3671 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3672 uint32_t val)
3674 SwapEndianContainer *c = opaque;
3675 c->write[2](c->opaque, addr, bswap32(val));
3678 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3679 swapendian_mem_writeb,
3680 swapendian_mem_writew,
3681 swapendian_mem_writel
3684 static void swapendian_init(int io_index)
3686 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3687 int i;
3689 /* Swap mmio for big endian targets */
3690 c->opaque = io_mem_opaque[io_index];
3691 for (i = 0; i < 3; i++) {
3692 c->read[i] = io_mem_read[io_index][i];
3693 c->write[i] = io_mem_write[io_index][i];
3695 io_mem_read[io_index][i] = swapendian_readfn[i];
3696 io_mem_write[io_index][i] = swapendian_writefn[i];
3698 io_mem_opaque[io_index] = c;
3701 static void swapendian_del(int io_index)
3703 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3704 qemu_free(io_mem_opaque[io_index]);
3708 /* mem_read and mem_write are arrays of functions containing the
3709 function to access byte (index 0), word (index 1) and dword (index
3710 2). Functions can be omitted with a NULL function pointer.
3711 If io_index is non zero, the corresponding io zone is
3712 modified. If it is zero, a new io zone is allocated. The return
3713 value can be used with cpu_register_physical_memory(). (-1) is
3714 returned if error. */
3715 static int cpu_register_io_memory_fixed(int io_index,
3716 CPUReadMemoryFunc * const *mem_read,
3717 CPUWriteMemoryFunc * const *mem_write,
3718 void *opaque, enum device_endian endian)
3720 int i;
3722 if (io_index <= 0) {
3723 io_index = get_free_io_mem_idx();
3724 if (io_index == -1)
3725 return io_index;
3726 } else {
3727 io_index >>= IO_MEM_SHIFT;
3728 if (io_index >= IO_MEM_NB_ENTRIES)
3729 return -1;
3732 for (i = 0; i < 3; ++i) {
3733 io_mem_read[io_index][i]
3734 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3736 for (i = 0; i < 3; ++i) {
3737 io_mem_write[io_index][i]
3738 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3740 io_mem_opaque[io_index] = opaque;
3742 switch (endian) {
3743 case DEVICE_BIG_ENDIAN:
3744 #ifndef TARGET_WORDS_BIGENDIAN
3745 swapendian_init(io_index);
3746 #endif
3747 break;
3748 case DEVICE_LITTLE_ENDIAN:
3749 #ifdef TARGET_WORDS_BIGENDIAN
3750 swapendian_init(io_index);
3751 #endif
3752 break;
3753 case DEVICE_NATIVE_ENDIAN:
3754 default:
3755 break;
3758 return (io_index << IO_MEM_SHIFT);
3761 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3762 CPUWriteMemoryFunc * const *mem_write,
3763 void *opaque, enum device_endian endian)
3765 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3768 void cpu_unregister_io_memory(int io_table_address)
3770 int i;
3771 int io_index = io_table_address >> IO_MEM_SHIFT;
3773 swapendian_del(io_index);
3775 for (i=0;i < 3; i++) {
3776 io_mem_read[io_index][i] = unassigned_mem_read[i];
3777 io_mem_write[io_index][i] = unassigned_mem_write[i];
3779 io_mem_opaque[io_index] = NULL;
3780 io_mem_used[io_index] = 0;
3783 static void io_mem_init(void)
3785 int i;
3787 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3788 unassigned_mem_write, NULL,
3789 DEVICE_NATIVE_ENDIAN);
3790 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3791 unassigned_mem_write, NULL,
3792 DEVICE_NATIVE_ENDIAN);
3793 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3794 notdirty_mem_write, NULL,
3795 DEVICE_NATIVE_ENDIAN);
3796 for (i=0; i<5; i++)
3797 io_mem_used[i] = 1;
3799 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3800 watch_mem_write, NULL,
3801 DEVICE_NATIVE_ENDIAN);
3804 #endif /* !defined(CONFIG_USER_ONLY) */
3806 /* physical memory access (slow version, mainly for debug) */
3807 #if defined(CONFIG_USER_ONLY)
3808 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3809 uint8_t *buf, int len, int is_write)
3811 int l, flags;
3812 target_ulong page;
3813 void * p;
3815 while (len > 0) {
3816 page = addr & TARGET_PAGE_MASK;
3817 l = (page + TARGET_PAGE_SIZE) - addr;
3818 if (l > len)
3819 l = len;
3820 flags = page_get_flags(page);
3821 if (!(flags & PAGE_VALID))
3822 return -1;
3823 if (is_write) {
3824 if (!(flags & PAGE_WRITE))
3825 return -1;
3826 /* XXX: this code should not depend on lock_user */
3827 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3828 return -1;
3829 memcpy(p, buf, l);
3830 unlock_user(p, addr, l);
3831 } else {
3832 if (!(flags & PAGE_READ))
3833 return -1;
3834 /* XXX: this code should not depend on lock_user */
3835 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3836 return -1;
3837 memcpy(buf, p, l);
3838 unlock_user(p, addr, 0);
3840 len -= l;
3841 buf += l;
3842 addr += l;
3844 return 0;
3847 #else
3848 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3849 int len, int is_write)
3851 int l, io_index;
3852 uint8_t *ptr;
3853 uint32_t val;
3854 target_phys_addr_t page;
3855 unsigned long pd;
3856 PhysPageDesc *p;
3858 while (len > 0) {
3859 page = addr & TARGET_PAGE_MASK;
3860 l = (page + TARGET_PAGE_SIZE) - addr;
3861 if (l > len)
3862 l = len;
3863 p = phys_page_find(page >> TARGET_PAGE_BITS);
3864 if (!p) {
3865 pd = IO_MEM_UNASSIGNED;
3866 } else {
3867 pd = p->phys_offset;
3870 if (is_write) {
3871 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3872 target_phys_addr_t addr1 = addr;
3873 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3874 if (p)
3875 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3876 /* XXX: could force cpu_single_env to NULL to avoid
3877 potential bugs */
3878 if (l >= 4 && ((addr1 & 3) == 0)) {
3879 /* 32 bit write access */
3880 val = ldl_p(buf);
3881 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3882 l = 4;
3883 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3884 /* 16 bit write access */
3885 val = lduw_p(buf);
3886 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3887 l = 2;
3888 } else {
3889 /* 8 bit write access */
3890 val = ldub_p(buf);
3891 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3892 l = 1;
3894 } else {
3895 unsigned long addr1;
3896 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3897 /* RAM case */
3898 ptr = qemu_get_ram_ptr(addr1);
3899 memcpy(ptr, buf, l);
3900 if (!cpu_physical_memory_is_dirty(addr1)) {
3901 /* invalidate code */
3902 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3903 /* set dirty bit */
3904 cpu_physical_memory_set_dirty_flags(
3905 addr1, (0xff & ~CODE_DIRTY_FLAG));
3907 qemu_put_ram_ptr(ptr);
3909 } else {
3910 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3911 !(pd & IO_MEM_ROMD)) {
3912 target_phys_addr_t addr1 = addr;
3913 /* I/O case */
3914 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3915 if (p)
3916 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3917 if (l >= 4 && ((addr1 & 3) == 0)) {
3918 /* 32 bit read access */
3919 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3920 stl_p(buf, val);
3921 l = 4;
3922 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3923 /* 16 bit read access */
3924 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3925 stw_p(buf, val);
3926 l = 2;
3927 } else {
3928 /* 8 bit read access */
3929 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3930 stb_p(buf, val);
3931 l = 1;
3933 } else {
3934 /* RAM case */
3935 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3936 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3937 qemu_put_ram_ptr(ptr);
3940 len -= l;
3941 buf += l;
3942 addr += l;
3946 /* used for ROM loading : can write in RAM and ROM */
3947 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3948 const uint8_t *buf, int len)
3950 int l;
3951 uint8_t *ptr;
3952 target_phys_addr_t page;
3953 unsigned long pd;
3954 PhysPageDesc *p;
3956 while (len > 0) {
3957 page = addr & TARGET_PAGE_MASK;
3958 l = (page + TARGET_PAGE_SIZE) - addr;
3959 if (l > len)
3960 l = len;
3961 p = phys_page_find(page >> TARGET_PAGE_BITS);
3962 if (!p) {
3963 pd = IO_MEM_UNASSIGNED;
3964 } else {
3965 pd = p->phys_offset;
3968 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3969 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3970 !(pd & IO_MEM_ROMD)) {
3971 /* do nothing */
3972 } else {
3973 unsigned long addr1;
3974 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3975 /* ROM/RAM case */
3976 ptr = qemu_get_ram_ptr(addr1);
3977 memcpy(ptr, buf, l);
3978 qemu_put_ram_ptr(ptr);
3980 len -= l;
3981 buf += l;
3982 addr += l;
3986 typedef struct {
3987 void *buffer;
3988 target_phys_addr_t addr;
3989 target_phys_addr_t len;
3990 } BounceBuffer;
3992 static BounceBuffer bounce;
3994 typedef struct MapClient {
3995 void *opaque;
3996 void (*callback)(void *opaque);
3997 QLIST_ENTRY(MapClient) link;
3998 } MapClient;
4000 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4001 = QLIST_HEAD_INITIALIZER(map_client_list);
4003 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4005 MapClient *client = qemu_malloc(sizeof(*client));
4007 client->opaque = opaque;
4008 client->callback = callback;
4009 QLIST_INSERT_HEAD(&map_client_list, client, link);
4010 return client;
4013 void cpu_unregister_map_client(void *_client)
4015 MapClient *client = (MapClient *)_client;
4017 QLIST_REMOVE(client, link);
4018 qemu_free(client);
4021 static void cpu_notify_map_clients(void)
4023 MapClient *client;
4025 while (!QLIST_EMPTY(&map_client_list)) {
4026 client = QLIST_FIRST(&map_client_list);
4027 client->callback(client->opaque);
4028 cpu_unregister_map_client(client);
4032 /* Map a physical memory region into a host virtual address.
4033 * May map a subset of the requested range, given by and returned in *plen.
4034 * May return NULL if resources needed to perform the mapping are exhausted.
4035 * Use only for reads OR writes - not for read-modify-write operations.
4036 * Use cpu_register_map_client() to know when retrying the map operation is
4037 * likely to succeed.
4039 void *cpu_physical_memory_map(target_phys_addr_t addr,
4040 target_phys_addr_t *plen,
4041 int is_write)
4043 target_phys_addr_t len = *plen;
4044 target_phys_addr_t todo = 0;
4045 int l;
4046 target_phys_addr_t page;
4047 unsigned long pd;
4048 PhysPageDesc *p;
4049 target_phys_addr_t addr1 = addr;
4051 while (len > 0) {
4052 page = addr & TARGET_PAGE_MASK;
4053 l = (page + TARGET_PAGE_SIZE) - addr;
4054 if (l > len)
4055 l = len;
4056 p = phys_page_find(page >> TARGET_PAGE_BITS);
4057 if (!p) {
4058 pd = IO_MEM_UNASSIGNED;
4059 } else {
4060 pd = p->phys_offset;
4063 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4064 if (todo || bounce.buffer) {
4065 break;
4067 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4068 bounce.addr = addr;
4069 bounce.len = l;
4070 if (!is_write) {
4071 cpu_physical_memory_read(addr, bounce.buffer, l);
4074 *plen = l;
4075 return bounce.buffer;
4078 len -= l;
4079 addr += l;
4080 todo += l;
4082 *plen = todo;
4083 return qemu_ram_ptr_length(addr1, plen);
4086 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4087 * Will also mark the memory as dirty if is_write == 1. access_len gives
4088 * the amount of memory that was actually read or written by the caller.
4090 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4091 int is_write, target_phys_addr_t access_len)
4093 if (buffer != bounce.buffer) {
4094 if (is_write) {
4095 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4096 while (access_len) {
4097 unsigned l;
4098 l = TARGET_PAGE_SIZE;
4099 if (l > access_len)
4100 l = access_len;
4101 if (!cpu_physical_memory_is_dirty(addr1)) {
4102 /* invalidate code */
4103 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4104 /* set dirty bit */
4105 cpu_physical_memory_set_dirty_flags(
4106 addr1, (0xff & ~CODE_DIRTY_FLAG));
4108 addr1 += l;
4109 access_len -= l;
4112 if (xen_mapcache_enabled()) {
4113 qemu_invalidate_entry(buffer);
4115 return;
4117 if (is_write) {
4118 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4120 qemu_vfree(bounce.buffer);
4121 bounce.buffer = NULL;
4122 cpu_notify_map_clients();
4125 /* warning: addr must be aligned */
4126 uint32_t ldl_phys(target_phys_addr_t addr)
4128 int io_index;
4129 uint8_t *ptr;
4130 uint32_t val;
4131 unsigned long pd;
4132 PhysPageDesc *p;
4134 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4135 if (!p) {
4136 pd = IO_MEM_UNASSIGNED;
4137 } else {
4138 pd = p->phys_offset;
4141 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4142 !(pd & IO_MEM_ROMD)) {
4143 /* I/O case */
4144 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4145 if (p)
4146 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4147 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4148 } else {
4149 /* RAM case */
4150 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4151 (addr & ~TARGET_PAGE_MASK);
4152 val = ldl_p(ptr);
4154 return val;
4157 /* warning: addr must be aligned */
4158 uint64_t ldq_phys(target_phys_addr_t addr)
4160 int io_index;
4161 uint8_t *ptr;
4162 uint64_t val;
4163 unsigned long pd;
4164 PhysPageDesc *p;
4166 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4167 if (!p) {
4168 pd = IO_MEM_UNASSIGNED;
4169 } else {
4170 pd = p->phys_offset;
4173 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4174 !(pd & IO_MEM_ROMD)) {
4175 /* I/O case */
4176 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4177 if (p)
4178 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4179 #ifdef TARGET_WORDS_BIGENDIAN
4180 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4181 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4182 #else
4183 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4184 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4185 #endif
4186 } else {
4187 /* RAM case */
4188 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4189 (addr & ~TARGET_PAGE_MASK);
4190 val = ldq_p(ptr);
4192 return val;
4195 /* XXX: optimize */
4196 uint32_t ldub_phys(target_phys_addr_t addr)
4198 uint8_t val;
4199 cpu_physical_memory_read(addr, &val, 1);
4200 return val;
4203 /* warning: addr must be aligned */
4204 uint32_t lduw_phys(target_phys_addr_t addr)
4206 int io_index;
4207 uint8_t *ptr;
4208 uint64_t val;
4209 unsigned long pd;
4210 PhysPageDesc *p;
4212 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4213 if (!p) {
4214 pd = IO_MEM_UNASSIGNED;
4215 } else {
4216 pd = p->phys_offset;
4219 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4220 !(pd & IO_MEM_ROMD)) {
4221 /* I/O case */
4222 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4223 if (p)
4224 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4225 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4226 } else {
4227 /* RAM case */
4228 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4229 (addr & ~TARGET_PAGE_MASK);
4230 val = lduw_p(ptr);
4232 return val;
4235 /* warning: addr must be aligned. The ram page is not masked as dirty
4236 and the code inside is not invalidated. It is useful if the dirty
4237 bits are used to track modified PTEs */
4238 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4240 int io_index;
4241 uint8_t *ptr;
4242 unsigned long pd;
4243 PhysPageDesc *p;
4245 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4246 if (!p) {
4247 pd = IO_MEM_UNASSIGNED;
4248 } else {
4249 pd = p->phys_offset;
4252 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4253 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4254 if (p)
4255 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4256 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4257 } else {
4258 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4259 ptr = qemu_get_ram_ptr(addr1);
4260 stl_p(ptr, val);
4262 if (unlikely(in_migration)) {
4263 if (!cpu_physical_memory_is_dirty(addr1)) {
4264 /* invalidate code */
4265 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4266 /* set dirty bit */
4267 cpu_physical_memory_set_dirty_flags(
4268 addr1, (0xff & ~CODE_DIRTY_FLAG));
4274 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4276 int io_index;
4277 uint8_t *ptr;
4278 unsigned long pd;
4279 PhysPageDesc *p;
4281 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4282 if (!p) {
4283 pd = IO_MEM_UNASSIGNED;
4284 } else {
4285 pd = p->phys_offset;
4288 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4289 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4290 if (p)
4291 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4292 #ifdef TARGET_WORDS_BIGENDIAN
4293 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4294 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4295 #else
4296 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4297 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4298 #endif
4299 } else {
4300 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4301 (addr & ~TARGET_PAGE_MASK);
4302 stq_p(ptr, val);
4306 /* warning: addr must be aligned */
4307 void stl_phys(target_phys_addr_t addr, uint32_t val)
4309 int io_index;
4310 uint8_t *ptr;
4311 unsigned long pd;
4312 PhysPageDesc *p;
4314 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4315 if (!p) {
4316 pd = IO_MEM_UNASSIGNED;
4317 } else {
4318 pd = p->phys_offset;
4321 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4322 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4323 if (p)
4324 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4325 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4326 } else {
4327 unsigned long addr1;
4328 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4329 /* RAM case */
4330 ptr = qemu_get_ram_ptr(addr1);
4331 stl_p(ptr, val);
4332 if (!cpu_physical_memory_is_dirty(addr1)) {
4333 /* invalidate code */
4334 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4335 /* set dirty bit */
4336 cpu_physical_memory_set_dirty_flags(addr1,
4337 (0xff & ~CODE_DIRTY_FLAG));
4342 /* XXX: optimize */
4343 void stb_phys(target_phys_addr_t addr, uint32_t val)
4345 uint8_t v = val;
4346 cpu_physical_memory_write(addr, &v, 1);
4349 /* warning: addr must be aligned */
4350 void stw_phys(target_phys_addr_t addr, uint32_t val)
4352 int io_index;
4353 uint8_t *ptr;
4354 unsigned long pd;
4355 PhysPageDesc *p;
4357 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4358 if (!p) {
4359 pd = IO_MEM_UNASSIGNED;
4360 } else {
4361 pd = p->phys_offset;
4364 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4365 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4366 if (p)
4367 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4368 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4369 } else {
4370 unsigned long addr1;
4371 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4372 /* RAM case */
4373 ptr = qemu_get_ram_ptr(addr1);
4374 stw_p(ptr, val);
4375 if (!cpu_physical_memory_is_dirty(addr1)) {
4376 /* invalidate code */
4377 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4378 /* set dirty bit */
4379 cpu_physical_memory_set_dirty_flags(addr1,
4380 (0xff & ~CODE_DIRTY_FLAG));
4385 /* XXX: optimize */
4386 void stq_phys(target_phys_addr_t addr, uint64_t val)
4388 val = tswap64(val);
4389 cpu_physical_memory_write(addr, &val, 8);
4392 /* virtual memory access for debug (includes writing to ROM) */
4393 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4394 uint8_t *buf, int len, int is_write)
4396 int l;
4397 target_phys_addr_t phys_addr;
4398 target_ulong page;
4400 while (len > 0) {
4401 page = addr & TARGET_PAGE_MASK;
4402 phys_addr = cpu_get_phys_page_debug(env, page);
4403 /* if no physical page mapped, return an error */
4404 if (phys_addr == -1)
4405 return -1;
4406 l = (page + TARGET_PAGE_SIZE) - addr;
4407 if (l > len)
4408 l = len;
4409 phys_addr += (addr & ~TARGET_PAGE_MASK);
4410 if (is_write)
4411 cpu_physical_memory_write_rom(phys_addr, buf, l);
4412 else
4413 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4414 len -= l;
4415 buf += l;
4416 addr += l;
4418 return 0;
4420 #endif
4422 /* in deterministic execution mode, instructions doing device I/Os
4423 must be at the end of the TB */
4424 void cpu_io_recompile(CPUState *env, void *retaddr)
4426 TranslationBlock *tb;
4427 uint32_t n, cflags;
4428 target_ulong pc, cs_base;
4429 uint64_t flags;
4431 tb = tb_find_pc((unsigned long)retaddr);
4432 if (!tb) {
4433 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4434 retaddr);
4436 n = env->icount_decr.u16.low + tb->icount;
4437 cpu_restore_state(tb, env, (unsigned long)retaddr);
4438 /* Calculate how many instructions had been executed before the fault
4439 occurred. */
4440 n = n - env->icount_decr.u16.low;
4441 /* Generate a new TB ending on the I/O insn. */
4442 n++;
4443 /* On MIPS and SH, delay slot instructions can only be restarted if
4444 they were already the first instruction in the TB. If this is not
4445 the first instruction in a TB then re-execute the preceding
4446 branch. */
4447 #if defined(TARGET_MIPS)
4448 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4449 env->active_tc.PC -= 4;
4450 env->icount_decr.u16.low++;
4451 env->hflags &= ~MIPS_HFLAG_BMASK;
4453 #elif defined(TARGET_SH4)
4454 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4455 && n > 1) {
4456 env->pc -= 2;
4457 env->icount_decr.u16.low++;
4458 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4460 #endif
4461 /* This should never happen. */
4462 if (n > CF_COUNT_MASK)
4463 cpu_abort(env, "TB too big during recompile");
4465 cflags = n | CF_LAST_IO;
4466 pc = tb->pc;
4467 cs_base = tb->cs_base;
4468 flags = tb->flags;
4469 tb_phys_invalidate(tb, -1);
4470 /* FIXME: In theory this could raise an exception. In practice
4471 we have already translated the block once so it's probably ok. */
4472 tb_gen_code(env, pc, cs_base, flags, cflags);
4473 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4474 the first in the TB) then we end up generating a whole new TB and
4475 repeating the fault, which is horribly inefficient.
4476 Better would be to execute just this insn uncached, or generate a
4477 second new TB. */
4478 cpu_resume_from_signal(env, NULL);
4481 #if !defined(CONFIG_USER_ONLY)
4483 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4485 int i, target_code_size, max_target_code_size;
4486 int direct_jmp_count, direct_jmp2_count, cross_page;
4487 TranslationBlock *tb;
4489 target_code_size = 0;
4490 max_target_code_size = 0;
4491 cross_page = 0;
4492 direct_jmp_count = 0;
4493 direct_jmp2_count = 0;
4494 for(i = 0; i < nb_tbs; i++) {
4495 tb = &tbs[i];
4496 target_code_size += tb->size;
4497 if (tb->size > max_target_code_size)
4498 max_target_code_size = tb->size;
4499 if (tb->page_addr[1] != -1)
4500 cross_page++;
4501 if (tb->tb_next_offset[0] != 0xffff) {
4502 direct_jmp_count++;
4503 if (tb->tb_next_offset[1] != 0xffff) {
4504 direct_jmp2_count++;
4508 /* XXX: avoid using doubles ? */
4509 cpu_fprintf(f, "Translation buffer state:\n");
4510 cpu_fprintf(f, "gen code size %td/%ld\n",
4511 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4512 cpu_fprintf(f, "TB count %d/%d\n",
4513 nb_tbs, code_gen_max_blocks);
4514 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4515 nb_tbs ? target_code_size / nb_tbs : 0,
4516 max_target_code_size);
4517 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4518 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4519 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4520 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4521 cross_page,
4522 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4523 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4524 direct_jmp_count,
4525 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4526 direct_jmp2_count,
4527 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4528 cpu_fprintf(f, "\nStatistics:\n");
4529 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4530 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4531 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4532 tcg_dump_info(f, cpu_fprintf);
4535 #define MMUSUFFIX _cmmu
4536 #define GETPC() NULL
4537 #define env cpu_single_env
4538 #define SOFTMMU_CODE_ACCESS
4540 #define SHIFT 0
4541 #include "softmmu_template.h"
4543 #define SHIFT 1
4544 #include "softmmu_template.h"
4546 #define SHIFT 2
4547 #include "softmmu_template.h"
4549 #define SHIFT 3
4550 #include "softmmu_template.h"
4552 #undef env
4554 #endif