qlist: add qlist_first()/qlist_next()
[qemu/mdroth.git] / exec.c
blob2160ded4017f9f3f820360cb5a388bb5facdc8c2
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #if defined(CONFIG_USER_ONLY)
37 #include <qemu.h>
38 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
39 #include <sys/param.h>
40 #if __FreeBSD_version >= 700104
41 #define HAVE_KINFO_GETVMMAP
42 #define sigqueue sigqueue_freebsd /* avoid redefinition */
43 #include <sys/time.h>
44 #include <sys/proc.h>
45 #include <machine/profile.h>
46 #define _KERNEL
47 #include <sys/user.h>
48 #undef _KERNEL
49 #undef sigqueue
50 #include <libutil.h>
51 #endif
52 #endif
53 #else /* !CONFIG_USER_ONLY */
54 #include "xen-mapcache.h"
55 #include "trace.h"
56 #endif
58 //#define DEBUG_TB_INVALIDATE
59 //#define DEBUG_FLUSH
60 //#define DEBUG_TLB
61 //#define DEBUG_UNASSIGNED
63 /* make various TB consistency checks */
64 //#define DEBUG_TB_CHECK
65 //#define DEBUG_TLB_CHECK
67 //#define DEBUG_IOPORT
68 //#define DEBUG_SUBPAGE
70 #if !defined(CONFIG_USER_ONLY)
71 /* TB consistency checks only implemented for usermode emulation. */
72 #undef DEBUG_TB_CHECK
73 #endif
75 #define SMC_BITMAP_USE_THRESHOLD 10
77 static TranslationBlock *tbs;
78 static int code_gen_max_blocks;
79 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
80 static int nb_tbs;
81 /* any access to the tbs or the page table must use this lock */
82 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
84 #if defined(__arm__) || defined(__sparc_v9__)
85 /* The prologue must be reachable with a direct jump. ARM and Sparc64
86 have limited branch ranges (possibly also PPC) so place it in a
87 section close to code segment. */
88 #define code_gen_section \
89 __attribute__((__section__(".gen_code"))) \
90 __attribute__((aligned (32)))
91 #elif defined(_WIN32)
92 /* Maximum alignment for Win32 is 16. */
93 #define code_gen_section \
94 __attribute__((aligned (16)))
95 #else
96 #define code_gen_section \
97 __attribute__((aligned (32)))
98 #endif
100 uint8_t code_gen_prologue[1024] code_gen_section;
101 static uint8_t *code_gen_buffer;
102 static unsigned long code_gen_buffer_size;
103 /* threshold to flush the translated code buffer */
104 static unsigned long code_gen_buffer_max_size;
105 static uint8_t *code_gen_ptr;
107 #if !defined(CONFIG_USER_ONLY)
108 int phys_ram_fd;
109 static int in_migration;
111 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
112 #endif
114 CPUState *first_cpu;
115 /* current CPU in the current thread. It is only valid inside
116 cpu_exec() */
117 CPUState *cpu_single_env;
118 /* 0 = Do not count executed instructions.
119 1 = Precise instruction counting.
120 2 = Adaptive rate instruction counting. */
121 int use_icount = 0;
122 /* Current instruction counter. While executing translated code this may
123 include some instructions that have not yet been executed. */
124 int64_t qemu_icount;
126 typedef struct PageDesc {
127 /* list of TBs intersecting this ram page */
128 TranslationBlock *first_tb;
129 /* in order to optimize self modifying code, we count the number
130 of lookups we do to a given page to use a bitmap */
131 unsigned int code_write_count;
132 uint8_t *code_bitmap;
133 #if defined(CONFIG_USER_ONLY)
134 unsigned long flags;
135 #endif
136 } PageDesc;
138 /* In system mode we want L1_MAP to be based on ram offsets,
139 while in user mode we want it to be based on virtual addresses. */
140 #if !defined(CONFIG_USER_ONLY)
141 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
142 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
143 #else
144 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
145 #endif
146 #else
147 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
148 #endif
150 /* Size of the L2 (and L3, etc) page tables. */
151 #define L2_BITS 10
152 #define L2_SIZE (1 << L2_BITS)
154 /* The bits remaining after N lower levels of page tables. */
155 #define P_L1_BITS_REM \
156 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
157 #define V_L1_BITS_REM \
158 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
160 /* Size of the L1 page table. Avoid silly small sizes. */
161 #if P_L1_BITS_REM < 4
162 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
163 #else
164 #define P_L1_BITS P_L1_BITS_REM
165 #endif
167 #if V_L1_BITS_REM < 4
168 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
169 #else
170 #define V_L1_BITS V_L1_BITS_REM
171 #endif
173 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
174 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
179 unsigned long qemu_real_host_page_size;
180 unsigned long qemu_host_page_bits;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageDesc {
190 /* offset in host memory of the page + io_index in the low bits */
191 ram_addr_t phys_offset;
192 ram_addr_t region_offset;
193 } PhysPageDesc;
195 /* This is a multi-level map on the physical address space.
196 The bottom level has pointers to PhysPageDesc. */
197 static void *l1_phys_map[P_L1_SIZE];
199 static void io_mem_init(void);
201 /* io memory support */
202 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
203 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
204 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
205 static char io_mem_used[IO_MEM_NB_ENTRIES];
206 static int io_mem_watch;
207 #endif
209 /* log support */
210 #ifdef WIN32
211 static const char *logfilename = "qemu.log";
212 #else
213 static const char *logfilename = "/tmp/qemu.log";
214 #endif
215 FILE *logfile;
216 int loglevel;
217 static int log_append = 0;
219 /* statistics */
220 #if !defined(CONFIG_USER_ONLY)
221 static int tlb_flush_count;
222 #endif
223 static int tb_flush_count;
224 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Make the 'size' bytes starting at 'addr' readable, writable and
   executable.  The result of VirtualProtect() is not checked. */
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);
}
#else
/* Make the 'size' bytes starting at 'addr' readable, writable and
   executable.  The range is widened to whole host pages because
   mprotect() works on page granularity; its result is not checked. */
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    /* round the start down to a page boundary */
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    /* round the end up to a page boundary */
    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
252 static void page_init(void)
254 /* NOTE: we can always suppose that qemu_host_page_size >=
255 TARGET_PAGE_SIZE */
256 #ifdef _WIN32
258 SYSTEM_INFO system_info;
260 GetSystemInfo(&system_info);
261 qemu_real_host_page_size = system_info.dwPageSize;
263 #else
264 qemu_real_host_page_size = getpagesize();
265 #endif
266 if (qemu_host_page_size == 0)
267 qemu_host_page_size = qemu_real_host_page_size;
268 if (qemu_host_page_size < TARGET_PAGE_SIZE)
269 qemu_host_page_size = TARGET_PAGE_SIZE;
270 qemu_host_page_bits = 0;
271 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
272 qemu_host_page_bits++;
273 qemu_host_page_mask = ~(qemu_host_page_size - 1);
275 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
277 #ifdef HAVE_KINFO_GETVMMAP
278 struct kinfo_vmentry *freep;
279 int i, cnt;
281 freep = kinfo_getvmmap(getpid(), &cnt);
282 if (freep) {
283 mmap_lock();
284 for (i = 0; i < cnt; i++) {
285 unsigned long startaddr, endaddr;
287 startaddr = freep[i].kve_start;
288 endaddr = freep[i].kve_end;
289 if (h2g_valid(startaddr)) {
290 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
292 if (h2g_valid(endaddr)) {
293 endaddr = h2g(endaddr);
294 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
295 } else {
296 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
297 endaddr = ~0ul;
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 #endif
303 free(freep);
304 mmap_unlock();
306 #else
307 FILE *f;
309 last_brk = (unsigned long)sbrk(0);
311 f = fopen("/compat/linux/proc/self/maps", "r");
312 if (f) {
313 mmap_lock();
315 do {
316 unsigned long startaddr, endaddr;
317 int n;
319 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
321 if (n == 2 && h2g_valid(startaddr)) {
322 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
324 if (h2g_valid(endaddr)) {
325 endaddr = h2g(endaddr);
326 } else {
327 endaddr = ~0ul;
329 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
331 } while (!feof(f));
333 fclose(f);
334 mmap_unlock();
336 #endif
338 #endif
341 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
343 PageDesc *pd;
344 void **lp;
345 int i;
347 #if defined(CONFIG_USER_ONLY)
348 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
349 # define ALLOC(P, SIZE) \
350 do { \
351 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
352 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
353 } while (0)
354 #else
355 # define ALLOC(P, SIZE) \
356 do { P = qemu_mallocz(SIZE); } while (0)
357 #endif
359 /* Level 1. Always allocated. */
360 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
362 /* Level 2..N-1. */
363 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
364 void **p = *lp;
366 if (p == NULL) {
367 if (!alloc) {
368 return NULL;
370 ALLOC(p, sizeof(void *) * L2_SIZE);
371 *lp = p;
374 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
377 pd = *lp;
378 if (pd == NULL) {
379 if (!alloc) {
380 return NULL;
382 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
383 *lp = pd;
386 #undef ALLOC
388 return pd + (index & (L2_SIZE - 1));
391 static inline PageDesc *page_find(tb_page_addr_t index)
393 return page_find_alloc(index, 0);
396 #if !defined(CONFIG_USER_ONLY)
397 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
399 PhysPageDesc *pd;
400 void **lp;
401 int i;
403 /* Level 1. Always allocated. */
404 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
406 /* Level 2..N-1. */
407 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
408 void **p = *lp;
409 if (p == NULL) {
410 if (!alloc) {
411 return NULL;
413 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
415 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
418 pd = *lp;
419 if (pd == NULL) {
420 int i;
422 if (!alloc) {
423 return NULL;
426 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
428 for (i = 0; i < L2_SIZE; i++) {
429 pd[i].phys_offset = IO_MEM_UNASSIGNED;
430 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
434 return pd + (index & (L2_SIZE - 1));
437 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
439 return phys_page_find_alloc(index, 0);
442 static void tlb_protect_code(ram_addr_t ram_addr);
443 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
444 target_ulong vaddr);
445 #define mmap_lock() do { } while(0)
446 #define mmap_unlock() do { } while(0)
447 #endif
449 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
451 #if defined(CONFIG_USER_ONLY)
452 /* Currently it is not recommended to allocate big chunks of data in
453 user mode. It will change when a dedicated libc will be used */
454 #define USE_STATIC_CODE_GEN_BUFFER
455 #endif
457 #ifdef USE_STATIC_CODE_GEN_BUFFER
458 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
459 __attribute__((aligned (CODE_GEN_ALIGN)));
460 #endif
462 static void code_gen_alloc(unsigned long tb_size)
464 #ifdef USE_STATIC_CODE_GEN_BUFFER
465 code_gen_buffer = static_code_gen_buffer;
466 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
467 map_exec(code_gen_buffer, code_gen_buffer_size);
468 #else
469 code_gen_buffer_size = tb_size;
470 if (code_gen_buffer_size == 0) {
471 #if defined(CONFIG_USER_ONLY)
472 /* in user mode, phys_ram_size is not meaningful */
473 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
474 #else
475 /* XXX: needs adjustments */
476 code_gen_buffer_size = (unsigned long)(ram_size / 4);
477 #endif
479 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
480 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
481 /* The code gen buffer location may have constraints depending on
482 the host cpu and OS */
483 #if defined(__linux__)
485 int flags;
486 void *start = NULL;
488 flags = MAP_PRIVATE | MAP_ANONYMOUS;
489 #if defined(__x86_64__)
490 flags |= MAP_32BIT;
491 /* Cannot map more than that */
492 if (code_gen_buffer_size > (800 * 1024 * 1024))
493 code_gen_buffer_size = (800 * 1024 * 1024);
494 #elif defined(__sparc_v9__)
495 // Map the buffer below 2G, so we can use direct calls and branches
496 flags |= MAP_FIXED;
497 start = (void *) 0x60000000UL;
498 if (code_gen_buffer_size > (512 * 1024 * 1024))
499 code_gen_buffer_size = (512 * 1024 * 1024);
500 #elif defined(__arm__)
501 /* Map the buffer below 32M, so we can use direct calls and branches */
502 flags |= MAP_FIXED;
503 start = (void *) 0x01000000UL;
504 if (code_gen_buffer_size > 16 * 1024 * 1024)
505 code_gen_buffer_size = 16 * 1024 * 1024;
506 #elif defined(__s390x__)
507 /* Map the buffer so that we can use direct calls and branches. */
508 /* We have a +- 4GB range on the branches; leave some slop. */
509 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
510 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
512 start = (void *)0x90000000UL;
513 #endif
514 code_gen_buffer = mmap(start, code_gen_buffer_size,
515 PROT_WRITE | PROT_READ | PROT_EXEC,
516 flags, -1, 0);
517 if (code_gen_buffer == MAP_FAILED) {
518 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
519 exit(1);
522 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
523 || defined(__DragonFly__) || defined(__OpenBSD__)
525 int flags;
526 void *addr = NULL;
527 flags = MAP_PRIVATE | MAP_ANONYMOUS;
528 #if defined(__x86_64__)
529 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
530 * 0x40000000 is free */
531 flags |= MAP_FIXED;
532 addr = (void *)0x40000000;
533 /* Cannot map more than that */
534 if (code_gen_buffer_size > (800 * 1024 * 1024))
535 code_gen_buffer_size = (800 * 1024 * 1024);
536 #elif defined(__sparc_v9__)
537 // Map the buffer below 2G, so we can use direct calls and branches
538 flags |= MAP_FIXED;
539 addr = (void *) 0x60000000UL;
540 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
541 code_gen_buffer_size = (512 * 1024 * 1024);
543 #endif
544 code_gen_buffer = mmap(addr, code_gen_buffer_size,
545 PROT_WRITE | PROT_READ | PROT_EXEC,
546 flags, -1, 0);
547 if (code_gen_buffer == MAP_FAILED) {
548 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
549 exit(1);
552 #else
553 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
554 map_exec(code_gen_buffer, code_gen_buffer_size);
555 #endif
556 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
557 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
558 code_gen_buffer_max_size = code_gen_buffer_size -
559 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
560 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
561 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
564 /* Must be called before using the QEMU cpus. 'tb_size' is the size
565 (in bytes) allocated to the translation buffer. Zero means default
566 size. */
567 void cpu_exec_init_all(unsigned long tb_size)
569 cpu_gen_init();
570 code_gen_alloc(tb_size);
571 code_gen_ptr = code_gen_buffer;
572 page_init();
573 #if !defined(CONFIG_USER_ONLY)
574 io_mem_init();
575 #endif
576 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
577 /* There's no guest base to take into account, so go ahead and
578 initialize the prologue now. */
579 tcg_prologue_init(&tcg_ctx);
580 #endif
583 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
585 static int cpu_common_post_load(void *opaque, int version_id)
587 CPUState *env = opaque;
589 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
590 version_id is increased. */
591 env->interrupt_request &= ~0x01;
592 tlb_flush(env, 1);
594 return 0;
597 static const VMStateDescription vmstate_cpu_common = {
598 .name = "cpu_common",
599 .version_id = 1,
600 .minimum_version_id = 1,
601 .minimum_version_id_old = 1,
602 .post_load = cpu_common_post_load,
603 .fields = (VMStateField []) {
604 VMSTATE_UINT32(halted, CPUState),
605 VMSTATE_UINT32(interrupt_request, CPUState),
606 VMSTATE_END_OF_LIST()
609 #endif
611 CPUState *qemu_get_cpu(int cpu)
613 CPUState *env = first_cpu;
615 while (env) {
616 if (env->cpu_index == cpu)
617 break;
618 env = env->next_cpu;
621 return env;
624 void cpu_exec_init(CPUState *env)
626 CPUState **penv;
627 int cpu_index;
629 #if defined(CONFIG_USER_ONLY)
630 cpu_list_lock();
631 #endif
632 env->next_cpu = NULL;
633 penv = &first_cpu;
634 cpu_index = 0;
635 while (*penv != NULL) {
636 penv = &(*penv)->next_cpu;
637 cpu_index++;
639 env->cpu_index = cpu_index;
640 env->numa_node = 0;
641 QTAILQ_INIT(&env->breakpoints);
642 QTAILQ_INIT(&env->watchpoints);
643 #ifndef CONFIG_USER_ONLY
644 env->thread_id = qemu_get_thread_id();
645 #endif
646 *penv = env;
647 #if defined(CONFIG_USER_ONLY)
648 cpu_list_unlock();
649 #endif
650 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
651 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
652 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
653 cpu_save, cpu_load, env);
654 #endif
657 /* Allocate a new translation block. Flush the translation buffer if
658 too many translation blocks or too much generated code. */
659 static TranslationBlock *tb_alloc(target_ulong pc)
661 TranslationBlock *tb;
663 if (nb_tbs >= code_gen_max_blocks ||
664 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
665 return NULL;
666 tb = &tbs[nb_tbs++];
667 tb->pc = pc;
668 tb->cflags = 0;
669 return tb;
672 void tb_free(TranslationBlock *tb)
674 /* In practice this is mostly used for single use temporary TB
675 Ignore the hard cases and just back up if this TB happens to
676 be the last one generated. */
677 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
678 code_gen_ptr = tb->tc_ptr;
679 nb_tbs--;
683 static inline void invalidate_page_bitmap(PageDesc *p)
685 if (p->code_bitmap) {
686 qemu_free(p->code_bitmap);
687 p->code_bitmap = NULL;
689 p->code_write_count = 0;
692 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
694 static void page_flush_tb_1 (int level, void **lp)
696 int i;
698 if (*lp == NULL) {
699 return;
701 if (level == 0) {
702 PageDesc *pd = *lp;
703 for (i = 0; i < L2_SIZE; ++i) {
704 pd[i].first_tb = NULL;
705 invalidate_page_bitmap(pd + i);
707 } else {
708 void **pp = *lp;
709 for (i = 0; i < L2_SIZE; ++i) {
710 page_flush_tb_1 (level - 1, pp + i);
715 static void page_flush_tb(void)
717 int i;
718 for (i = 0; i < V_L1_SIZE; i++) {
719 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
723 /* flush all the translation blocks */
724 /* XXX: tb_flush is currently not thread safe */
725 void tb_flush(CPUState *env1)
727 CPUState *env;
728 #if defined(DEBUG_FLUSH)
729 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
730 (unsigned long)(code_gen_ptr - code_gen_buffer),
731 nb_tbs, nb_tbs > 0 ?
732 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
733 #endif
734 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
735 cpu_abort(env1, "Internal error: code buffer overflow\n");
737 nb_tbs = 0;
739 for(env = first_cpu; env != NULL; env = env->next_cpu) {
740 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
743 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
744 page_flush_tb();
746 code_gen_ptr = code_gen_buffer;
747 /* XXX: flush processor icache at this point if cache flush is
748 expensive */
749 tb_flush_count++;
752 #ifdef DEBUG_TB_CHECK
754 static void tb_invalidate_check(target_ulong address)
756 TranslationBlock *tb;
757 int i;
758 address &= TARGET_PAGE_MASK;
759 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
760 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
761 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
762 address >= tb->pc + tb->size)) {
763 printf("ERROR invalidate: address=" TARGET_FMT_lx
764 " PC=%08lx size=%04x\n",
765 address, (long)tb->pc, tb->size);
771 /* verify that all the pages have correct rights for code */
772 static void tb_page_check(void)
774 TranslationBlock *tb;
775 int i, flags1, flags2;
777 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
778 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
779 flags1 = page_get_flags(tb->pc);
780 flags2 = page_get_flags(tb->pc + tb->size - 1);
781 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
782 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
783 (long)tb->pc, tb->size, flags1, flags2);
789 #endif
791 /* invalidate one TB */
792 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
793 int next_offset)
795 TranslationBlock *tb1;
796 for(;;) {
797 tb1 = *ptb;
798 if (tb1 == tb) {
799 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
800 break;
802 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
806 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
808 TranslationBlock *tb1;
809 unsigned int n1;
811 for(;;) {
812 tb1 = *ptb;
813 n1 = (long)tb1 & 3;
814 tb1 = (TranslationBlock *)((long)tb1 & ~3);
815 if (tb1 == tb) {
816 *ptb = tb1->page_next[n1];
817 break;
819 ptb = &tb1->page_next[n1];
823 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
825 TranslationBlock *tb1, **ptb;
826 unsigned int n1;
828 ptb = &tb->jmp_next[n];
829 tb1 = *ptb;
830 if (tb1) {
831 /* find tb(n) in circular list */
832 for(;;) {
833 tb1 = *ptb;
834 n1 = (long)tb1 & 3;
835 tb1 = (TranslationBlock *)((long)tb1 & ~3);
836 if (n1 == n && tb1 == tb)
837 break;
838 if (n1 == 2) {
839 ptb = &tb1->jmp_first;
840 } else {
841 ptb = &tb1->jmp_next[n1];
844 /* now we can suppress tb(n) from the list */
845 *ptb = tb->jmp_next[n];
847 tb->jmp_next[n] = NULL;
851 /* reset the jump entry 'n' of a TB so that it is not chained to
852 another TB */
853 static inline void tb_reset_jump(TranslationBlock *tb, int n)
855 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
858 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
860 CPUState *env;
861 PageDesc *p;
862 unsigned int h, n1;
863 tb_page_addr_t phys_pc;
864 TranslationBlock *tb1, *tb2;
866 /* remove the TB from the hash list */
867 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
868 h = tb_phys_hash_func(phys_pc);
869 tb_remove(&tb_phys_hash[h], tb,
870 offsetof(TranslationBlock, phys_hash_next));
872 /* remove the TB from the page list */
873 if (tb->page_addr[0] != page_addr) {
874 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
875 tb_page_remove(&p->first_tb, tb);
876 invalidate_page_bitmap(p);
878 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
879 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
880 tb_page_remove(&p->first_tb, tb);
881 invalidate_page_bitmap(p);
884 tb_invalidated_flag = 1;
886 /* remove the TB from the hash list */
887 h = tb_jmp_cache_hash_func(tb->pc);
888 for(env = first_cpu; env != NULL; env = env->next_cpu) {
889 if (env->tb_jmp_cache[h] == tb)
890 env->tb_jmp_cache[h] = NULL;
893 /* suppress this TB from the two jump lists */
894 tb_jmp_remove(tb, 0);
895 tb_jmp_remove(tb, 1);
897 /* suppress any remaining jumps to this TB */
898 tb1 = tb->jmp_first;
899 for(;;) {
900 n1 = (long)tb1 & 3;
901 if (n1 == 2)
902 break;
903 tb1 = (TranslationBlock *)((long)tb1 & ~3);
904 tb2 = tb1->jmp_next[n1];
905 tb_reset_jump(tb1, n1);
906 tb1->jmp_next[n1] = NULL;
907 tb1 = tb2;
909 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
911 tb_phys_invalidate_count++;
/* Set 'len' consecutive bits of the bitmap 'tab', starting at bit number
   'start'.  Bit k lives in byte k / 8 at position k % 8 (least significant
   bit first).  Nothing is written when 'len' is zero. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    /* bits from 'start' upwards within the first byte */
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* the whole range lies inside a single byte */
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        /* leading partial byte */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        /* whole bytes in the middle */
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        /* trailing partial byte */
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
941 static void build_page_bitmap(PageDesc *p)
943 int n, tb_start, tb_end;
944 TranslationBlock *tb;
946 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
948 tb = p->first_tb;
949 while (tb != NULL) {
950 n = (long)tb & 3;
951 tb = (TranslationBlock *)((long)tb & ~3);
952 /* NOTE: this is subtle as a TB may span two physical pages */
953 if (n == 0) {
954 /* NOTE: tb_end may be after the end of the page, but
955 it is not a problem */
956 tb_start = tb->pc & ~TARGET_PAGE_MASK;
957 tb_end = tb_start + tb->size;
958 if (tb_end > TARGET_PAGE_SIZE)
959 tb_end = TARGET_PAGE_SIZE;
960 } else {
961 tb_start = 0;
962 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
964 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
965 tb = tb->page_next[n];
969 TranslationBlock *tb_gen_code(CPUState *env,
970 target_ulong pc, target_ulong cs_base,
971 int flags, int cflags)
973 TranslationBlock *tb;
974 uint8_t *tc_ptr;
975 tb_page_addr_t phys_pc, phys_page2;
976 target_ulong virt_page2;
977 int code_gen_size;
979 phys_pc = get_page_addr_code(env, pc);
980 tb = tb_alloc(pc);
981 if (!tb) {
982 /* flush must be done */
983 tb_flush(env);
984 /* cannot fail at this point */
985 tb = tb_alloc(pc);
986 /* Don't forget to invalidate previous TB info. */
987 tb_invalidated_flag = 1;
989 tc_ptr = code_gen_ptr;
990 tb->tc_ptr = tc_ptr;
991 tb->cs_base = cs_base;
992 tb->flags = flags;
993 tb->cflags = cflags;
994 cpu_gen_code(env, tb, &code_gen_size);
995 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
997 /* check next page if needed */
998 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
999 phys_page2 = -1;
1000 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1001 phys_page2 = get_page_addr_code(env, virt_page2);
1003 tb_link_page(tb, phys_pc, phys_page2);
1004 return tb;
1007 /* invalidate all TBs which intersect with the target physical page
1008 starting in range [start;end[. NOTE: start and end must refer to
1009 the same physical page. 'is_cpu_write_access' should be true if called
1010 from a real cpu write access: the virtual CPU will exit the current
1011 TB if code is modified inside this TB. */
1012 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1013 int is_cpu_write_access)
1015 TranslationBlock *tb, *tb_next, *saved_tb;
1016 CPUState *env = cpu_single_env;
1017 tb_page_addr_t tb_start, tb_end;
1018 PageDesc *p;
1019 int n;
1020 #ifdef TARGET_HAS_PRECISE_SMC
1021 int current_tb_not_found = is_cpu_write_access;
1022 TranslationBlock *current_tb = NULL;
1023 int current_tb_modified = 0;
1024 target_ulong current_pc = 0;
1025 target_ulong current_cs_base = 0;
1026 int current_flags = 0;
1027 #endif /* TARGET_HAS_PRECISE_SMC */
1029 p = page_find(start >> TARGET_PAGE_BITS);
1030 if (!p)
1031 return;
1032 if (!p->code_bitmap &&
1033 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1034 is_cpu_write_access) {
1035 /* build code bitmap */
1036 build_page_bitmap(p);
1039 /* we remove all the TBs in the range [start, end[ */
1040 /* XXX: see if in some cases it could be faster to invalidate all the code */
1041 tb = p->first_tb;
1042 while (tb != NULL) {
1043 n = (long)tb & 3;
1044 tb = (TranslationBlock *)((long)tb & ~3);
1045 tb_next = tb->page_next[n];
1046 /* NOTE: this is subtle as a TB may span two physical pages */
1047 if (n == 0) {
1048 /* NOTE: tb_end may be after the end of the page, but
1049 it is not a problem */
1050 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1051 tb_end = tb_start + tb->size;
1052 } else {
1053 tb_start = tb->page_addr[1];
1054 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1056 if (!(tb_end <= start || tb_start >= end)) {
1057 #ifdef TARGET_HAS_PRECISE_SMC
1058 if (current_tb_not_found) {
1059 current_tb_not_found = 0;
1060 current_tb = NULL;
1061 if (env->mem_io_pc) {
1062 /* now we have a real cpu fault */
1063 current_tb = tb_find_pc(env->mem_io_pc);
1066 if (current_tb == tb &&
1067 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1068 /* If we are modifying the current TB, we must stop
1069 its execution. We could be more precise by checking
1070 that the modification is after the current PC, but it
1071 would require a specialized function to partially
1072 restore the CPU state */
1074 current_tb_modified = 1;
1075 cpu_restore_state(current_tb, env, env->mem_io_pc);
1076 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1077 &current_flags);
1079 #endif /* TARGET_HAS_PRECISE_SMC */
1080 /* we need to do that to handle the case where a signal
1081 occurs while doing tb_phys_invalidate() */
1082 saved_tb = NULL;
1083 if (env) {
1084 saved_tb = env->current_tb;
1085 env->current_tb = NULL;
1087 tb_phys_invalidate(tb, -1);
1088 if (env) {
1089 env->current_tb = saved_tb;
1090 if (env->interrupt_request && env->current_tb)
1091 cpu_interrupt(env, env->interrupt_request);
1094 tb = tb_next;
1096 #if !defined(CONFIG_USER_ONLY)
1097 /* if no code remaining, no need to continue to use slow writes */
1098 if (!p->first_tb) {
1099 invalidate_page_bitmap(p);
1100 if (is_cpu_write_access) {
1101 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1104 #endif
1105 #ifdef TARGET_HAS_PRECISE_SMC
1106 if (current_tb_modified) {
1107 /* we generate a block containing just the instruction
1108 modifying the memory. It will ensure that it cannot modify
1109 itself */
1110 env->current_tb = NULL;
1111 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1112 cpu_resume_from_signal(env, NULL);
1114 #endif
1117 /* len must be <= 8 and start must be a multiple of len */
1118 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1120 PageDesc *p;
1121 int offset, b;
1122 #if 0
1123 if (1) {
1124 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1125 cpu_single_env->mem_io_vaddr, len,
1126 cpu_single_env->eip,
1127 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1129 #endif
1130 p = page_find(start >> TARGET_PAGE_BITS);
1131 if (!p)
1132 return;
1133 if (p->code_bitmap) {
1134 offset = start & ~TARGET_PAGE_MASK;
1135 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1136 if (b & ((1 << len) - 1))
1137 goto do_invalidate;
1138 } else {
1139 do_invalidate:
1140 tb_invalidate_phys_page_range(start, start + len, 1);
1144 #if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB linked on the guest page that contains 'addr'.
   'pc' is passed to tb_find_pc() and cpu_restore_state() (only when
   TARGET_HAS_PRECISE_SMC is defined and pc is non-zero); 'puc' is handed
   unchanged to cpu_resume_from_signal(). Does nothing when no PageDesc
   exists for the page. */
1145 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1146 unsigned long pc, void *puc)
1148 TranslationBlock *tb;
1149 PageDesc *p;
1150 int n;
1151 #ifdef TARGET_HAS_PRECISE_SMC
1152 TranslationBlock *current_tb = NULL;
1153 CPUState *env = cpu_single_env;
1154 int current_tb_modified = 0;
1155 target_ulong current_pc = 0;
1156 target_ulong current_cs_base = 0;
1157 int current_flags = 0;
1158 #endif
/* round addr down to the start of its target page */
1160 addr &= TARGET_PAGE_MASK;
1161 p = page_find(addr >> TARGET_PAGE_BITS);
1162 if (!p)
1163 return;
1164 tb = p->first_tb;
1165 #ifdef TARGET_HAS_PRECISE_SMC
1166 if (tb && pc != 0) {
1167 current_tb = tb_find_pc(pc);
1169 #endif
/* Walk the page's TB list. Each link is a tagged pointer: the low two
   bits (n) select which page_next[] slot of the TB continues the list. */
1170 while (tb != NULL) {
1171 n = (long)tb & 3;
1172 tb = (TranslationBlock *)((long)tb & ~3);
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 if (current_tb == tb &&
1175 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1176 /* If we are modifying the current TB, we must stop
1177 its execution. We could be more precise by checking
1178 that the modification is after the current PC, but it
1179 would require a specialized function to partially
1180 restore the CPU state */
1182 current_tb_modified = 1;
1183 cpu_restore_state(current_tb, env, pc);
1184 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1185 &current_flags);
1187 #endif /* TARGET_HAS_PRECISE_SMC */
1188 tb_phys_invalidate(tb, addr);
1189 tb = tb->page_next[n];
/* every TB of the page has been invalidated: the list is now empty */
1191 p->first_tb = NULL;
1192 #ifdef TARGET_HAS_PRECISE_SMC
1193 if (current_tb_modified) {
1194 /* we generate a block containing just the instruction
1195 modifying the memory. It will ensure that it cannot modify
1196 itself */
1197 env->current_tb = NULL;
1198 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1199 cpu_resume_from_signal(env, puc);
1201 #endif
1203 #endif
/* Link 'tb' into the TB list of the page at 'page_addr' as its n-th page
   (n indexes tb->page_addr[] / tb->page_next[]). The PageDesc is obtained
   with page_find_alloc(..., 1). The page's code bitmap is dropped because
   the set of TBs on the page changed. */
1205 /* add the tb in the target page and protect it if necessary */
1206 static inline void tb_alloc_page(TranslationBlock *tb,
1207 unsigned int n, tb_page_addr_t page_addr)
1209 PageDesc *p;
1210 #ifndef CONFIG_USER_ONLY
1211 bool page_already_protected;
1212 #endif
1214 tb->page_addr[n] = page_addr;
1215 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
/* push the TB at the head of the page list; the old head becomes its successor */
1216 tb->page_next[n] = p->first_tb;
1217 #ifndef CONFIG_USER_ONLY
/* must be sampled before first_tb is overwritten below */
1218 page_already_protected = p->first_tb != NULL;
1219 #endif
/* the list head is a tagged pointer: n is stored in the low bits */
1220 p->first_tb = (TranslationBlock *)((long)tb | n);
1221 invalidate_page_bitmap(p);
1223 #if defined(TARGET_HAS_SMC) || 1
1225 #if defined(CONFIG_USER_ONLY)
1226 if (p->flags & PAGE_WRITE) {
1227 target_ulong addr;
1228 PageDesc *p2;
1229 int prot;
1231 /* force the host page as non writable (writes will have a
1232 page fault + mprotect overhead) */
1233 page_addr &= qemu_host_page_mask;
1234 prot = 0;
/* visit every target page sharing the host page: accumulate their
   flags into prot and clear PAGE_WRITE on each of them */
1235 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1236 addr += TARGET_PAGE_SIZE) {
1238 p2 = page_find (addr >> TARGET_PAGE_BITS);
1239 if (!p2)
1240 continue;
1241 prot |= p2->flags;
1242 p2->flags &= ~PAGE_WRITE;
/* NOTE(review): the return value of mprotect() is not checked */
1244 mprotect(g2h(page_addr), qemu_host_page_size,
1245 (prot & PAGE_BITS) & ~PAGE_WRITE);
1246 #ifdef DEBUG_TB_INVALIDATE
1247 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1248 page_addr);
1249 #endif
1251 #else
1252 /* if some code is already present, then the pages are already
1253 protected. So we handle the case where only the first TB is
1254 allocated in a physical page */
1255 if (!page_already_protected) {
1256 tlb_protect_code(page_addr);
1258 #endif
1260 #endif /* TARGET_HAS_SMC */
1263 /* add a new TB and link it to the physical page tables. phys_page2 is
1264 (-1) to indicate that only one page contains the TB. */
1265 void tb_link_page(TranslationBlock *tb,
1266 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1268 unsigned int h;
1269 TranslationBlock **ptb;
1271 /* Grab the mmap lock to stop another thread invalidating this TB
1272 before we are done. */
1273 mmap_lock();
1274 /* add in the physical hash table */
1275 h = tb_phys_hash_func(phys_pc);
1276 ptb = &tb_phys_hash[h];
1277 tb->phys_hash_next = *ptb;
1278 *ptb = tb;
1280 /* add in the page list */
1281 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1282 if (phys_page2 != -1)
1283 tb_alloc_page(tb, 1, phys_page2);
1284 else
1285 tb->page_addr[1] = -1;
1287 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1288 tb->jmp_next[0] = NULL;
1289 tb->jmp_next[1] = NULL;
1291 /* init original jump addresses */
1292 if (tb->tb_next_offset[0] != 0xffff)
1293 tb_reset_jump(tb, 0);
1294 if (tb->tb_next_offset[1] != 0xffff)
1295 tb_reset_jump(tb, 1);
1297 #ifdef DEBUG_TB_CHECK
1298 tb_page_check();
1299 #endif
1300 mmap_unlock();
1303 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1304 tb[1].tc_ptr. Return NULL if not found */
1305 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1307 int m_min, m_max, m;
1308 unsigned long v;
1309 TranslationBlock *tb;
1311 if (nb_tbs <= 0)
1312 return NULL;
1313 if (tc_ptr < (unsigned long)code_gen_buffer ||
1314 tc_ptr >= (unsigned long)code_gen_ptr)
1315 return NULL;
1316 /* binary search (cf Knuth) */
1317 m_min = 0;
1318 m_max = nb_tbs - 1;
1319 while (m_min <= m_max) {
1320 m = (m_min + m_max) >> 1;
1321 tb = &tbs[m];
1322 v = (unsigned long)tb->tc_ptr;
1323 if (v == tc_ptr)
1324 return tb;
1325 else if (tc_ptr < v) {
1326 m_max = m - 1;
1327 } else {
1328 m_min = m + 1;
1331 return &tbs[m_max];
1334 static void tb_reset_jump_recursive(TranslationBlock *tb);
/* Undo the direct jump number 'n' of 'tb', if one is linked: remove tb
   from the incoming-jump list of the destination TB, reset the jump in the
   generated code with tb_reset_jump(), then recurse on the destination via
   tb_reset_jump_recursive(). Does nothing when tb->jmp_next[n] is NULL.
   List links are tagged pointers: the low two bits carry a slot number,
   and the value 2 marks the list head (the destination TB itself). */
1336 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1338 TranslationBlock *tb1, *tb_next, **ptb;
1339 unsigned int n1;
1341 tb1 = tb->jmp_next[n];
1342 if (tb1 != NULL) {
1343 /* find head of list */
1344 for(;;) {
1345 n1 = (long)tb1 & 3;
1346 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1347 if (n1 == 2)
1348 break;
1349 tb1 = tb1->jmp_next[n1];
/* i.e. tb1 is now the TB that tb jumps to */
1351 /* we are now sure now that tb jumps to tb1 */
1352 tb_next = tb1;
1354 /* remove tb from the jmp_first list */
1355 ptb = &tb_next->jmp_first;
/* advance ptb until it designates the link that refers to (tb, n) */
1356 for(;;) {
1357 tb1 = *ptb;
1358 n1 = (long)tb1 & 3;
1359 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1360 if (n1 == n && tb1 == tb)
1361 break;
1362 ptb = &tb1->jmp_next[n1];
/* unlink: bypass tb in the list and clear its own link */
1364 *ptb = tb->jmp_next[n];
1365 tb->jmp_next[n] = NULL;
1367 /* suppress the jump to next tb in generated code */
1368 tb_reset_jump(tb, n);
1370 /* suppress jumps in the tb on which we could have jumped */
1371 tb_reset_jump_recursive(tb_next);
1375 static void tb_reset_jump_recursive(TranslationBlock *tb)
1377 tb_reset_jump_recursive2(tb, 0);
1378 tb_reset_jump_recursive2(tb, 1);
1381 #if defined(TARGET_HAS_ICE)
1382 #if defined(CONFIG_USER_ONLY)
1383 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1385 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1387 #else
1388 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1390 target_phys_addr_t addr;
1391 target_ulong pd;
1392 ram_addr_t ram_addr;
1393 PhysPageDesc *p;
1395 addr = cpu_get_phys_page_debug(env, pc);
1396 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1397 if (!p) {
1398 pd = IO_MEM_UNASSIGNED;
1399 } else {
1400 pd = p->phys_offset;
1402 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1403 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1405 #endif
1406 #endif /* TARGET_HAS_ICE */
1408 #if defined(CONFIG_USER_ONLY)
1409 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1414 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1415 int flags, CPUWatchpoint **watchpoint)
1417 return -ENOSYS;
1419 #else
1420 /* Add a watchpoint. */
1421 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1422 int flags, CPUWatchpoint **watchpoint)
1424 target_ulong len_mask = ~(len - 1);
1425 CPUWatchpoint *wp;
1427 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1428 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1429 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1430 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1431 return -EINVAL;
1433 wp = qemu_malloc(sizeof(*wp));
1435 wp->vaddr = addr;
1436 wp->len_mask = len_mask;
1437 wp->flags = flags;
1439 /* keep all GDB-injected watchpoints in front */
1440 if (flags & BP_GDB)
1441 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1442 else
1443 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1445 tlb_flush_page(env, addr);
1447 if (watchpoint)
1448 *watchpoint = wp;
1449 return 0;
1452 /* Remove a specific watchpoint. */
1453 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1454 int flags)
1456 target_ulong len_mask = ~(len - 1);
1457 CPUWatchpoint *wp;
1459 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1460 if (addr == wp->vaddr && len_mask == wp->len_mask
1461 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1462 cpu_watchpoint_remove_by_ref(env, wp);
1463 return 0;
1466 return -ENOENT;
1469 /* Remove a specific watchpoint by reference. */
1470 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1472 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1474 tlb_flush_page(env, watchpoint->vaddr);
1476 qemu_free(watchpoint);
1479 /* Remove all matching watchpoints. */
1480 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1482 CPUWatchpoint *wp, *next;
1484 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1485 if (wp->flags & mask)
1486 cpu_watchpoint_remove_by_ref(env, wp);
1489 #endif
1491 /* Add a breakpoint. */
1492 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1493 CPUBreakpoint **breakpoint)
1495 #if defined(TARGET_HAS_ICE)
1496 CPUBreakpoint *bp;
1498 bp = qemu_malloc(sizeof(*bp));
1500 bp->pc = pc;
1501 bp->flags = flags;
1503 /* keep all GDB-injected breakpoints in front */
1504 if (flags & BP_GDB)
1505 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1506 else
1507 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1509 breakpoint_invalidate(env, pc);
1511 if (breakpoint)
1512 *breakpoint = bp;
1513 return 0;
1514 #else
1515 return -ENOSYS;
1516 #endif
1519 /* Remove a specific breakpoint. */
1520 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1522 #if defined(TARGET_HAS_ICE)
1523 CPUBreakpoint *bp;
1525 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1526 if (bp->pc == pc && bp->flags == flags) {
1527 cpu_breakpoint_remove_by_ref(env, bp);
1528 return 0;
1531 return -ENOENT;
1532 #else
1533 return -ENOSYS;
1534 #endif
1537 /* Remove a specific breakpoint by reference. */
1538 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1540 #if defined(TARGET_HAS_ICE)
1541 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1543 breakpoint_invalidate(env, breakpoint->pc);
1545 qemu_free(breakpoint);
1546 #endif
1549 /* Remove all matching breakpoints. */
1550 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1552 #if defined(TARGET_HAS_ICE)
1553 CPUBreakpoint *bp, *next;
1555 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1556 if (bp->flags & mask)
1557 cpu_breakpoint_remove_by_ref(env, bp);
1559 #endif
1562 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1563 CPU loop after each instruction */
1564 void cpu_single_step(CPUState *env, int enabled)
1566 #if defined(TARGET_HAS_ICE)
1567 if (env->singlestep_enabled != enabled) {
1568 env->singlestep_enabled = enabled;
1569 if (kvm_enabled())
1570 kvm_update_guest_debug(env, 0);
1571 else {
1572 /* must flush all the translated code to avoid inconsistencies */
1573 /* XXX: only flush what is necessary */
1574 tb_flush(env);
1577 #endif
1580 /* enable or disable low levels log */
1581 void cpu_set_log(int log_flags)
1583 loglevel = log_flags;
1584 if (loglevel && !logfile) {
1585 logfile = fopen(logfilename, log_append ? "a" : "w");
1586 if (!logfile) {
1587 perror(logfilename);
1588 _exit(1);
1590 #if !defined(CONFIG_SOFTMMU)
1591 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1593 static char logfile_buf[4096];
1594 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1596 #elif !defined(_WIN32)
1597 /* Win32 doesn't support line-buffering and requires size >= 2 */
1598 setvbuf(logfile, NULL, _IOLBF, 0);
1599 #endif
1600 log_append = 1;
1602 if (!loglevel && logfile) {
1603 fclose(logfile);
1604 logfile = NULL;
1608 void cpu_set_log_filename(const char *filename)
1610 logfilename = strdup(filename);
1611 if (logfile) {
1612 fclose(logfile);
1613 logfile = NULL;
1615 cpu_set_log(loglevel);
1618 static void cpu_unlink_tb(CPUState *env)
1620 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1621 problem and hope the cpu will stop of its own accord. For userspace
1622 emulation this often isn't actually as bad as it sounds. Often
1623 signals are used primarily to interrupt blocking syscalls. */
1624 TranslationBlock *tb;
1625 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1627 spin_lock(&interrupt_lock);
1628 tb = env->current_tb;
1629 /* if the cpu is currently executing code, we must unlink it and
1630 all the potentially executing TB */
1631 if (tb) {
1632 env->current_tb = NULL;
1633 tb_reset_jump_recursive(tb);
1635 spin_unlock(&interrupt_lock);
1638 #ifndef CONFIG_USER_ONLY
1639 /* mask must never be zero, except for A20 change call */
1640 static void tcg_handle_interrupt(CPUState *env, int mask)
1642 int old_mask;
1644 old_mask = env->interrupt_request;
1645 env->interrupt_request |= mask;
1648 * If called from iothread context, wake the target cpu in
1649 * case its halted.
1651 if (!qemu_cpu_is_self(env)) {
1652 qemu_cpu_kick(env);
1653 return;
1656 if (use_icount) {
1657 env->icount_decr.u16.high = 0xffff;
1658 if (!can_do_io(env)
1659 && (mask & ~old_mask) != 0) {
1660 cpu_abort(env, "Raised interrupt while not in I/O function");
1662 } else {
1663 cpu_unlink_tb(env);
1667 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1669 #else /* CONFIG_USER_ONLY */
1671 void cpu_interrupt(CPUState *env, int mask)
1673 env->interrupt_request |= mask;
1674 cpu_unlink_tb(env);
1676 #endif /* CONFIG_USER_ONLY */
1678 void cpu_reset_interrupt(CPUState *env, int mask)
1680 env->interrupt_request &= ~mask;
1683 void cpu_exit(CPUState *env)
1685 env->exit_request = 1;
1686 cpu_unlink_tb(env);
/* Table of log categories. Each entry is { mask bit, name, help text }.
   The table is terminated by an entry whose mask is 0; some entries are
   only present for TARGET_I386 or when DEBUG_IOPORT is defined. */
1689 const CPULogItem cpu_log_items[] = {
1690 { CPU_LOG_TB_OUT_ASM, "out_asm",
1691 "show generated host assembly code for each compiled TB" },
1692 { CPU_LOG_TB_IN_ASM, "in_asm",
1693 "show target assembly code for each compiled TB" },
1694 { CPU_LOG_TB_OP, "op",
1695 "show micro ops for each compiled TB" },
/* the help text of "op_opt" is assembled from adjacent string literals */
1696 { CPU_LOG_TB_OP_OPT, "op_opt",
1697 "show micro ops "
1698 #ifdef TARGET_I386
1699 "before eflags optimization and "
1700 #endif
1701 "after liveness analysis" },
1702 { CPU_LOG_INT, "int",
1703 "show interrupts/exceptions in short format" },
1704 { CPU_LOG_EXEC, "exec",
1705 "show trace before each executed TB (lots of logs)" },
1706 { CPU_LOG_TB_CPU, "cpu",
1707 "show CPU state before block translation" },
1708 #ifdef TARGET_I386
1709 { CPU_LOG_PCALL, "pcall",
1710 "show protected mode far calls/returns/exceptions" },
1711 { CPU_LOG_RESET, "cpu_reset",
1712 "show CPU state before CPU resets" },
1713 #endif
1714 #ifdef DEBUG_IOPORT
1715 { CPU_LOG_IOPORT, "ioport",
1716 "show all i/o ports accesses" },
1717 #endif
/* terminator entry */
1718 { 0, NULL, NULL },
1721 #ifndef CONFIG_USER_ONLY
/* List of registered CPUPhysMemoryClient objects, linked through their
   'list' field; initially empty. */
1722 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1723 = QLIST_HEAD_INITIALIZER(memory_client_list);
1725 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1726 ram_addr_t size,
1727 ram_addr_t phys_offset,
1728 bool log_dirty)
1730 CPUPhysMemoryClient *client;
1731 QLIST_FOREACH(client, &memory_client_list, list) {
1732 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1736 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1737 target_phys_addr_t end)
1739 CPUPhysMemoryClient *client;
1740 QLIST_FOREACH(client, &memory_client_list, list) {
1741 int r = client->sync_dirty_bitmap(client, start, end);
1742 if (r < 0)
1743 return r;
1745 return 0;
1748 static int cpu_notify_migration_log(int enable)
1750 CPUPhysMemoryClient *client;
1751 QLIST_FOREACH(client, &memory_client_list, list) {
1752 int r = client->migration_log(client, enable);
1753 if (r < 0)
1754 return r;
1756 return 0;
/* Mapping being accumulated while walking the physical page table, so
   that contiguous pages are reported with a single set_memory() call.
   A size of 0 means that no mapping is pending. */
1759 struct last_map {
/* guest physical address where the pending mapping starts */
1760 target_phys_addr_t start_addr;
/* length accumulated so far, grown by TARGET_PAGE_SIZE per merged page */
1761 ram_addr_t size;
/* phys_offset of the first page of the pending mapping */
1762 ram_addr_t phys_offset;
/* Recursive worker of phys_page_for_each(): walk the table designated by
   *lp ('level' 0 is a leaf array of PhysPageDesc, higher levels are arrays
   of pointers) and report every assigned page to client->set_memory(),
   merging pages that are contiguous both in guest address and in
   phys_offset into one call through 'map'. The last pending mapping is
   left in 'map' for the caller to flush. */
1765 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1766 * address. Each intermediate table provides the next L2_BITs of guest
1767 * physical address space. The number of levels vary based on host and
1768 * guest configuration, making it efficient to build the final guest
1769 * physical address by seeding the L1 offset and shifting and adding in
1770 * each L2 offset as we recurse through them. */
1771 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1772 void **lp, target_phys_addr_t addr,
1773 struct last_map *map)
1775 int i;
/* nothing was ever allocated below this slot */
1777 if (*lp == NULL) {
1778 return;
1780 if (level == 0) {
1781 PhysPageDesc *pd = *lp;
/* turn the accumulated table indexes into a byte address */
1782 addr <<= L2_BITS + TARGET_PAGE_BITS;
1783 for (i = 0; i < L2_SIZE; ++i) {
1784 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1785 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
/* page directly follows the pending mapping: extend it */
1787 if (map->size &&
1788 start_addr == map->start_addr + map->size &&
1789 pd[i].phys_offset == map->phys_offset + map->size) {
1791 map->size += TARGET_PAGE_SIZE;
1792 continue;
/* not contiguous: report the pending mapping before starting a new one */
1793 } else if (map->size) {
1794 client->set_memory(client, map->start_addr,
1795 map->size, map->phys_offset, false);
1798 map->start_addr = start_addr;
1799 map->size = TARGET_PAGE_SIZE;
1800 map->phys_offset = pd[i].phys_offset;
1803 } else {
1804 void **pp = *lp;
/* descend, appending this level's index to the address prefix */
1805 for (i = 0; i < L2_SIZE; ++i) {
1806 phys_page_for_each_1(client, level - 1, pp + i,
1807 (addr << L2_BITS) | i, map);
1812 static void phys_page_for_each(CPUPhysMemoryClient *client)
1814 int i;
1815 struct last_map map = { };
1817 for (i = 0; i < P_L1_SIZE; ++i) {
1818 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1819 l1_phys_map + i, i, &map);
1821 if (map.size) {
1822 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1823 false);
1827 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1829 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1830 phys_page_for_each(client);
1833 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1835 QLIST_REMOVE(client, list);
1837 #endif
/* Return 1 when the first n bytes of s1 are exactly the NUL-terminated
   string s2 (i.e. s2 has length n and the bytes match), 0 otherwise.
   s1 does not need to be NUL-terminated after n bytes. */
static int cmp1(const char *s1, int n, const char *s2)
{
    if (strlen(s2) == n && memcmp(s1, s2, n) == 0) {
        return 1;
    }
    return 0;
}
1846 /* takes a comma separated list of log masks. Return 0 if error. */
1847 int cpu_str_to_log_mask(const char *str)
1849 const CPULogItem *item;
1850 int mask;
1851 const char *p, *p1;
1853 p = str;
1854 mask = 0;
1855 for(;;) {
1856 p1 = strchr(p, ',');
1857 if (!p1)
1858 p1 = p + strlen(p);
1859 if(cmp1(p,p1-p,"all")) {
1860 for(item = cpu_log_items; item->mask != 0; item++) {
1861 mask |= item->mask;
1863 } else {
1864 for(item = cpu_log_items; item->mask != 0; item++) {
1865 if (cmp1(p, p1 - p, item->name))
1866 goto found;
1868 return 0;
1870 found:
1871 mask |= item->mask;
1872 if (*p1 != ',')
1873 break;
1874 p = p1 + 1;
1876 return mask;
1879 void cpu_abort(CPUState *env, const char *fmt, ...)
1881 va_list ap;
1882 va_list ap2;
1884 va_start(ap, fmt);
1885 va_copy(ap2, ap);
1886 fprintf(stderr, "qemu: fatal: ");
1887 vfprintf(stderr, fmt, ap);
1888 fprintf(stderr, "\n");
1889 #ifdef TARGET_I386
1890 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1891 #else
1892 cpu_dump_state(env, stderr, fprintf, 0);
1893 #endif
1894 if (qemu_log_enabled()) {
1895 qemu_log("qemu: fatal: ");
1896 qemu_log_vprintf(fmt, ap2);
1897 qemu_log("\n");
1898 #ifdef TARGET_I386
1899 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1900 #else
1901 log_cpu_state(env, 0);
1902 #endif
1903 qemu_log_flush();
1904 qemu_log_close();
1906 va_end(ap2);
1907 va_end(ap);
1908 #if defined(CONFIG_USER_ONLY)
1910 struct sigaction act;
1911 sigfillset(&act.sa_mask);
1912 act.sa_handler = SIG_DFL;
1913 sigaction(SIGABRT, &act, NULL);
1915 #endif
1916 abort();
1919 CPUState *cpu_copy(CPUState *env)
1921 CPUState *new_env = cpu_init(env->cpu_model_str);
1922 CPUState *next_cpu = new_env->next_cpu;
1923 int cpu_index = new_env->cpu_index;
1924 #if defined(TARGET_HAS_ICE)
1925 CPUBreakpoint *bp;
1926 CPUWatchpoint *wp;
1927 #endif
1929 memcpy(new_env, env, sizeof(CPUState));
1931 /* Preserve chaining and index. */
1932 new_env->next_cpu = next_cpu;
1933 new_env->cpu_index = cpu_index;
1935 /* Clone all break/watchpoints.
1936 Note: Once we support ptrace with hw-debug register access, make sure
1937 BP_CPU break/watchpoints are handled correctly on clone. */
1938 QTAILQ_INIT(&env->breakpoints);
1939 QTAILQ_INIT(&env->watchpoints);
1940 #if defined(TARGET_HAS_ICE)
1941 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1942 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1944 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1945 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1946 wp->flags, NULL);
1948 #endif
1950 return new_env;
1953 #if !defined(CONFIG_USER_ONLY)
1955 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1957 unsigned int i;
1959 /* Discard jump cache entries for any tb which might potentially
1960 overlap the flushed page. */
1961 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1962 memset (&env->tb_jmp_cache[i], 0,
1963 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1965 i = tb_jmp_cache_hash_page(addr);
1966 memset (&env->tb_jmp_cache[i], 0,
1967 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
/* Template for an empty TLB entry: every field is set to -1. It is copied
   over entries to flush them. */
1970 static CPUTLBEntry s_cputlb_empty_entry = {
1971 .addr_read = -1,
1972 .addr_write = -1,
1973 .addr_code = -1,
1974 .addend = -1,
1977 /* NOTE: if flush_global is true, also flush global entries (not
1978 implemented yet) */
1979 void tlb_flush(CPUState *env, int flush_global)
1981 int i;
1983 #if defined(DEBUG_TLB)
1984 printf("tlb_flush:\n");
1985 #endif
1986 /* must reset current TB so that interrupts cannot modify the
1987 links while we are modifying them */
1988 env->current_tb = NULL;
1990 for(i = 0; i < CPU_TLB_SIZE; i++) {
1991 int mmu_idx;
1992 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1993 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1997 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1999 env->tlb_flush_addr = -1;
2000 env->tlb_flush_mask = 0;
2001 tlb_flush_count++;
2004 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2006 if (addr == (tlb_entry->addr_read &
2007 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2008 addr == (tlb_entry->addr_write &
2009 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2010 addr == (tlb_entry->addr_code &
2011 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2012 *tlb_entry = s_cputlb_empty_entry;
2016 void tlb_flush_page(CPUState *env, target_ulong addr)
2018 int i;
2019 int mmu_idx;
2021 #if defined(DEBUG_TLB)
2022 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2023 #endif
2024 /* Check if we need to flush due to large pages. */
2025 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2026 #if defined(DEBUG_TLB)
2027 printf("tlb_flush_page: forced full flush ("
2028 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2029 env->tlb_flush_addr, env->tlb_flush_mask);
2030 #endif
2031 tlb_flush(env, 1);
2032 return;
2034 /* must reset current TB so that interrupts cannot modify the
2035 links while we are modifying them */
2036 env->current_tb = NULL;
2038 addr &= TARGET_PAGE_MASK;
2039 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2040 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2041 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2043 tlb_flush_jmp_cache(env, addr);
2046 /* update the TLBs so that writes to code in the virtual page 'addr'
2047 can be detected */
2048 static void tlb_protect_code(ram_addr_t ram_addr)
2050 cpu_physical_memory_reset_dirty(ram_addr,
2051 ram_addr + TARGET_PAGE_SIZE,
2052 CODE_DIRTY_FLAG);
2055 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2056 tested for self modifying code */
2057 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2058 target_ulong vaddr)
2060 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2063 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2064 unsigned long start, unsigned long length)
2066 unsigned long addr;
2067 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2068 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2069 if ((addr - start) < length) {
2070 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2075 /* Note: start and end must be within the same ram block. */
2076 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2077 int dirty_flags)
2079 CPUState *env;
2080 unsigned long length, start1;
2081 int i;
2083 start &= TARGET_PAGE_MASK;
2084 end = TARGET_PAGE_ALIGN(end);
2086 length = end - start;
2087 if (length == 0)
2088 return;
2089 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2091 /* we modify the TLB cache so that the dirty bit will be set again
2092 when accessing the range */
2093 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2094 /* Check that we don't span multiple blocks - this breaks the
2095 address comparisons below. */
2096 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2097 != (end - 1) - start) {
2098 abort();
2101 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2102 int mmu_idx;
2103 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2104 for(i = 0; i < CPU_TLB_SIZE; i++)
2105 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2106 start1, length);
2111 int cpu_physical_memory_set_dirty_tracking(int enable)
2113 int ret = 0;
2114 in_migration = enable;
2115 ret = cpu_notify_migration_log(!!enable);
2116 return ret;
2119 int cpu_physical_memory_get_dirty_tracking(void)
2121 return in_migration;
2124 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2125 target_phys_addr_t end_addr)
2127 int ret;
2129 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2130 return ret;
2133 int cpu_physical_log_start(target_phys_addr_t start_addr,
2134 ram_addr_t size)
2136 CPUPhysMemoryClient *client;
2137 QLIST_FOREACH(client, &memory_client_list, list) {
2138 if (client->log_start) {
2139 int r = client->log_start(client, start_addr, size);
2140 if (r < 0) {
2141 return r;
2145 return 0;
2148 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2149 ram_addr_t size)
2151 CPUPhysMemoryClient *client;
2152 QLIST_FOREACH(client, &memory_client_list, list) {
2153 if (client->log_stop) {
2154 int r = client->log_stop(client, start_addr, size);
2155 if (r < 0) {
2156 return r;
2160 return 0;
2163 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2165 ram_addr_t ram_addr;
2166 void *p;
2168 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2169 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2170 + tlb_entry->addend);
2171 ram_addr = qemu_ram_addr_from_host_nofail(p);
2172 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2173 tlb_entry->addr_write |= TLB_NOTDIRTY;
2178 /* update the TLB according to the current state of the dirty bits */
2179 void cpu_tlb_update_dirty(CPUState *env)
2181 int i;
2182 int mmu_idx;
2183 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2184 for(i = 0; i < CPU_TLB_SIZE; i++)
2185 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2189 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2191 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2192 tlb_entry->addr_write = vaddr;
2195 /* update the TLB corresponding to virtual page vaddr
2196 so that it is no longer dirty */
2197 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2199 int i;
2200 int mmu_idx;
2202 vaddr &= TARGET_PAGE_MASK;
2203 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2204 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2205 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2208 /* Our TLB does not support large pages, so remember the area covered by
2209 large pages and trigger a full TLB flush if these are invalidated. */
2210 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2211 target_ulong size)
2213 target_ulong mask = ~(size - 1);
2215 if (env->tlb_flush_addr == (target_ulong)-1) {
2216 env->tlb_flush_addr = vaddr & mask;
2217 env->tlb_flush_mask = mask;
2218 return;
2220 /* Extend the existing region to include the new page.
2221 This is a compromise between unnecessary flushes and the cost
2222 of maintaining a full variable size TLB. */
2223 mask &= env->tlb_flush_mask;
2224 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2225 mask <<= 1;
2227 env->tlb_flush_addr &= mask;
2228 env->tlb_flush_mask = mask;
/* Install the TLB entry mapping virtual page 'vaddr' to physical address
   'paddr' for MMU mode 'mmu_idx', with access rights 'prot' (PAGE_READ,
   PAGE_WRITE and PAGE_EXEC are tested). 'size' must be at least
   TARGET_PAGE_SIZE (asserted); a larger size is only recorded through
   tlb_add_large_page(). */
2231 /* Add a new TLB entry. At most one entry for a given virtual address
2232 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2233 supplied size is only used by tlb_flush_page. */
2234 void tlb_set_page(CPUState *env, target_ulong vaddr,
2235 target_phys_addr_t paddr, int prot,
2236 int mmu_idx, target_ulong size)
2238 PhysPageDesc *p;
2239 unsigned long pd;
2240 unsigned int index;
2241 target_ulong address;
2242 target_ulong code_address;
2243 unsigned long addend;
2244 CPUTLBEntry *te;
2245 CPUWatchpoint *wp;
2246 target_phys_addr_t iotlb;
2248 assert(size >= TARGET_PAGE_SIZE);
2249 if (size != TARGET_PAGE_SIZE) {
2250 tlb_add_large_page(env, vaddr, size);
/* look up the physical page; pages without descriptor are unassigned */
2252 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2253 if (!p) {
2254 pd = IO_MEM_UNASSIGNED;
2255 } else {
2256 pd = p->phys_offset;
2258 #if defined(DEBUG_TLB)
2259 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2260 " prot=%x idx=%d pd=0x%08lx\n",
2261 vaddr, paddr, prot, mmu_idx, pd);
2262 #endif
/* 'address' is the value stored in the addr_* fields of the entry;
   flag bits are ORed into it below */
2264 address = vaddr;
2265 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2266 /* IO memory case (romd handled later) */
2267 address |= TLB_MMIO;
2269 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2270 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2271 /* Normal RAM. */
2272 iotlb = pd & TARGET_PAGE_MASK;
2273 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2274 iotlb |= IO_MEM_NOTDIRTY;
2275 else
2276 iotlb |= IO_MEM_ROM;
2277 } else {
2278 /* IO handlers are currently passed a physical address.
2279 It would be nice to pass an offset from the base address
2280 of that region. This would avoid having to special case RAM,
2281 and avoid full address decoding in every device.
2282 We can't use the high bits of pd for this because
2283 IO_MEM_ROMD uses these as a ram address. */
2284 iotlb = (pd & ~TARGET_PAGE_MASK);
2285 if (p) {
2286 iotlb += p->region_offset;
2287 } else {
2288 iotlb += paddr;
/* snapshot taken before the watchpoint loop, which may add TLB_MMIO to
   'address' only */
2292 code_address = address;
2293 /* Make accesses to pages with watchpoints go via the
2294 watchpoint trap routines. */
2295 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2296 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2297 /* Avoid trapping reads of pages with a write breakpoint. */
2298 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2299 iotlb = io_mem_watch + paddr;
2300 address |= TLB_MMIO;
2301 break;
/* iotlb and addend are stored relative to vaddr */
2306 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2307 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2308 te = &env->tlb_table[mmu_idx][index];
2309 te->addend = addend - vaddr;
/* an access kind that is not permitted gets the address -1 */
2310 if (prot & PAGE_READ) {
2311 te->addr_read = address;
2312 } else {
2313 te->addr_read = -1;
2316 if (prot & PAGE_EXEC) {
2317 te->addr_code = code_address;
2318 } else {
2319 te->addr_code = -1;
2321 if (prot & PAGE_WRITE) {
2322 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2323 (pd & IO_MEM_ROMD)) {
2324 /* Write access calls the I/O callback. */
2325 te->addr_write = address | TLB_MMIO;
2326 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2327 !cpu_physical_memory_is_dirty(pd)) {
2328 te->addr_write = address | TLB_NOTDIRTY;
2329 } else {
2330 te->addr_write = address;
2332 } else {
2333 te->addr_write = -1;
2337 #else
2339 void tlb_flush(CPUState *env, int flush_global)
2343 void tlb_flush_page(CPUState *env, target_ulong addr)
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
2352 struct walk_memory_regions_data
2354 walk_memory_regions_fn fn;
2355 void *priv;
2356 unsigned long start;
2357 int prot;
2360 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2361 abi_ulong end, int new_prot)
2363 if (data->start != -1ul) {
2364 int rc = data->fn(data->priv, data->start, end, data->prot);
2365 if (rc != 0) {
2366 return rc;
2370 data->start = (new_prot ? end : -1ul);
2371 data->prot = new_prot;
2373 return 0;
2376 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2377 abi_ulong base, int level, void **lp)
2379 abi_ulong pa;
2380 int i, rc;
2382 if (*lp == NULL) {
2383 return walk_memory_regions_end(data, base, 0);
2386 if (level == 0) {
2387 PageDesc *pd = *lp;
2388 for (i = 0; i < L2_SIZE; ++i) {
2389 int prot = pd[i].flags;
2391 pa = base | (i << TARGET_PAGE_BITS);
2392 if (prot != data->prot) {
2393 rc = walk_memory_regions_end(data, pa, prot);
2394 if (rc != 0) {
2395 return rc;
2399 } else {
2400 void **pp = *lp;
2401 for (i = 0; i < L2_SIZE; ++i) {
2402 pa = base | ((abi_ulong)i <<
2403 (TARGET_PAGE_BITS + L2_BITS * level));
2404 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2405 if (rc != 0) {
2406 return rc;
2411 return 0;
2414 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2416 struct walk_memory_regions_data data;
2417 unsigned long i;
2419 data.fn = fn;
2420 data.priv = priv;
2421 data.start = -1ul;
2422 data.prot = 0;
2424 for (i = 0; i < V_L1_SIZE; i++) {
2425 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2426 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2427 if (rc != 0) {
2428 return rc;
2432 return walk_memory_regions_end(&data, 0, 0);
2435 static int dump_region(void *priv, abi_ulong start,
2436 abi_ulong end, unsigned long prot)
2438 FILE *f = (FILE *)priv;
2440 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2441 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2442 start, end, end - start,
2443 ((prot & PAGE_READ) ? 'r' : '-'),
2444 ((prot & PAGE_WRITE) ? 'w' : '-'),
2445 ((prot & PAGE_EXEC) ? 'x' : '-'));
2447 return (0);
2450 /* dump memory mappings */
2451 void page_dump(FILE *f)
2453 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2454 "start", "end", "size", "prot");
2455 walk_memory_regions(f, dump_region);
2458 int page_get_flags(target_ulong address)
2460 PageDesc *p;
2462 p = page_find(address >> TARGET_PAGE_BITS);
2463 if (!p)
2464 return 0;
2465 return p->flags;
2468 /* Modify the flags of a page and invalidate the code if necessary.
2469 The flag PAGE_WRITE_ORG is positioned automatically depending
2470 on PAGE_WRITE. The mmap_lock should already be held. */
2471 void page_set_flags(target_ulong start, target_ulong end, int flags)
2473 target_ulong addr, len;
2475 /* This function should never be called with addresses outside the
2476 guest address space. If this assert fires, it probably indicates
2477 a missing call to h2g_valid. */
2478 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2479 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2480 #endif
2481 assert(start < end);
2483 start = start & TARGET_PAGE_MASK;
2484 end = TARGET_PAGE_ALIGN(end);
2486 if (flags & PAGE_WRITE) {
2487 flags |= PAGE_WRITE_ORG;
2490 for (addr = start, len = end - start;
2491 len != 0;
2492 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2493 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2495 /* If the write protection bit is set, then we invalidate
2496 the code inside. */
2497 if (!(p->flags & PAGE_WRITE) &&
2498 (flags & PAGE_WRITE) &&
2499 p->first_tb) {
2500 tb_invalidate_phys_page(addr, 0, NULL);
2502 p->flags = flags;
2506 int page_check_range(target_ulong start, target_ulong len, int flags)
2508 PageDesc *p;
2509 target_ulong end;
2510 target_ulong addr;
2512 /* This function should never be called with addresses outside the
2513 guest address space. If this assert fires, it probably indicates
2514 a missing call to h2g_valid. */
2515 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2516 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2517 #endif
2519 if (len == 0) {
2520 return 0;
2522 if (start + len - 1 < start) {
2523 /* We've wrapped around. */
2524 return -1;
2527 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2528 start = start & TARGET_PAGE_MASK;
2530 for (addr = start, len = end - start;
2531 len != 0;
2532 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2533 p = page_find(addr >> TARGET_PAGE_BITS);
2534 if( !p )
2535 return -1;
2536 if( !(p->flags & PAGE_VALID) )
2537 return -1;
2539 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2540 return -1;
2541 if (flags & PAGE_WRITE) {
2542 if (!(p->flags & PAGE_WRITE_ORG))
2543 return -1;
2544 /* unprotect the page if it was put read-only because it
2545 contains translated code */
2546 if (!(p->flags & PAGE_WRITE)) {
2547 if (!page_unprotect(addr, 0, NULL))
2548 return -1;
2550 return 0;
2553 return 0;
2556 /* called from signal handler: invalidate the code and unprotect the
2557 page. Return TRUE if the fault was successfully handled. */
2558 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2560 unsigned int prot;
2561 PageDesc *p;
2562 target_ulong host_start, host_end, addr;
2564 /* Technically this isn't safe inside a signal handler. However we
2565 know this only ever happens in a synchronous SEGV handler, so in
2566 practice it seems to be ok. */
2567 mmap_lock();
2569 p = page_find(address >> TARGET_PAGE_BITS);
2570 if (!p) {
2571 mmap_unlock();
2572 return 0;
2575 /* if the page was really writable, then we change its
2576 protection back to writable */
2577 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2578 host_start = address & qemu_host_page_mask;
2579 host_end = host_start + qemu_host_page_size;
2581 prot = 0;
2582 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2583 p = page_find(addr >> TARGET_PAGE_BITS);
2584 p->flags |= PAGE_WRITE;
2585 prot |= p->flags;
2587 /* and since the content will be modified, we must invalidate
2588 the corresponding translated code. */
2589 tb_invalidate_phys_page(addr, pc, puc);
2590 #ifdef DEBUG_TB_CHECK
2591 tb_invalidate_check(addr);
2592 #endif
2594 mprotect((void *)g2h(host_start), qemu_host_page_size,
2595 prot & PAGE_BITS);
2597 mmap_unlock();
2598 return 1;
2600 mmap_unlock();
2601 return 0;
2604 static inline void tlb_set_dirty(CPUState *env,
2605 unsigned long addr, target_ulong vaddr)
2608 #endif /* defined(CONFIG_USER_ONLY) */
2610 #if !defined(CONFIG_USER_ONLY)
2612 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2613 typedef struct subpage_t {
2614 target_phys_addr_t base;
2615 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2616 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2617 } subpage_t;
2619 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2620 ram_addr_t memory, ram_addr_t region_offset);
2621 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2622 ram_addr_t orig_memory,
2623 ram_addr_t region_offset);
/*
 * Work out which part of the page at 'addr' is covered by the region that
 * starts at 'start_addr' and is 'orig_size' bytes long.
 *
 * Outputs: 'start_addr2' and 'end_addr2' receive the first and last covered
 * offsets within the page; 'need_subpage' is set to 1 when the page is only
 * partially covered (it is never cleared here).
 *
 * NOTE: 'orig_size' is not a macro parameter; it is taken from the scope of
 * the caller.  The 'end_addr' parameter is accepted but not used.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                       \
    do {                                                                  \
        if (addr > start_addr) {                                          \
            start_addr2 = 0;                                              \
        } else {                                                          \
            start_addr2 = start_addr & ~TARGET_PAGE_MASK;                 \
            if (start_addr2 > 0) {                                        \
                need_subpage = 1;                                         \
            }                                                             \
        }                                                                 \
                                                                          \
        if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) {        \
            end_addr2 = TARGET_PAGE_SIZE - 1;                             \
        } else {                                                          \
            end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
            if (end_addr2 < TARGET_PAGE_SIZE - 1) {                       \
                need_subpage = 1;                                         \
            }                                                             \
        }                                                                 \
    } while (0)
2644 /* register physical memory.
2645 For RAM, 'size' must be a multiple of the target page size.
2646 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2647 io memory page. The address used when calling the IO function is
2648 the offset from the start of the region, plus region_offset. Both
2649 start_addr and region_offset are rounded down to a page boundary
2650 before calculating this offset. This should not be a problem unless
2651 the low bits of start_addr and region_offset differ. */
2652 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2653 ram_addr_t size,
2654 ram_addr_t phys_offset,
2655 ram_addr_t region_offset,
2656 bool log_dirty)
2658 target_phys_addr_t addr, end_addr;
2659 PhysPageDesc *p;
2660 CPUState *env;
2661 ram_addr_t orig_size = size;
2662 subpage_t *subpage;
2664 assert(size);
2665 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2667 if (phys_offset == IO_MEM_UNASSIGNED) {
2668 region_offset = start_addr;
2670 region_offset &= TARGET_PAGE_MASK;
2671 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2672 end_addr = start_addr + (target_phys_addr_t)size;
2674 addr = start_addr;
2675 do {
2676 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2677 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2678 ram_addr_t orig_memory = p->phys_offset;
2679 target_phys_addr_t start_addr2, end_addr2;
2680 int need_subpage = 0;
2682 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2683 need_subpage);
2684 if (need_subpage) {
2685 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2686 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2687 &p->phys_offset, orig_memory,
2688 p->region_offset);
2689 } else {
2690 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2691 >> IO_MEM_SHIFT];
2693 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2694 region_offset);
2695 p->region_offset = 0;
2696 } else {
2697 p->phys_offset = phys_offset;
2698 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2699 (phys_offset & IO_MEM_ROMD))
2700 phys_offset += TARGET_PAGE_SIZE;
2702 } else {
2703 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2704 p->phys_offset = phys_offset;
2705 p->region_offset = region_offset;
2706 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2707 (phys_offset & IO_MEM_ROMD)) {
2708 phys_offset += TARGET_PAGE_SIZE;
2709 } else {
2710 target_phys_addr_t start_addr2, end_addr2;
2711 int need_subpage = 0;
2713 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2714 end_addr2, need_subpage);
2716 if (need_subpage) {
2717 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2718 &p->phys_offset, IO_MEM_UNASSIGNED,
2719 addr & TARGET_PAGE_MASK);
2720 subpage_register(subpage, start_addr2, end_addr2,
2721 phys_offset, region_offset);
2722 p->region_offset = 0;
2726 region_offset += TARGET_PAGE_SIZE;
2727 addr += TARGET_PAGE_SIZE;
2728 } while (addr != end_addr);
2730 /* since each CPU stores ram addresses in its TLB cache, we must
2731 reset the modified entries */
2732 /* XXX: slow ! */
2733 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2734 tlb_flush(env, 1);
2738 /* XXX: temporary until new memory mapping API */
2739 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2741 PhysPageDesc *p;
2743 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2744 if (!p)
2745 return IO_MEM_UNASSIGNED;
2746 return p->phys_offset;
2749 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2751 if (kvm_enabled())
2752 kvm_coalesce_mmio_region(addr, size);
2755 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2757 if (kvm_enabled())
2758 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced MMIO buffer; a no-op without KVM. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2767 #if defined(__linux__) && !defined(TARGET_S390X)
2769 #include <sys/vfs.h>
#define HUGETLBFS_MAGIC       0x958458f6

/*
 * Return the block size reported by statfs() for 'path'.
 *
 * statfs() is retried while it fails with EINTR.  Any other failure is
 * reported with perror() and 0 is returned.  A filesystem whose type is not
 * HUGETLBFS_MAGIC only triggers a warning on stderr; its block size is still
 * returned.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int rc;

    for (;;) {
        rc = statfs(path, &fs);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
2793 static void *file_ram_alloc(RAMBlock *block,
2794 ram_addr_t memory,
2795 const char *path)
2797 char *filename;
2798 void *area;
2799 int fd;
2800 #ifdef MAP_POPULATE
2801 int flags;
2802 #endif
2803 unsigned long hpagesize;
2805 hpagesize = gethugepagesize(path);
2806 if (!hpagesize) {
2807 return NULL;
2810 if (memory < hpagesize) {
2811 return NULL;
2814 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2815 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2816 return NULL;
2819 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2820 return NULL;
2823 fd = mkstemp(filename);
2824 if (fd < 0) {
2825 perror("unable to create backing store for hugepages");
2826 free(filename);
2827 return NULL;
2829 unlink(filename);
2830 free(filename);
2832 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2835 * ftruncate is not supported by hugetlbfs in older
2836 * hosts, so don't bother bailing out on errors.
2837 * If anything goes wrong with it under other filesystems,
2838 * mmap will fail.
2840 if (ftruncate(fd, memory))
2841 perror("ftruncate");
2843 #ifdef MAP_POPULATE
2844 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2845 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2846 * to sidestep this quirk.
2848 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2849 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2850 #else
2851 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2852 #endif
2853 if (area == MAP_FAILED) {
2854 perror("file_ram_alloc: can't mmap RAM pages");
2855 close(fd);
2856 return (NULL);
2858 block->fd = fd;
2859 return area;
2861 #endif
2863 static ram_addr_t find_ram_offset(ram_addr_t size)
2865 RAMBlock *block, *next_block;
2866 ram_addr_t offset = 0, mingap = ULONG_MAX;
2868 if (QLIST_EMPTY(&ram_list.blocks))
2869 return 0;
2871 QLIST_FOREACH(block, &ram_list.blocks, next) {
2872 ram_addr_t end, next = ULONG_MAX;
2874 end = block->offset + block->length;
2876 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2877 if (next_block->offset >= end) {
2878 next = MIN(next, next_block->offset);
2881 if (next - end >= size && next - end < mingap) {
2882 offset = end;
2883 mingap = next - end;
2886 return offset;
2889 static ram_addr_t last_ram_offset(void)
2891 RAMBlock *block;
2892 ram_addr_t last = 0;
2894 QLIST_FOREACH(block, &ram_list.blocks, next)
2895 last = MAX(last, block->offset + block->length);
2897 return last;
2900 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2901 ram_addr_t size, void *host)
2903 RAMBlock *new_block, *block;
2905 size = TARGET_PAGE_ALIGN(size);
2906 new_block = qemu_mallocz(sizeof(*new_block));
2908 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2909 char *id = dev->parent_bus->info->get_dev_path(dev);
2910 if (id) {
2911 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2912 qemu_free(id);
2915 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2917 QLIST_FOREACH(block, &ram_list.blocks, next) {
2918 if (!strcmp(block->idstr, new_block->idstr)) {
2919 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2920 new_block->idstr);
2921 abort();
2925 new_block->offset = find_ram_offset(size);
2926 if (host) {
2927 new_block->host = host;
2928 new_block->flags |= RAM_PREALLOC_MASK;
2929 } else {
2930 if (mem_path) {
2931 #if defined (__linux__) && !defined(TARGET_S390X)
2932 new_block->host = file_ram_alloc(new_block, size, mem_path);
2933 if (!new_block->host) {
2934 new_block->host = qemu_vmalloc(size);
2935 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2937 #else
2938 fprintf(stderr, "-mem-path option unsupported\n");
2939 exit(1);
2940 #endif
2941 } else {
2942 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2943 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2944 an system defined value, which is at least 256GB. Larger systems
2945 have larger values. We put the guest between the end of data
2946 segment (system break) and this value. We use 32GB as a base to
2947 have enough room for the system break to grow. */
2948 new_block->host = mmap((void*)0x800000000, size,
2949 PROT_EXEC|PROT_READ|PROT_WRITE,
2950 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2951 if (new_block->host == MAP_FAILED) {
2952 fprintf(stderr, "Allocating RAM failed\n");
2953 abort();
2955 #else
2956 if (xen_enabled()) {
2957 xen_ram_alloc(new_block->offset, size);
2958 } else {
2959 new_block->host = qemu_vmalloc(size);
2961 #endif
2962 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2965 new_block->length = size;
2967 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2969 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2970 last_ram_offset() >> TARGET_PAGE_BITS);
2971 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2972 0xff, size >> TARGET_PAGE_BITS);
2974 if (kvm_enabled())
2975 kvm_setup_guest_memory(new_block->host, size);
2977 return new_block->offset;
2980 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2982 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2985 void qemu_ram_free_from_ptr(ram_addr_t addr)
2987 RAMBlock *block;
2989 QLIST_FOREACH(block, &ram_list.blocks, next) {
2990 if (addr == block->offset) {
2991 QLIST_REMOVE(block, next);
2992 qemu_free(block);
2993 return;
2998 void qemu_ram_free(ram_addr_t addr)
3000 RAMBlock *block;
3002 QLIST_FOREACH(block, &ram_list.blocks, next) {
3003 if (addr == block->offset) {
3004 QLIST_REMOVE(block, next);
3005 if (block->flags & RAM_PREALLOC_MASK) {
3007 } else if (mem_path) {
3008 #if defined (__linux__) && !defined(TARGET_S390X)
3009 if (block->fd) {
3010 munmap(block->host, block->length);
3011 close(block->fd);
3012 } else {
3013 qemu_vfree(block->host);
3015 #else
3016 abort();
3017 #endif
3018 } else {
3019 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3020 munmap(block->host, block->length);
3021 #else
3022 if (xen_enabled()) {
3023 xen_invalidate_map_cache_entry(block->host);
3024 } else {
3025 qemu_vfree(block->host);
3027 #endif
3029 qemu_free(block);
3030 return;
3036 #ifndef _WIN32
3037 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3039 RAMBlock *block;
3040 ram_addr_t offset;
3041 int flags;
3042 void *area, *vaddr;
3044 QLIST_FOREACH(block, &ram_list.blocks, next) {
3045 offset = addr - block->offset;
3046 if (offset < block->length) {
3047 vaddr = block->host + offset;
3048 if (block->flags & RAM_PREALLOC_MASK) {
3050 } else {
3051 flags = MAP_FIXED;
3052 munmap(vaddr, length);
3053 if (mem_path) {
3054 #if defined(__linux__) && !defined(TARGET_S390X)
3055 if (block->fd) {
3056 #ifdef MAP_POPULATE
3057 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3058 MAP_PRIVATE;
3059 #else
3060 flags |= MAP_PRIVATE;
3061 #endif
3062 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3063 flags, block->fd, offset);
3064 } else {
3065 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3066 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3067 flags, -1, 0);
3069 #else
3070 abort();
3071 #endif
3072 } else {
3073 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3074 flags |= MAP_SHARED | MAP_ANONYMOUS;
3075 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3076 flags, -1, 0);
3077 #else
3078 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3079 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3080 flags, -1, 0);
3081 #endif
3083 if (area != vaddr) {
3084 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3085 length, addr);
3086 exit(1);
3088 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3090 return;
3094 #endif /* !_WIN32 */
3096 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3097 With the exception of the softmmu code in this file, this should
3098 only be used for local memory (e.g. video ram) that the device owns,
3099 and knows it isn't going to access beyond the end of the block.
3101 It should not be used for general purpose DMA.
3102 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3104 void *qemu_get_ram_ptr(ram_addr_t addr)
3106 RAMBlock *block;
3108 QLIST_FOREACH(block, &ram_list.blocks, next) {
3109 if (addr - block->offset < block->length) {
3110 /* Move this entry to to start of the list. */
3111 if (block != QLIST_FIRST(&ram_list.blocks)) {
3112 QLIST_REMOVE(block, next);
3113 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3115 if (xen_enabled()) {
3116 /* We need to check if the requested address is in the RAM
3117 * because we don't want to map the entire memory in QEMU.
3118 * In that case just map until the end of the page.
3120 if (block->offset == 0) {
3121 return xen_map_cache(addr, 0, 0);
3122 } else if (block->host == NULL) {
3123 block->host =
3124 xen_map_cache(block->offset, block->length, 1);
3127 return block->host + (addr - block->offset);
3131 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3132 abort();
3134 return NULL;
3137 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3138 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3140 void *qemu_safe_ram_ptr(ram_addr_t addr)
3142 RAMBlock *block;
3144 QLIST_FOREACH(block, &ram_list.blocks, next) {
3145 if (addr - block->offset < block->length) {
3146 if (xen_enabled()) {
3147 /* We need to check if the requested address is in the RAM
3148 * because we don't want to map the entire memory in QEMU.
3149 * In that case just map until the end of the page.
3151 if (block->offset == 0) {
3152 return xen_map_cache(addr, 0, 0);
3153 } else if (block->host == NULL) {
3154 block->host =
3155 xen_map_cache(block->offset, block->length, 1);
3158 return block->host + (addr - block->offset);
3162 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3163 abort();
3165 return NULL;
3168 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3169 * but takes a size argument */
3170 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3172 if (*size == 0) {
3173 return NULL;
3175 if (xen_enabled()) {
3176 return xen_map_cache(addr, *size, 1);
3177 } else {
3178 RAMBlock *block;
3180 QLIST_FOREACH(block, &ram_list.blocks, next) {
3181 if (addr - block->offset < block->length) {
3182 if (addr - block->offset + *size > block->length)
3183 *size = block->length - addr + block->offset;
3184 return block->host + (addr - block->offset);
3188 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3189 abort();
/* Emit the qemu_put_ram_ptr trace event for 'addr'; nothing else is done. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3198 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3200 RAMBlock *block;
3201 uint8_t *host = ptr;
3203 if (xen_enabled()) {
3204 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3205 return 0;
3208 QLIST_FOREACH(block, &ram_list.blocks, next) {
3209 /* This case append when the block is not mapped. */
3210 if (block->host == NULL) {
3211 continue;
3213 if (host - block->host < block->length) {
3214 *ram_addr = block->offset + (host - block->host);
3215 return 0;
3219 return -1;
3222 /* Some of the softmmu routines need to translate from a host pointer
3223 (typically a TLB entry) back to a ram offset. */
3224 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3226 ram_addr_t ram_addr;
3228 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3229 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3230 abort();
3232 return ram_addr;
3235 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3237 #ifdef DEBUG_UNASSIGNED
3238 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3239 #endif
3240 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3241 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3242 #endif
3243 return 0;
3246 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3248 #ifdef DEBUG_UNASSIGNED
3249 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3250 #endif
3251 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3252 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3253 #endif
3254 return 0;
3257 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3259 #ifdef DEBUG_UNASSIGNED
3260 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3261 #endif
3262 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3263 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3264 #endif
3265 return 0;
3268 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3270 #ifdef DEBUG_UNASSIGNED
3271 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3272 #endif
3273 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3274 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3275 #endif
3278 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3280 #ifdef DEBUG_UNASSIGNED
3281 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3282 #endif
3283 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3284 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3285 #endif
3288 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3290 #ifdef DEBUG_UNASSIGNED
3291 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3292 #endif
3293 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3294 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3295 #endif
3298 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3299 unassigned_mem_readb,
3300 unassigned_mem_readw,
3301 unassigned_mem_readl,
3304 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3305 unassigned_mem_writeb,
3306 unassigned_mem_writew,
3307 unassigned_mem_writel,
3310 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3311 uint32_t val)
3313 int dirty_flags;
3314 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3315 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3316 #if !defined(CONFIG_USER_ONLY)
3317 tb_invalidate_phys_page_fast(ram_addr, 1);
3318 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3319 #endif
3321 stb_p(qemu_get_ram_ptr(ram_addr), val);
3322 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3323 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3324 /* we remove the notdirty callback only if the code has been
3325 flushed */
3326 if (dirty_flags == 0xff)
3327 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3330 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3331 uint32_t val)
3333 int dirty_flags;
3334 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3335 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3336 #if !defined(CONFIG_USER_ONLY)
3337 tb_invalidate_phys_page_fast(ram_addr, 2);
3338 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3339 #endif
3341 stw_p(qemu_get_ram_ptr(ram_addr), val);
3342 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3343 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3344 /* we remove the notdirty callback only if the code has been
3345 flushed */
3346 if (dirty_flags == 0xff)
3347 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3350 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3351 uint32_t val)
3353 int dirty_flags;
3354 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3355 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3356 #if !defined(CONFIG_USER_ONLY)
3357 tb_invalidate_phys_page_fast(ram_addr, 4);
3358 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3359 #endif
3361 stl_p(qemu_get_ram_ptr(ram_addr), val);
3362 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3363 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3364 /* we remove the notdirty callback only if the code has been
3365 flushed */
3366 if (dirty_flags == 0xff)
3367 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3370 static CPUReadMemoryFunc * const error_mem_read[3] = {
3371 NULL, /* never used */
3372 NULL, /* never used */
3373 NULL, /* never used */
3376 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3377 notdirty_mem_writeb,
3378 notdirty_mem_writew,
3379 notdirty_mem_writel,
3382 /* Generate a debug exception if a watchpoint has been hit. */
3383 static void check_watchpoint(int offset, int len_mask, int flags)
3385 CPUState *env = cpu_single_env;
3386 target_ulong pc, cs_base;
3387 TranslationBlock *tb;
3388 target_ulong vaddr;
3389 CPUWatchpoint *wp;
3390 int cpu_flags;
3392 if (env->watchpoint_hit) {
3393 /* We re-entered the check after replacing the TB. Now raise
3394 * the debug interrupt so that is will trigger after the
3395 * current instruction. */
3396 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3397 return;
3399 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3400 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3401 if ((vaddr == (wp->vaddr & len_mask) ||
3402 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3403 wp->flags |= BP_WATCHPOINT_HIT;
3404 if (!env->watchpoint_hit) {
3405 env->watchpoint_hit = wp;
3406 tb = tb_find_pc(env->mem_io_pc);
3407 if (!tb) {
3408 cpu_abort(env, "check_watchpoint: could not find TB for "
3409 "pc=%p", (void *)env->mem_io_pc);
3411 cpu_restore_state(tb, env, env->mem_io_pc);
3412 tb_phys_invalidate(tb, -1);
3413 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3414 env->exception_index = EXCP_DEBUG;
3415 } else {
3416 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3417 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3419 cpu_resume_from_signal(env, NULL);
3421 } else {
3422 wp->flags &= ~BP_WATCHPOINT_HIT;
3427 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3428 so these check for a hit then pass through to the normal out-of-line
3429 phys routines. */
3430 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3432 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3433 return ldub_phys(addr);
3436 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3438 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3439 return lduw_phys(addr);
3442 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3444 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3445 return ldl_phys(addr);
3448 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3449 uint32_t val)
3451 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3452 stb_phys(addr, val);
3455 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3456 uint32_t val)
3458 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3459 stw_phys(addr, val);
3462 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3463 uint32_t val)
3465 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3466 stl_phys(addr, val);
3469 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3470 watch_mem_readb,
3471 watch_mem_readw,
3472 watch_mem_readl,
3475 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3476 watch_mem_writeb,
3477 watch_mem_writew,
3478 watch_mem_writel,
3481 static inline uint32_t subpage_readlen (subpage_t *mmio,
3482 target_phys_addr_t addr,
3483 unsigned int len)
3485 unsigned int idx = SUBPAGE_IDX(addr);
3486 #if defined(DEBUG_SUBPAGE)
3487 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3488 mmio, len, addr, idx);
3489 #endif
3491 addr += mmio->region_offset[idx];
3492 idx = mmio->sub_io_index[idx];
3493 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3496 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3497 uint32_t value, unsigned int len)
3499 unsigned int idx = SUBPAGE_IDX(addr);
3500 #if defined(DEBUG_SUBPAGE)
3501 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3502 __func__, mmio, len, addr, idx, value);
3503 #endif
3505 addr += mmio->region_offset[idx];
3506 idx = mmio->sub_io_index[idx];
3507 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3510 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3512 return subpage_readlen(opaque, addr, 0);
3515 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3516 uint32_t value)
3518 subpage_writelen(opaque, addr, value, 0);
3521 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3523 return subpage_readlen(opaque, addr, 1);
3526 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3527 uint32_t value)
3529 subpage_writelen(opaque, addr, value, 1);
3532 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3534 return subpage_readlen(opaque, addr, 2);
3537 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3538 uint32_t value)
3540 subpage_writelen(opaque, addr, value, 2);
3543 static CPUReadMemoryFunc * const subpage_read[] = {
3544 &subpage_readb,
3545 &subpage_readw,
3546 &subpage_readl,
3549 static CPUWriteMemoryFunc * const subpage_write[] = {
3550 &subpage_writeb,
3551 &subpage_writew,
3552 &subpage_writel,
3555 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3556 ram_addr_t memory, ram_addr_t region_offset)
3558 int idx, eidx;
3560 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3561 return -1;
3562 idx = SUBPAGE_IDX(start);
3563 eidx = SUBPAGE_IDX(end);
3564 #if defined(DEBUG_SUBPAGE)
3565 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3566 mmio, start, end, idx, eidx, memory);
3567 #endif
3568 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3569 memory = IO_MEM_UNASSIGNED;
3570 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3571 for (; idx <= eidx; idx++) {
3572 mmio->sub_io_index[idx] = memory;
3573 mmio->region_offset[idx] = region_offset;
3576 return 0;
3579 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3580 ram_addr_t orig_memory,
3581 ram_addr_t region_offset)
3583 subpage_t *mmio;
3584 int subpage_memory;
3586 mmio = qemu_mallocz(sizeof(subpage_t));
3588 mmio->base = base;
3589 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3590 DEVICE_NATIVE_ENDIAN);
3591 #if defined(DEBUG_SUBPAGE)
3592 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3593 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3594 #endif
3595 *phys = subpage_memory | IO_MEM_SUBPAGE;
3596 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3598 return mmio;
3601 static int get_free_io_mem_idx(void)
3603 int i;
3605 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3606 if (!io_mem_used[i]) {
3607 io_mem_used[i] = 1;
3608 return i;
3610 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3611 return -1;
3615 * Usually, devices operate in little endian mode. There are devices out
3616 * there that operate in big endian too. Each device gets byte swapped
3617 * mmio if plugged onto a CPU that does the other endianness.
3619 * CPU Device swap?
3621 * little little no
3622 * little big yes
3623 * big little yes
3624 * big big no
3627 typedef struct SwapEndianContainer {
3628 CPUReadMemoryFunc *read[3];
3629 CPUWriteMemoryFunc *write[3];
3630 void *opaque;
3631 } SwapEndianContainer;
3633 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3635 uint32_t val;
3636 SwapEndianContainer *c = opaque;
3637 val = c->read[0](c->opaque, addr);
3638 return val;
3641 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3643 uint32_t val;
3644 SwapEndianContainer *c = opaque;
3645 val = bswap16(c->read[1](c->opaque, addr));
3646 return val;
3649 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3651 uint32_t val;
3652 SwapEndianContainer *c = opaque;
3653 val = bswap32(c->read[2](c->opaque, addr));
3654 return val;
3657 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3658 swapendian_mem_readb,
3659 swapendian_mem_readw,
3660 swapendian_mem_readl
3663 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3664 uint32_t val)
3666 SwapEndianContainer *c = opaque;
3667 c->write[0](c->opaque, addr, val);
3670 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3671 uint32_t val)
3673 SwapEndianContainer *c = opaque;
3674 c->write[1](c->opaque, addr, bswap16(val));
3677 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3678 uint32_t val)
3680 SwapEndianContainer *c = opaque;
3681 c->write[2](c->opaque, addr, bswap32(val));
3684 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3685 swapendian_mem_writeb,
3686 swapendian_mem_writew,
3687 swapendian_mem_writel
3690 static void swapendian_init(int io_index)
3692 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3693 int i;
3695 /* Swap mmio for big endian targets */
3696 c->opaque = io_mem_opaque[io_index];
3697 for (i = 0; i < 3; i++) {
3698 c->read[i] = io_mem_read[io_index][i];
3699 c->write[i] = io_mem_write[io_index][i];
3701 io_mem_read[io_index][i] = swapendian_readfn[i];
3702 io_mem_write[io_index][i] = swapendian_writefn[i];
3704 io_mem_opaque[io_index] = c;
3707 static void swapendian_del(int io_index)
3709 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3710 qemu_free(io_mem_opaque[io_index]);
3714 /* mem_read and mem_write are arrays of functions containing the
3715 function to access byte (index 0), word (index 1) and dword (index
3716 2). Functions can be omitted with a NULL function pointer.
3717 If io_index is non zero, the corresponding io zone is
3718 modified. If it is zero, a new io zone is allocated. The return
3719 value can be used with cpu_register_physical_memory(). (-1) is
3720 returned if error. */
3721 static int cpu_register_io_memory_fixed(int io_index,
3722 CPUReadMemoryFunc * const *mem_read,
3723 CPUWriteMemoryFunc * const *mem_write,
3724 void *opaque, enum device_endian endian)
3726 int i;
3728 if (io_index <= 0) {
3729 io_index = get_free_io_mem_idx();
3730 if (io_index == -1)
3731 return io_index;
3732 } else {
3733 io_index >>= IO_MEM_SHIFT;
3734 if (io_index >= IO_MEM_NB_ENTRIES)
3735 return -1;
3738 for (i = 0; i < 3; ++i) {
3739 io_mem_read[io_index][i]
3740 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3742 for (i = 0; i < 3; ++i) {
3743 io_mem_write[io_index][i]
3744 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3746 io_mem_opaque[io_index] = opaque;
3748 switch (endian) {
3749 case DEVICE_BIG_ENDIAN:
3750 #ifndef TARGET_WORDS_BIGENDIAN
3751 swapendian_init(io_index);
3752 #endif
3753 break;
3754 case DEVICE_LITTLE_ENDIAN:
3755 #ifdef TARGET_WORDS_BIGENDIAN
3756 swapendian_init(io_index);
3757 #endif
3758 break;
3759 case DEVICE_NATIVE_ENDIAN:
3760 default:
3761 break;
3764 return (io_index << IO_MEM_SHIFT);
3767 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3768 CPUWriteMemoryFunc * const *mem_write,
3769 void *opaque, enum device_endian endian)
3771 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3774 void cpu_unregister_io_memory(int io_table_address)
3776 int i;
3777 int io_index = io_table_address >> IO_MEM_SHIFT;
3779 swapendian_del(io_index);
3781 for (i=0;i < 3; i++) {
3782 io_mem_read[io_index][i] = unassigned_mem_read[i];
3783 io_mem_write[io_index][i] = unassigned_mem_write[i];
3785 io_mem_opaque[io_index] = NULL;
3786 io_mem_used[io_index] = 0;
3789 static void io_mem_init(void)
3791 int i;
3793 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3794 unassigned_mem_write, NULL,
3795 DEVICE_NATIVE_ENDIAN);
3796 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3797 unassigned_mem_write, NULL,
3798 DEVICE_NATIVE_ENDIAN);
3799 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3800 notdirty_mem_write, NULL,
3801 DEVICE_NATIVE_ENDIAN);
3802 for (i=0; i<5; i++)
3803 io_mem_used[i] = 1;
3805 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3806 watch_mem_write, NULL,
3807 DEVICE_NATIVE_ENDIAN);
3810 #endif /* !defined(CONFIG_USER_ONLY) */
3812 /* physical memory access (slow version, mainly for debug) */
3813 #if defined(CONFIG_USER_ONLY)
3814 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3815 uint8_t *buf, int len, int is_write)
3817 int l, flags;
3818 target_ulong page;
3819 void * p;
3821 while (len > 0) {
3822 page = addr & TARGET_PAGE_MASK;
3823 l = (page + TARGET_PAGE_SIZE) - addr;
3824 if (l > len)
3825 l = len;
3826 flags = page_get_flags(page);
3827 if (!(flags & PAGE_VALID))
3828 return -1;
3829 if (is_write) {
3830 if (!(flags & PAGE_WRITE))
3831 return -1;
3832 /* XXX: this code should not depend on lock_user */
3833 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3834 return -1;
3835 memcpy(p, buf, l);
3836 unlock_user(p, addr, l);
3837 } else {
3838 if (!(flags & PAGE_READ))
3839 return -1;
3840 /* XXX: this code should not depend on lock_user */
3841 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3842 return -1;
3843 memcpy(buf, p, l);
3844 unlock_user(p, addr, 0);
3846 len -= l;
3847 buf += l;
3848 addr += l;
3850 return 0;
3853 #else
3854 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3855 int len, int is_write)
3857 int l, io_index;
3858 uint8_t *ptr;
3859 uint32_t val;
3860 target_phys_addr_t page;
3861 unsigned long pd;
3862 PhysPageDesc *p;
3864 while (len > 0) {
3865 page = addr & TARGET_PAGE_MASK;
3866 l = (page + TARGET_PAGE_SIZE) - addr;
3867 if (l > len)
3868 l = len;
3869 p = phys_page_find(page >> TARGET_PAGE_BITS);
3870 if (!p) {
3871 pd = IO_MEM_UNASSIGNED;
3872 } else {
3873 pd = p->phys_offset;
3876 if (is_write) {
3877 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3878 target_phys_addr_t addr1 = addr;
3879 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3880 if (p)
3881 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3882 /* XXX: could force cpu_single_env to NULL to avoid
3883 potential bugs */
3884 if (l >= 4 && ((addr1 & 3) == 0)) {
3885 /* 32 bit write access */
3886 val = ldl_p(buf);
3887 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3888 l = 4;
3889 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3890 /* 16 bit write access */
3891 val = lduw_p(buf);
3892 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3893 l = 2;
3894 } else {
3895 /* 8 bit write access */
3896 val = ldub_p(buf);
3897 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3898 l = 1;
3900 } else {
3901 unsigned long addr1;
3902 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3903 /* RAM case */
3904 ptr = qemu_get_ram_ptr(addr1);
3905 memcpy(ptr, buf, l);
3906 if (!cpu_physical_memory_is_dirty(addr1)) {
3907 /* invalidate code */
3908 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3909 /* set dirty bit */
3910 cpu_physical_memory_set_dirty_flags(
3911 addr1, (0xff & ~CODE_DIRTY_FLAG));
3913 qemu_put_ram_ptr(ptr);
3915 } else {
3916 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3917 !(pd & IO_MEM_ROMD)) {
3918 target_phys_addr_t addr1 = addr;
3919 /* I/O case */
3920 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3921 if (p)
3922 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3923 if (l >= 4 && ((addr1 & 3) == 0)) {
3924 /* 32 bit read access */
3925 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3926 stl_p(buf, val);
3927 l = 4;
3928 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3929 /* 16 bit read access */
3930 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3931 stw_p(buf, val);
3932 l = 2;
3933 } else {
3934 /* 8 bit read access */
3935 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3936 stb_p(buf, val);
3937 l = 1;
3939 } else {
3940 /* RAM case */
3941 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3942 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3943 qemu_put_ram_ptr(ptr);
3946 len -= l;
3947 buf += l;
3948 addr += l;
3952 /* used for ROM loading : can write in RAM and ROM */
3953 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3954 const uint8_t *buf, int len)
3956 int l;
3957 uint8_t *ptr;
3958 target_phys_addr_t page;
3959 unsigned long pd;
3960 PhysPageDesc *p;
3962 while (len > 0) {
3963 page = addr & TARGET_PAGE_MASK;
3964 l = (page + TARGET_PAGE_SIZE) - addr;
3965 if (l > len)
3966 l = len;
3967 p = phys_page_find(page >> TARGET_PAGE_BITS);
3968 if (!p) {
3969 pd = IO_MEM_UNASSIGNED;
3970 } else {
3971 pd = p->phys_offset;
3974 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3975 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3976 !(pd & IO_MEM_ROMD)) {
3977 /* do nothing */
3978 } else {
3979 unsigned long addr1;
3980 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3981 /* ROM/RAM case */
3982 ptr = qemu_get_ram_ptr(addr1);
3983 memcpy(ptr, buf, l);
3984 qemu_put_ram_ptr(ptr);
3986 len -= l;
3987 buf += l;
3988 addr += l;
3992 typedef struct {
3993 void *buffer;
3994 target_phys_addr_t addr;
3995 target_phys_addr_t len;
3996 } BounceBuffer;
3998 static BounceBuffer bounce;
4000 typedef struct MapClient {
4001 void *opaque;
4002 void (*callback)(void *opaque);
4003 QLIST_ENTRY(MapClient) link;
4004 } MapClient;
4006 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4007 = QLIST_HEAD_INITIALIZER(map_client_list);
4009 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4011 MapClient *client = qemu_malloc(sizeof(*client));
4013 client->opaque = opaque;
4014 client->callback = callback;
4015 QLIST_INSERT_HEAD(&map_client_list, client, link);
4016 return client;
4019 void cpu_unregister_map_client(void *_client)
4021 MapClient *client = (MapClient *)_client;
4023 QLIST_REMOVE(client, link);
4024 qemu_free(client);
4027 static void cpu_notify_map_clients(void)
4029 MapClient *client;
4031 while (!QLIST_EMPTY(&map_client_list)) {
4032 client = QLIST_FIRST(&map_client_list);
4033 client->callback(client->opaque);
4034 cpu_unregister_map_client(client);
4038 /* Map a physical memory region into a host virtual address.
4039 * May map a subset of the requested range, given by and returned in *plen.
4040 * May return NULL if resources needed to perform the mapping are exhausted.
4041 * Use only for reads OR writes - not for read-modify-write operations.
4042 * Use cpu_register_map_client() to know when retrying the map operation is
4043 * likely to succeed.
4045 void *cpu_physical_memory_map(target_phys_addr_t addr,
4046 target_phys_addr_t *plen,
4047 int is_write)
4049 target_phys_addr_t len = *plen;
4050 target_phys_addr_t todo = 0;
4051 int l;
4052 target_phys_addr_t page;
4053 unsigned long pd;
4054 PhysPageDesc *p;
4055 ram_addr_t raddr = ULONG_MAX;
4056 ram_addr_t rlen;
4057 void *ret;
4059 while (len > 0) {
4060 page = addr & TARGET_PAGE_MASK;
4061 l = (page + TARGET_PAGE_SIZE) - addr;
4062 if (l > len)
4063 l = len;
4064 p = phys_page_find(page >> TARGET_PAGE_BITS);
4065 if (!p) {
4066 pd = IO_MEM_UNASSIGNED;
4067 } else {
4068 pd = p->phys_offset;
4071 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4072 if (todo || bounce.buffer) {
4073 break;
4075 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4076 bounce.addr = addr;
4077 bounce.len = l;
4078 if (!is_write) {
4079 cpu_physical_memory_read(addr, bounce.buffer, l);
4082 *plen = l;
4083 return bounce.buffer;
4085 if (!todo) {
4086 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4089 len -= l;
4090 addr += l;
4091 todo += l;
4093 rlen = todo;
4094 ret = qemu_ram_ptr_length(raddr, &rlen);
4095 *plen = rlen;
4096 return ret;
4099 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4100 * Will also mark the memory as dirty if is_write == 1. access_len gives
4101 * the amount of memory that was actually read or written by the caller.
4103 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4104 int is_write, target_phys_addr_t access_len)
4106 if (buffer != bounce.buffer) {
4107 if (is_write) {
4108 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4109 while (access_len) {
4110 unsigned l;
4111 l = TARGET_PAGE_SIZE;
4112 if (l > access_len)
4113 l = access_len;
4114 if (!cpu_physical_memory_is_dirty(addr1)) {
4115 /* invalidate code */
4116 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4117 /* set dirty bit */
4118 cpu_physical_memory_set_dirty_flags(
4119 addr1, (0xff & ~CODE_DIRTY_FLAG));
4121 addr1 += l;
4122 access_len -= l;
4125 if (xen_enabled()) {
4126 xen_invalidate_map_cache_entry(buffer);
4128 return;
4130 if (is_write) {
4131 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4133 qemu_vfree(bounce.buffer);
4134 bounce.buffer = NULL;
4135 cpu_notify_map_clients();
4138 /* warning: addr must be aligned */
4139 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4140 enum device_endian endian)
4142 int io_index;
4143 uint8_t *ptr;
4144 uint32_t val;
4145 unsigned long pd;
4146 PhysPageDesc *p;
4148 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4149 if (!p) {
4150 pd = IO_MEM_UNASSIGNED;
4151 } else {
4152 pd = p->phys_offset;
4155 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4156 !(pd & IO_MEM_ROMD)) {
4157 /* I/O case */
4158 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4159 if (p)
4160 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4161 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4162 #if defined(TARGET_WORDS_BIGENDIAN)
4163 if (endian == DEVICE_LITTLE_ENDIAN) {
4164 val = bswap32(val);
4166 #else
4167 if (endian == DEVICE_BIG_ENDIAN) {
4168 val = bswap32(val);
4170 #endif
4171 } else {
4172 /* RAM case */
4173 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4174 (addr & ~TARGET_PAGE_MASK);
4175 switch (endian) {
4176 case DEVICE_LITTLE_ENDIAN:
4177 val = ldl_le_p(ptr);
4178 break;
4179 case DEVICE_BIG_ENDIAN:
4180 val = ldl_be_p(ptr);
4181 break;
4182 default:
4183 val = ldl_p(ptr);
4184 break;
4187 return val;
4190 uint32_t ldl_phys(target_phys_addr_t addr)
4192 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4195 uint32_t ldl_le_phys(target_phys_addr_t addr)
4197 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4200 uint32_t ldl_be_phys(target_phys_addr_t addr)
4202 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4205 /* warning: addr must be aligned */
4206 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4207 enum device_endian endian)
4209 int io_index;
4210 uint8_t *ptr;
4211 uint64_t val;
4212 unsigned long pd;
4213 PhysPageDesc *p;
4215 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4216 if (!p) {
4217 pd = IO_MEM_UNASSIGNED;
4218 } else {
4219 pd = p->phys_offset;
4222 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4223 !(pd & IO_MEM_ROMD)) {
4224 /* I/O case */
4225 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4226 if (p)
4227 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4229 /* XXX This is broken when device endian != cpu endian.
4230 Fix and add "endian" variable check */
4231 #ifdef TARGET_WORDS_BIGENDIAN
4232 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4233 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4234 #else
4235 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4236 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4237 #endif
4238 } else {
4239 /* RAM case */
4240 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4241 (addr & ~TARGET_PAGE_MASK);
4242 switch (endian) {
4243 case DEVICE_LITTLE_ENDIAN:
4244 val = ldq_le_p(ptr);
4245 break;
4246 case DEVICE_BIG_ENDIAN:
4247 val = ldq_be_p(ptr);
4248 break;
4249 default:
4250 val = ldq_p(ptr);
4251 break;
4254 return val;
4257 uint64_t ldq_phys(target_phys_addr_t addr)
4259 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4262 uint64_t ldq_le_phys(target_phys_addr_t addr)
4264 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4267 uint64_t ldq_be_phys(target_phys_addr_t addr)
4269 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4272 /* XXX: optimize */
4273 uint32_t ldub_phys(target_phys_addr_t addr)
4275 uint8_t val;
4276 cpu_physical_memory_read(addr, &val, 1);
4277 return val;
4280 /* warning: addr must be aligned */
4281 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4282 enum device_endian endian)
4284 int io_index;
4285 uint8_t *ptr;
4286 uint64_t val;
4287 unsigned long pd;
4288 PhysPageDesc *p;
4290 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4291 if (!p) {
4292 pd = IO_MEM_UNASSIGNED;
4293 } else {
4294 pd = p->phys_offset;
4297 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4298 !(pd & IO_MEM_ROMD)) {
4299 /* I/O case */
4300 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4301 if (p)
4302 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4303 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4304 #if defined(TARGET_WORDS_BIGENDIAN)
4305 if (endian == DEVICE_LITTLE_ENDIAN) {
4306 val = bswap16(val);
4308 #else
4309 if (endian == DEVICE_BIG_ENDIAN) {
4310 val = bswap16(val);
4312 #endif
4313 } else {
4314 /* RAM case */
4315 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4316 (addr & ~TARGET_PAGE_MASK);
4317 switch (endian) {
4318 case DEVICE_LITTLE_ENDIAN:
4319 val = lduw_le_p(ptr);
4320 break;
4321 case DEVICE_BIG_ENDIAN:
4322 val = lduw_be_p(ptr);
4323 break;
4324 default:
4325 val = lduw_p(ptr);
4326 break;
4329 return val;
4332 uint32_t lduw_phys(target_phys_addr_t addr)
4334 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4337 uint32_t lduw_le_phys(target_phys_addr_t addr)
4339 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4342 uint32_t lduw_be_phys(target_phys_addr_t addr)
4344 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4347 /* warning: addr must be aligned. The ram page is not masked as dirty
4348 and the code inside is not invalidated. It is useful if the dirty
4349 bits are used to track modified PTEs */
4350 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4352 int io_index;
4353 uint8_t *ptr;
4354 unsigned long pd;
4355 PhysPageDesc *p;
4357 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4358 if (!p) {
4359 pd = IO_MEM_UNASSIGNED;
4360 } else {
4361 pd = p->phys_offset;
4364 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4365 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4366 if (p)
4367 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4368 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4369 } else {
4370 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4371 ptr = qemu_get_ram_ptr(addr1);
4372 stl_p(ptr, val);
4374 if (unlikely(in_migration)) {
4375 if (!cpu_physical_memory_is_dirty(addr1)) {
4376 /* invalidate code */
4377 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4378 /* set dirty bit */
4379 cpu_physical_memory_set_dirty_flags(
4380 addr1, (0xff & ~CODE_DIRTY_FLAG));
4386 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4388 int io_index;
4389 uint8_t *ptr;
4390 unsigned long pd;
4391 PhysPageDesc *p;
4393 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4394 if (!p) {
4395 pd = IO_MEM_UNASSIGNED;
4396 } else {
4397 pd = p->phys_offset;
4400 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4401 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4402 if (p)
4403 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4404 #ifdef TARGET_WORDS_BIGENDIAN
4405 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4406 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4407 #else
4408 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4409 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4410 #endif
4411 } else {
4412 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4413 (addr & ~TARGET_PAGE_MASK);
4414 stq_p(ptr, val);
4418 /* warning: addr must be aligned */
4419 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4420 enum device_endian endian)
4422 int io_index;
4423 uint8_t *ptr;
4424 unsigned long pd;
4425 PhysPageDesc *p;
4427 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4428 if (!p) {
4429 pd = IO_MEM_UNASSIGNED;
4430 } else {
4431 pd = p->phys_offset;
4434 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4435 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4436 if (p)
4437 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4438 #if defined(TARGET_WORDS_BIGENDIAN)
4439 if (endian == DEVICE_LITTLE_ENDIAN) {
4440 val = bswap32(val);
4442 #else
4443 if (endian == DEVICE_BIG_ENDIAN) {
4444 val = bswap32(val);
4446 #endif
4447 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4448 } else {
4449 unsigned long addr1;
4450 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4451 /* RAM case */
4452 ptr = qemu_get_ram_ptr(addr1);
4453 switch (endian) {
4454 case DEVICE_LITTLE_ENDIAN:
4455 stl_le_p(ptr, val);
4456 break;
4457 case DEVICE_BIG_ENDIAN:
4458 stl_be_p(ptr, val);
4459 break;
4460 default:
4461 stl_p(ptr, val);
4462 break;
4464 if (!cpu_physical_memory_is_dirty(addr1)) {
4465 /* invalidate code */
4466 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4467 /* set dirty bit */
4468 cpu_physical_memory_set_dirty_flags(addr1,
4469 (0xff & ~CODE_DIRTY_FLAG));
4474 void stl_phys(target_phys_addr_t addr, uint32_t val)
4476 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4479 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4481 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4484 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4486 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4489 /* XXX: optimize */
4490 void stb_phys(target_phys_addr_t addr, uint32_t val)
4492 uint8_t v = val;
4493 cpu_physical_memory_write(addr, &v, 1);
4496 /* warning: addr must be aligned */
4497 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4498 enum device_endian endian)
4500 int io_index;
4501 uint8_t *ptr;
4502 unsigned long pd;
4503 PhysPageDesc *p;
4505 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4506 if (!p) {
4507 pd = IO_MEM_UNASSIGNED;
4508 } else {
4509 pd = p->phys_offset;
4512 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4513 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4514 if (p)
4515 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4516 #if defined(TARGET_WORDS_BIGENDIAN)
4517 if (endian == DEVICE_LITTLE_ENDIAN) {
4518 val = bswap16(val);
4520 #else
4521 if (endian == DEVICE_BIG_ENDIAN) {
4522 val = bswap16(val);
4524 #endif
4525 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4526 } else {
4527 unsigned long addr1;
4528 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4529 /* RAM case */
4530 ptr = qemu_get_ram_ptr(addr1);
4531 switch (endian) {
4532 case DEVICE_LITTLE_ENDIAN:
4533 stw_le_p(ptr, val);
4534 break;
4535 case DEVICE_BIG_ENDIAN:
4536 stw_be_p(ptr, val);
4537 break;
4538 default:
4539 stw_p(ptr, val);
4540 break;
4542 if (!cpu_physical_memory_is_dirty(addr1)) {
4543 /* invalidate code */
4544 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4545 /* set dirty bit */
4546 cpu_physical_memory_set_dirty_flags(addr1,
4547 (0xff & ~CODE_DIRTY_FLAG));
4552 void stw_phys(target_phys_addr_t addr, uint32_t val)
4554 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4557 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4559 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4562 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4564 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4567 /* XXX: optimize */
4568 void stq_phys(target_phys_addr_t addr, uint64_t val)
4570 val = tswap64(val);
4571 cpu_physical_memory_write(addr, &val, 8);
4574 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4576 val = cpu_to_le64(val);
4577 cpu_physical_memory_write(addr, &val, 8);
4580 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4582 val = cpu_to_be64(val);
4583 cpu_physical_memory_write(addr, &val, 8);
4586 /* virtual memory access for debug (includes writing to ROM) */
4587 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4588 uint8_t *buf, int len, int is_write)
4590 int l;
4591 target_phys_addr_t phys_addr;
4592 target_ulong page;
4594 while (len > 0) {
4595 page = addr & TARGET_PAGE_MASK;
4596 phys_addr = cpu_get_phys_page_debug(env, page);
4597 /* if no physical page mapped, return an error */
4598 if (phys_addr == -1)
4599 return -1;
4600 l = (page + TARGET_PAGE_SIZE) - addr;
4601 if (l > len)
4602 l = len;
4603 phys_addr += (addr & ~TARGET_PAGE_MASK);
4604 if (is_write)
4605 cpu_physical_memory_write_rom(phys_addr, buf, l);
4606 else
4607 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4608 len -= l;
4609 buf += l;
4610 addr += l;
4612 return 0;
4614 #endif
4616 /* in deterministic execution mode, instructions doing device I/Os
4617 must be at the end of the TB */
4618 void cpu_io_recompile(CPUState *env, void *retaddr)
4620 TranslationBlock *tb;
4621 uint32_t n, cflags;
4622 target_ulong pc, cs_base;
4623 uint64_t flags;
4625 tb = tb_find_pc((unsigned long)retaddr);
4626 if (!tb) {
4627 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4628 retaddr);
4630 n = env->icount_decr.u16.low + tb->icount;
4631 cpu_restore_state(tb, env, (unsigned long)retaddr);
4632 /* Calculate how many instructions had been executed before the fault
4633 occurred. */
4634 n = n - env->icount_decr.u16.low;
4635 /* Generate a new TB ending on the I/O insn. */
4636 n++;
4637 /* On MIPS and SH, delay slot instructions can only be restarted if
4638 they were already the first instruction in the TB. If this is not
4639 the first instruction in a TB then re-execute the preceding
4640 branch. */
4641 #if defined(TARGET_MIPS)
4642 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4643 env->active_tc.PC -= 4;
4644 env->icount_decr.u16.low++;
4645 env->hflags &= ~MIPS_HFLAG_BMASK;
4647 #elif defined(TARGET_SH4)
4648 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4649 && n > 1) {
4650 env->pc -= 2;
4651 env->icount_decr.u16.low++;
4652 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4654 #endif
4655 /* This should never happen. */
4656 if (n > CF_COUNT_MASK)
4657 cpu_abort(env, "TB too big during recompile");
4659 cflags = n | CF_LAST_IO;
4660 pc = tb->pc;
4661 cs_base = tb->cs_base;
4662 flags = tb->flags;
4663 tb_phys_invalidate(tb, -1);
4664 /* FIXME: In theory this could raise an exception. In practice
4665 we have already translated the block once so it's probably ok. */
4666 tb_gen_code(env, pc, cs_base, flags, cflags);
4667 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4668 the first in the TB) then we end up generating a whole new TB and
4669 repeating the fault, which is horribly inefficient.
4670 Better would be to execute just this insn uncached, or generate a
4671 second new TB. */
4672 cpu_resume_from_signal(env, NULL);
4675 #if !defined(CONFIG_USER_ONLY)
4677 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4679 int i, target_code_size, max_target_code_size;
4680 int direct_jmp_count, direct_jmp2_count, cross_page;
4681 TranslationBlock *tb;
4683 target_code_size = 0;
4684 max_target_code_size = 0;
4685 cross_page = 0;
4686 direct_jmp_count = 0;
4687 direct_jmp2_count = 0;
4688 for(i = 0; i < nb_tbs; i++) {
4689 tb = &tbs[i];
4690 target_code_size += tb->size;
4691 if (tb->size > max_target_code_size)
4692 max_target_code_size = tb->size;
4693 if (tb->page_addr[1] != -1)
4694 cross_page++;
4695 if (tb->tb_next_offset[0] != 0xffff) {
4696 direct_jmp_count++;
4697 if (tb->tb_next_offset[1] != 0xffff) {
4698 direct_jmp2_count++;
4702 /* XXX: avoid using doubles ? */
4703 cpu_fprintf(f, "Translation buffer state:\n");
4704 cpu_fprintf(f, "gen code size %td/%ld\n",
4705 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4706 cpu_fprintf(f, "TB count %d/%d\n",
4707 nb_tbs, code_gen_max_blocks);
4708 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4709 nb_tbs ? target_code_size / nb_tbs : 0,
4710 max_target_code_size);
4711 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4712 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4713 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4714 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4715 cross_page,
4716 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4717 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4718 direct_jmp_count,
4719 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4720 direct_jmp2_count,
4721 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4722 cpu_fprintf(f, "\nStatistics:\n");
4723 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4724 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4725 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4726 tcg_dump_info(f, cpu_fprintf);
4729 #define MMUSUFFIX _cmmu
4730 #define GETPC() NULL
4731 #define env cpu_single_env
4732 #define SOFTMMU_CODE_ACCESS
4734 #define SHIFT 0
4735 #include "softmmu_template.h"
4737 #define SHIFT 1
4738 #include "softmmu_template.h"
4740 #define SHIFT 2
4741 #include "softmmu_template.h"
4743 #define SHIFT 3
4744 #include "softmmu_template.h"
4746 #undef env
4748 #endif