Merge remote-tracking branch 'stefanha/trivial-patches' into staging
[qemu/opensuse.git] / exec.c
blob03d3a6b6095ff7d05ebf232253304d64ca3bfdcf
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122 static MemoryRegion io_mem_subpage_ram;
124 #endif
126 CPUArchState *first_cpu;
127 /* current CPU in the current thread. It is only valid inside
128 cpu_exec() */
129 DEFINE_TLS(CPUArchState *,cpu_single_env);
130 /* 0 = Do not count executed instructions.
131 1 = Precise instruction counting.
132 2 = Adaptive rate instruction counting. */
133 int use_icount = 0;
135 typedef struct PageDesc {
136 /* list of TBs intersecting this ram page */
137 TranslationBlock *first_tb;
138 /* in order to optimize self modifying code, we count the number
139 of lookups we do to a given page to use a bitmap */
140 unsigned int code_write_count;
141 uint8_t *code_bitmap;
142 #if defined(CONFIG_USER_ONLY)
143 unsigned long flags;
144 #endif
145 } PageDesc;
147 /* In system mode we want L1_MAP to be based on ram offsets,
148 while in user mode we want it to be based on virtual addresses. */
149 #if !defined(CONFIG_USER_ONLY)
150 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
151 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
154 #endif
155 #else
156 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
157 #endif
159 /* Size of the L2 (and L3, etc) page tables. */
160 #define L2_BITS 10
161 #define L2_SIZE (1 << L2_BITS)
163 #define P_L2_LEVELS \
164 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
166 /* The bits remaining after N lower levels of page tables. */
167 #define V_L1_BITS_REM \
168 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageEntry PhysPageEntry;
191 static MemoryRegionSection *phys_sections;
192 static unsigned phys_sections_nb, phys_sections_nb_alloc;
193 static uint16_t phys_section_unassigned;
194 static uint16_t phys_section_notdirty;
195 static uint16_t phys_section_rom;
196 static uint16_t phys_section_watch;
/* One 16-bit slot of the physical page map: a 1-bit leaf marker plus a
   15-bit index. */
struct PhysPageEntry {
    uint16_t is_leaf : 1;
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};
204 /* Simple allocator for PhysPageEntry nodes */
205 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
206 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
208 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
210 /* This is a multi-level map on the physical address space.
211 The bottom level has pointers to MemoryRegionSections. */
212 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
214 static void io_mem_init(void);
215 static void memory_map_init(void);
217 static MemoryRegion io_mem_watch;
218 #endif
220 /* log support */
221 #ifdef WIN32
222 static const char *logfilename = "qemu.log";
223 #else
224 static const char *logfilename = "/tmp/qemu.log";
225 #endif
226 FILE *logfile;
227 int loglevel;
228 static int log_append = 0;
230 /* statistics */
231 #if !defined(CONFIG_USER_ONLY)
232 static int tlb_flush_count;
233 #endif
234 static int tb_flush_count;
235 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Make [addr, addr + size) readable, writable and executable.
   The result of VirtualProtect() is not checked. */
static void map_exec(void *addr, long size)
{
    DWORD previous;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous);
}
#else
/* Make every host page overlapping [addr, addr + size) readable, writable
   and executable.  The range is widened to page boundaries before calling
   mprotect(); the result of mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long page_mask = ~(page_size - 1);
    unsigned long first, last;

    /* round the start down to a page boundary */
    first = (unsigned long)addr & page_mask;

    /* round the end up to a page boundary */
    last = (unsigned long)addr + size;
    last = (last + page_size - 1) & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
263 static void page_init(void)
265 /* NOTE: we can always suppose that qemu_host_page_size >=
266 TARGET_PAGE_SIZE */
267 #ifdef _WIN32
269 SYSTEM_INFO system_info;
271 GetSystemInfo(&system_info);
272 qemu_real_host_page_size = system_info.dwPageSize;
274 #else
275 qemu_real_host_page_size = getpagesize();
276 #endif
277 if (qemu_host_page_size == 0)
278 qemu_host_page_size = qemu_real_host_page_size;
279 if (qemu_host_page_size < TARGET_PAGE_SIZE)
280 qemu_host_page_size = TARGET_PAGE_SIZE;
281 qemu_host_page_mask = ~(qemu_host_page_size - 1);
283 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
285 #ifdef HAVE_KINFO_GETVMMAP
286 struct kinfo_vmentry *freep;
287 int i, cnt;
289 freep = kinfo_getvmmap(getpid(), &cnt);
290 if (freep) {
291 mmap_lock();
292 for (i = 0; i < cnt; i++) {
293 unsigned long startaddr, endaddr;
295 startaddr = freep[i].kve_start;
296 endaddr = freep[i].kve_end;
297 if (h2g_valid(startaddr)) {
298 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
300 if (h2g_valid(endaddr)) {
301 endaddr = h2g(endaddr);
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 } else {
304 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
305 endaddr = ~0ul;
306 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
307 #endif
311 free(freep);
312 mmap_unlock();
314 #else
315 FILE *f;
317 last_brk = (unsigned long)sbrk(0);
319 f = fopen("/compat/linux/proc/self/maps", "r");
320 if (f) {
321 mmap_lock();
323 do {
324 unsigned long startaddr, endaddr;
325 int n;
327 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
329 if (n == 2 && h2g_valid(startaddr)) {
330 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
332 if (h2g_valid(endaddr)) {
333 endaddr = h2g(endaddr);
334 } else {
335 endaddr = ~0ul;
337 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
339 } while (!feof(f));
341 fclose(f);
342 mmap_unlock();
344 #endif
346 #endif
349 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
351 PageDesc *pd;
352 void **lp;
353 int i;
355 #if defined(CONFIG_USER_ONLY)
356 /* We can't use g_malloc because it may recurse into a locked mutex. */
357 # define ALLOC(P, SIZE) \
358 do { \
359 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
360 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
361 } while (0)
362 #else
363 # define ALLOC(P, SIZE) \
364 do { P = g_malloc0(SIZE); } while (0)
365 #endif
367 /* Level 1. Always allocated. */
368 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
370 /* Level 2..N-1. */
371 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
372 void **p = *lp;
374 if (p == NULL) {
375 if (!alloc) {
376 return NULL;
378 ALLOC(p, sizeof(void *) * L2_SIZE);
379 *lp = p;
382 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
385 pd = *lp;
386 if (pd == NULL) {
387 if (!alloc) {
388 return NULL;
390 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
391 *lp = pd;
394 #undef ALLOC
396 return pd + (index & (L2_SIZE - 1));
399 static inline PageDesc *page_find(tb_page_addr_t index)
401 return page_find_alloc(index, 0);
404 #if !defined(CONFIG_USER_ONLY)
406 static void phys_map_node_reserve(unsigned nodes)
408 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
409 typedef PhysPageEntry Node[L2_SIZE];
410 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
411 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
412 phys_map_nodes_nb + nodes);
413 phys_map_nodes = g_renew(Node, phys_map_nodes,
414 phys_map_nodes_nb_alloc);
418 static uint16_t phys_map_node_alloc(void)
420 unsigned i;
421 uint16_t ret;
423 ret = phys_map_nodes_nb++;
424 assert(ret != PHYS_MAP_NODE_NIL);
425 assert(ret != phys_map_nodes_nb_alloc);
426 for (i = 0; i < L2_SIZE; ++i) {
427 phys_map_nodes[ret][i].is_leaf = 0;
428 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
430 return ret;
433 static void phys_map_nodes_reset(void)
435 phys_map_nodes_nb = 0;
439 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
440 target_phys_addr_t *nb, uint16_t leaf,
441 int level)
443 PhysPageEntry *p;
444 int i;
445 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
447 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
448 lp->ptr = phys_map_node_alloc();
449 p = phys_map_nodes[lp->ptr];
450 if (level == 0) {
451 for (i = 0; i < L2_SIZE; i++) {
452 p[i].is_leaf = 1;
453 p[i].ptr = phys_section_unassigned;
456 } else {
457 p = phys_map_nodes[lp->ptr];
459 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
461 while (*nb && lp < &p[L2_SIZE]) {
462 if ((*index & (step - 1)) == 0 && *nb >= step) {
463 lp->is_leaf = true;
464 lp->ptr = leaf;
465 *index += step;
466 *nb -= step;
467 } else {
468 phys_page_set_level(lp, index, nb, leaf, level - 1);
470 ++lp;
474 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
475 uint16_t leaf)
477 /* Wildly overreserve - it doesn't matter much. */
478 phys_map_node_reserve(3 * P_L2_LEVELS);
480 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
483 static MemoryRegionSection *phys_page_find(target_phys_addr_t index)
485 PhysPageEntry lp = phys_map;
486 PhysPageEntry *p;
487 int i;
488 uint16_t s_index = phys_section_unassigned;
490 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
491 if (lp.ptr == PHYS_MAP_NODE_NIL) {
492 goto not_found;
494 p = phys_map_nodes[lp.ptr];
495 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
498 s_index = lp.ptr;
499 not_found:
500 return &phys_sections[s_index];
503 static target_phys_addr_t section_addr(MemoryRegionSection *section,
504 target_phys_addr_t addr)
506 addr -= section->offset_within_address_space;
507 addr += section->offset_within_region;
508 return addr;
511 static void tlb_protect_code(ram_addr_t ram_addr);
512 static void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
513 target_ulong vaddr);
514 #define mmap_lock() do { } while(0)
515 #define mmap_unlock() do { } while(0)
516 #endif
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
520 #if defined(CONFIG_USER_ONLY)
521 /* Currently it is not recommended to allocate big chunks of data in
522 user mode. It will change when a dedicated libc will be used */
523 #define USE_STATIC_CODE_GEN_BUFFER
524 #endif
526 #ifdef USE_STATIC_CODE_GEN_BUFFER
527 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
528 __attribute__((aligned (CODE_GEN_ALIGN)));
529 #endif
531 static void code_gen_alloc(unsigned long tb_size)
533 #ifdef USE_STATIC_CODE_GEN_BUFFER
534 code_gen_buffer = static_code_gen_buffer;
535 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
536 map_exec(code_gen_buffer, code_gen_buffer_size);
537 #else
538 code_gen_buffer_size = tb_size;
539 if (code_gen_buffer_size == 0) {
540 #if defined(CONFIG_USER_ONLY)
541 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
542 #else
543 /* XXX: needs adjustments */
544 code_gen_buffer_size = (unsigned long)(ram_size / 4);
545 #endif
547 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
548 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
549 /* The code gen buffer location may have constraints depending on
550 the host cpu and OS */
551 #if defined(__linux__)
553 int flags;
554 void *start = NULL;
556 flags = MAP_PRIVATE | MAP_ANONYMOUS;
557 #if defined(__x86_64__)
558 flags |= MAP_32BIT;
559 /* Cannot map more than that */
560 if (code_gen_buffer_size > (800 * 1024 * 1024))
561 code_gen_buffer_size = (800 * 1024 * 1024);
562 #elif defined(__sparc_v9__)
563 // Map the buffer below 2G, so we can use direct calls and branches
564 flags |= MAP_FIXED;
565 start = (void *) 0x60000000UL;
566 if (code_gen_buffer_size > (512 * 1024 * 1024))
567 code_gen_buffer_size = (512 * 1024 * 1024);
568 #elif defined(__arm__)
569 /* Keep the buffer no bigger than 16MB to branch between blocks */
570 if (code_gen_buffer_size > 16 * 1024 * 1024)
571 code_gen_buffer_size = 16 * 1024 * 1024;
572 #elif defined(__s390x__)
573 /* Map the buffer so that we can use direct calls and branches. */
574 /* We have a +- 4GB range on the branches; leave some slop. */
575 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
576 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
578 start = (void *)0x90000000UL;
579 #endif
580 code_gen_buffer = mmap(start, code_gen_buffer_size,
581 PROT_WRITE | PROT_READ | PROT_EXEC,
582 flags, -1, 0);
583 if (code_gen_buffer == MAP_FAILED) {
584 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
585 exit(1);
588 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
589 || defined(__DragonFly__) || defined(__OpenBSD__) \
590 || defined(__NetBSD__)
592 int flags;
593 void *addr = NULL;
594 flags = MAP_PRIVATE | MAP_ANONYMOUS;
595 #if defined(__x86_64__)
596 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
597 * 0x40000000 is free */
598 flags |= MAP_FIXED;
599 addr = (void *)0x40000000;
600 /* Cannot map more than that */
601 if (code_gen_buffer_size > (800 * 1024 * 1024))
602 code_gen_buffer_size = (800 * 1024 * 1024);
603 #elif defined(__sparc_v9__)
604 // Map the buffer below 2G, so we can use direct calls and branches
605 flags |= MAP_FIXED;
606 addr = (void *) 0x60000000UL;
607 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
608 code_gen_buffer_size = (512 * 1024 * 1024);
610 #endif
611 code_gen_buffer = mmap(addr, code_gen_buffer_size,
612 PROT_WRITE | PROT_READ | PROT_EXEC,
613 flags, -1, 0);
614 if (code_gen_buffer == MAP_FAILED) {
615 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
616 exit(1);
619 #else
620 code_gen_buffer = g_malloc(code_gen_buffer_size);
621 map_exec(code_gen_buffer, code_gen_buffer_size);
622 #endif
623 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
624 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
625 code_gen_buffer_max_size = code_gen_buffer_size -
626 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
627 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
628 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
631 /* Must be called before using the QEMU cpus. 'tb_size' is the size
632 (in bytes) allocated to the translation buffer. Zero means default
633 size. */
634 void tcg_exec_init(unsigned long tb_size)
636 cpu_gen_init();
637 code_gen_alloc(tb_size);
638 code_gen_ptr = code_gen_buffer;
639 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
640 page_init();
641 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
642 /* There's no guest base to take into account, so go ahead and
643 initialize the prologue now. */
644 tcg_prologue_init(&tcg_ctx);
645 #endif
648 bool tcg_enabled(void)
650 return code_gen_buffer != NULL;
/* Global initialisation: in system emulation, set up the memory map and
   then the I/O memory regions.  Does nothing in user-mode builds. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
661 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
663 static int cpu_common_post_load(void *opaque, int version_id)
665 CPUArchState *env = opaque;
667 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
668 version_id is increased. */
669 env->interrupt_request &= ~0x01;
670 tlb_flush(env, 1);
672 return 0;
675 static const VMStateDescription vmstate_cpu_common = {
676 .name = "cpu_common",
677 .version_id = 1,
678 .minimum_version_id = 1,
679 .minimum_version_id_old = 1,
680 .post_load = cpu_common_post_load,
681 .fields = (VMStateField []) {
682 VMSTATE_UINT32(halted, CPUArchState),
683 VMSTATE_UINT32(interrupt_request, CPUArchState),
684 VMSTATE_END_OF_LIST()
687 #endif
689 CPUArchState *qemu_get_cpu(int cpu)
691 CPUArchState *env = first_cpu;
693 while (env) {
694 if (env->cpu_index == cpu)
695 break;
696 env = env->next_cpu;
699 return env;
702 void cpu_exec_init(CPUArchState *env)
704 CPUArchState **penv;
705 int cpu_index;
707 #if defined(CONFIG_USER_ONLY)
708 cpu_list_lock();
709 #endif
710 env->next_cpu = NULL;
711 penv = &first_cpu;
712 cpu_index = 0;
713 while (*penv != NULL) {
714 penv = &(*penv)->next_cpu;
715 cpu_index++;
717 env->cpu_index = cpu_index;
718 env->numa_node = 0;
719 QTAILQ_INIT(&env->breakpoints);
720 QTAILQ_INIT(&env->watchpoints);
721 #ifndef CONFIG_USER_ONLY
722 env->thread_id = qemu_get_thread_id();
723 #endif
724 *penv = env;
725 #if defined(CONFIG_USER_ONLY)
726 cpu_list_unlock();
727 #endif
728 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
729 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
730 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
731 cpu_save, cpu_load, env);
732 #endif
735 /* Allocate a new translation block. Flush the translation buffer if
736 too many translation blocks or too much generated code. */
737 static TranslationBlock *tb_alloc(target_ulong pc)
739 TranslationBlock *tb;
741 if (nb_tbs >= code_gen_max_blocks ||
742 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
743 return NULL;
744 tb = &tbs[nb_tbs++];
745 tb->pc = pc;
746 tb->cflags = 0;
747 return tb;
750 void tb_free(TranslationBlock *tb)
752 /* In practice this is mostly used for single use temporary TB
753 Ignore the hard cases and just back up if this TB happens to
754 be the last one generated. */
755 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
756 code_gen_ptr = tb->tc_ptr;
757 nb_tbs--;
761 static inline void invalidate_page_bitmap(PageDesc *p)
763 if (p->code_bitmap) {
764 g_free(p->code_bitmap);
765 p->code_bitmap = NULL;
767 p->code_write_count = 0;
770 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
772 static void page_flush_tb_1 (int level, void **lp)
774 int i;
776 if (*lp == NULL) {
777 return;
779 if (level == 0) {
780 PageDesc *pd = *lp;
781 for (i = 0; i < L2_SIZE; ++i) {
782 pd[i].first_tb = NULL;
783 invalidate_page_bitmap(pd + i);
785 } else {
786 void **pp = *lp;
787 for (i = 0; i < L2_SIZE; ++i) {
788 page_flush_tb_1 (level - 1, pp + i);
793 static void page_flush_tb(void)
795 int i;
796 for (i = 0; i < V_L1_SIZE; i++) {
797 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
801 /* flush all the translation blocks */
802 /* XXX: tb_flush is currently not thread safe */
803 void tb_flush(CPUArchState *env1)
805 CPUArchState *env;
806 #if defined(DEBUG_FLUSH)
807 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
808 (unsigned long)(code_gen_ptr - code_gen_buffer),
809 nb_tbs, nb_tbs > 0 ?
810 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
811 #endif
812 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
813 cpu_abort(env1, "Internal error: code buffer overflow\n");
815 nb_tbs = 0;
817 for(env = first_cpu; env != NULL; env = env->next_cpu) {
818 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
821 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
822 page_flush_tb();
824 code_gen_ptr = code_gen_buffer;
825 /* XXX: flush processor icache at this point if cache flush is
826 expensive */
827 tb_flush_count++;
830 #ifdef DEBUG_TB_CHECK
832 static void tb_invalidate_check(target_ulong address)
834 TranslationBlock *tb;
835 int i;
836 address &= TARGET_PAGE_MASK;
837 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
838 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
839 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
840 address >= tb->pc + tb->size)) {
841 printf("ERROR invalidate: address=" TARGET_FMT_lx
842 " PC=%08lx size=%04x\n",
843 address, (long)tb->pc, tb->size);
849 /* verify that all the pages have correct rights for code */
850 static void tb_page_check(void)
852 TranslationBlock *tb;
853 int i, flags1, flags2;
855 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
856 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
857 flags1 = page_get_flags(tb->pc);
858 flags2 = page_get_flags(tb->pc + tb->size - 1);
859 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
860 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
861 (long)tb->pc, tb->size, flags1, flags2);
867 #endif
869 /* invalidate one TB */
870 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
871 int next_offset)
873 TranslationBlock *tb1;
874 for(;;) {
875 tb1 = *ptb;
876 if (tb1 == tb) {
877 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
878 break;
880 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
884 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
886 TranslationBlock *tb1;
887 unsigned int n1;
889 for(;;) {
890 tb1 = *ptb;
891 n1 = (long)tb1 & 3;
892 tb1 = (TranslationBlock *)((long)tb1 & ~3);
893 if (tb1 == tb) {
894 *ptb = tb1->page_next[n1];
895 break;
897 ptb = &tb1->page_next[n1];
901 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
903 TranslationBlock *tb1, **ptb;
904 unsigned int n1;
906 ptb = &tb->jmp_next[n];
907 tb1 = *ptb;
908 if (tb1) {
909 /* find tb(n) in circular list */
910 for(;;) {
911 tb1 = *ptb;
912 n1 = (long)tb1 & 3;
913 tb1 = (TranslationBlock *)((long)tb1 & ~3);
914 if (n1 == n && tb1 == tb)
915 break;
916 if (n1 == 2) {
917 ptb = &tb1->jmp_first;
918 } else {
919 ptb = &tb1->jmp_next[n1];
922 /* now we can suppress tb(n) from the list */
923 *ptb = tb->jmp_next[n];
925 tb->jmp_next[n] = NULL;
929 /* reset the jump entry 'n' of a TB so that it is not chained to
930 another TB */
931 static inline void tb_reset_jump(TranslationBlock *tb, int n)
933 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
936 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
938 CPUArchState *env;
939 PageDesc *p;
940 unsigned int h, n1;
941 tb_page_addr_t phys_pc;
942 TranslationBlock *tb1, *tb2;
944 /* remove the TB from the hash list */
945 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
946 h = tb_phys_hash_func(phys_pc);
947 tb_remove(&tb_phys_hash[h], tb,
948 offsetof(TranslationBlock, phys_hash_next));
950 /* remove the TB from the page list */
951 if (tb->page_addr[0] != page_addr) {
952 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
953 tb_page_remove(&p->first_tb, tb);
954 invalidate_page_bitmap(p);
956 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
957 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
958 tb_page_remove(&p->first_tb, tb);
959 invalidate_page_bitmap(p);
962 tb_invalidated_flag = 1;
964 /* remove the TB from the hash list */
965 h = tb_jmp_cache_hash_func(tb->pc);
966 for(env = first_cpu; env != NULL; env = env->next_cpu) {
967 if (env->tb_jmp_cache[h] == tb)
968 env->tb_jmp_cache[h] = NULL;
971 /* suppress this TB from the two jump lists */
972 tb_jmp_remove(tb, 0);
973 tb_jmp_remove(tb, 1);
975 /* suppress any remaining jumps to this TB */
976 tb1 = tb->jmp_first;
977 for(;;) {
978 n1 = (long)tb1 & 3;
979 if (n1 == 2)
980 break;
981 tb1 = (TranslationBlock *)((long)tb1 & ~3);
982 tb2 = tb1->jmp_next[n1];
983 tb_reset_jump(tb1, n1);
984 tb1->jmp_next[n1] = NULL;
985 tb1 = tb2;
987 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
989 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab', where bit i lives in
   tab[i >> 3] at position (i & 7).  A zero or negative 'len' sets
   nothing. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    /* Without this guard a negative len that crosses a byte boundary
       would still OR the first mask into the bitmap. */
    if (len <= 0) {
        return;
    }

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* range lies within a single byte (start < end since len > 0) */
        mask &= ~(0xff << (end & 7));
        *tab |= mask;
    } else {
        /* leading partial byte */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        /* whole bytes in the middle */
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        /* trailing partial byte */
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
1019 static void build_page_bitmap(PageDesc *p)
1021 int n, tb_start, tb_end;
1022 TranslationBlock *tb;
1024 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1026 tb = p->first_tb;
1027 while (tb != NULL) {
1028 n = (long)tb & 3;
1029 tb = (TranslationBlock *)((long)tb & ~3);
1030 /* NOTE: this is subtle as a TB may span two physical pages */
1031 if (n == 0) {
1032 /* NOTE: tb_end may be after the end of the page, but
1033 it is not a problem */
1034 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1035 tb_end = tb_start + tb->size;
1036 if (tb_end > TARGET_PAGE_SIZE)
1037 tb_end = TARGET_PAGE_SIZE;
1038 } else {
1039 tb_start = 0;
1040 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1042 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1043 tb = tb->page_next[n];
1047 TranslationBlock *tb_gen_code(CPUArchState *env,
1048 target_ulong pc, target_ulong cs_base,
1049 int flags, int cflags)
1051 TranslationBlock *tb;
1052 uint8_t *tc_ptr;
1053 tb_page_addr_t phys_pc, phys_page2;
1054 target_ulong virt_page2;
1055 int code_gen_size;
1057 phys_pc = get_page_addr_code(env, pc);
1058 tb = tb_alloc(pc);
1059 if (!tb) {
1060 /* flush must be done */
1061 tb_flush(env);
1062 /* cannot fail at this point */
1063 tb = tb_alloc(pc);
1064 /* Don't forget to invalidate previous TB info. */
1065 tb_invalidated_flag = 1;
1067 tc_ptr = code_gen_ptr;
1068 tb->tc_ptr = tc_ptr;
1069 tb->cs_base = cs_base;
1070 tb->flags = flags;
1071 tb->cflags = cflags;
1072 cpu_gen_code(env, tb, &code_gen_size);
1073 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1075 /* check next page if needed */
1076 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1077 phys_page2 = -1;
1078 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1079 phys_page2 = get_page_addr_code(env, virt_page2);
1081 tb_link_page(tb, phys_pc, phys_page2);
1082 return tb;
1085 /* invalidate all TBs which intersect with the target physical page
1086 starting in range [start;end[. NOTE: start and end must refer to
1087 the same physical page. 'is_cpu_write_access' should be true if called
1088 from a real cpu write access: the virtual CPU will exit the current
1089 TB if code is modified inside this TB. */
1090 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1091 int is_cpu_write_access)
1093 TranslationBlock *tb, *tb_next, *saved_tb;
1094 CPUArchState *env = cpu_single_env;
1095 tb_page_addr_t tb_start, tb_end;
1096 PageDesc *p;
1097 int n;
1098 #ifdef TARGET_HAS_PRECISE_SMC
1099 int current_tb_not_found = is_cpu_write_access;
1100 TranslationBlock *current_tb = NULL;
1101 int current_tb_modified = 0;
1102 target_ulong current_pc = 0;
1103 target_ulong current_cs_base = 0;
1104 int current_flags = 0;
1105 #endif /* TARGET_HAS_PRECISE_SMC */
1107 p = page_find(start >> TARGET_PAGE_BITS);
1108 if (!p)
1109 return;
1110 if (!p->code_bitmap &&
1111 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1112 is_cpu_write_access) {
1113 /* build code bitmap */
1114 build_page_bitmap(p);
1117 /* we remove all the TBs in the range [start, end[ */
1118 /* XXX: see if in some cases it could be faster to invalidate all the code */
1119 tb = p->first_tb;
1120 while (tb != NULL) {
1121 n = (long)tb & 3;
1122 tb = (TranslationBlock *)((long)tb & ~3);
1123 tb_next = tb->page_next[n];
1124 /* NOTE: this is subtle as a TB may span two physical pages */
1125 if (n == 0) {
1126 /* NOTE: tb_end may be after the end of the page, but
1127 it is not a problem */
1128 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1129 tb_end = tb_start + tb->size;
1130 } else {
1131 tb_start = tb->page_addr[1];
1132 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1134 if (!(tb_end <= start || tb_start >= end)) {
1135 #ifdef TARGET_HAS_PRECISE_SMC
1136 if (current_tb_not_found) {
1137 current_tb_not_found = 0;
1138 current_tb = NULL;
1139 if (env->mem_io_pc) {
1140 /* now we have a real cpu fault */
1141 current_tb = tb_find_pc(env->mem_io_pc);
1144 if (current_tb == tb &&
1145 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1146 /* If we are modifying the current TB, we must stop
1147 its execution. We could be more precise by checking
1148 that the modification is after the current PC, but it
1149 would require a specialized function to partially
1150 restore the CPU state */
1152 current_tb_modified = 1;
1153 cpu_restore_state(current_tb, env, env->mem_io_pc);
1154 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1155 &current_flags);
1157 #endif /* TARGET_HAS_PRECISE_SMC */
1158 /* we need to do that to handle the case where a signal
1159 occurs while doing tb_phys_invalidate() */
1160 saved_tb = NULL;
1161 if (env) {
1162 saved_tb = env->current_tb;
1163 env->current_tb = NULL;
1165 tb_phys_invalidate(tb, -1);
1166 if (env) {
1167 env->current_tb = saved_tb;
1168 if (env->interrupt_request && env->current_tb)
1169 cpu_interrupt(env, env->interrupt_request);
1172 tb = tb_next;
1174 #if !defined(CONFIG_USER_ONLY)
1175 /* if no code remaining, no need to continue to use slow writes */
1176 if (!p->first_tb) {
1177 invalidate_page_bitmap(p);
1178 if (is_cpu_write_access) {
1179 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1182 #endif
1183 #ifdef TARGET_HAS_PRECISE_SMC
1184 if (current_tb_modified) {
1185 /* we generate a block containing just the instruction
1186 modifying the memory. It will ensure that it cannot modify
1187 itself */
1188 env->current_tb = NULL;
1189 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1190 cpu_resume_from_signal(env, NULL);
1192 #endif
1195 /* len must be <= 8 and start must be a multiple of len */
1196 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1198 PageDesc *p;
1199 int offset, b;
1200 #if 0
1201 if (1) {
1202 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1203 cpu_single_env->mem_io_vaddr, len,
1204 cpu_single_env->eip,
1205 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1207 #endif
1208 p = page_find(start >> TARGET_PAGE_BITS);
1209 if (!p)
1210 return;
1211 if (p->code_bitmap) {
1212 offset = start & ~TARGET_PAGE_MASK;
1213 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1214 if (b & ((1 << len) - 1))
1215 goto do_invalidate;
1216 } else {
1217 do_invalidate:
1218 tb_invalidate_phys_page_range(start, start + len, 1);
1222 #if !defined(CONFIG_SOFTMMU)
1223 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1224 unsigned long pc, void *puc)
1226 TranslationBlock *tb;
1227 PageDesc *p;
1228 int n;
1229 #ifdef TARGET_HAS_PRECISE_SMC
1230 TranslationBlock *current_tb = NULL;
1231 CPUArchState *env = cpu_single_env;
1232 int current_tb_modified = 0;
1233 target_ulong current_pc = 0;
1234 target_ulong current_cs_base = 0;
1235 int current_flags = 0;
1236 #endif
1238 addr &= TARGET_PAGE_MASK;
1239 p = page_find(addr >> TARGET_PAGE_BITS);
1240 if (!p)
1241 return;
1242 tb = p->first_tb;
1243 #ifdef TARGET_HAS_PRECISE_SMC
1244 if (tb && pc != 0) {
1245 current_tb = tb_find_pc(pc);
1247 #endif
1248 while (tb != NULL) {
1249 n = (long)tb & 3;
1250 tb = (TranslationBlock *)((long)tb & ~3);
1251 #ifdef TARGET_HAS_PRECISE_SMC
1252 if (current_tb == tb &&
1253 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1254 /* If we are modifying the current TB, we must stop
1255 its execution. We could be more precise by checking
1256 that the modification is after the current PC, but it
1257 would require a specialized function to partially
1258 restore the CPU state */
1260 current_tb_modified = 1;
1261 cpu_restore_state(current_tb, env, pc);
1262 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1263 &current_flags);
1265 #endif /* TARGET_HAS_PRECISE_SMC */
1266 tb_phys_invalidate(tb, addr);
1267 tb = tb->page_next[n];
1269 p->first_tb = NULL;
1270 #ifdef TARGET_HAS_PRECISE_SMC
1271 if (current_tb_modified) {
1272 /* we generate a block containing just the instruction
1273 modifying the memory. It will ensure that it cannot modify
1274 itself */
1275 env->current_tb = NULL;
1276 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1277 cpu_resume_from_signal(env, puc);
1279 #endif
1281 #endif
1283 /* add the tb in the target page and protect it if necessary */
1284 static inline void tb_alloc_page(TranslationBlock *tb,
1285 unsigned int n, tb_page_addr_t page_addr)
1287 PageDesc *p;
1288 #ifndef CONFIG_USER_ONLY
1289 bool page_already_protected;
1290 #endif
1292 tb->page_addr[n] = page_addr;
1293 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1294 tb->page_next[n] = p->first_tb;
1295 #ifndef CONFIG_USER_ONLY
1296 page_already_protected = p->first_tb != NULL;
1297 #endif
1298 p->first_tb = (TranslationBlock *)((long)tb | n);
1299 invalidate_page_bitmap(p);
1301 #if defined(TARGET_HAS_SMC) || 1
1303 #if defined(CONFIG_USER_ONLY)
1304 if (p->flags & PAGE_WRITE) {
1305 target_ulong addr;
1306 PageDesc *p2;
1307 int prot;
1309 /* force the host page as non writable (writes will have a
1310 page fault + mprotect overhead) */
1311 page_addr &= qemu_host_page_mask;
1312 prot = 0;
1313 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1314 addr += TARGET_PAGE_SIZE) {
1316 p2 = page_find (addr >> TARGET_PAGE_BITS);
1317 if (!p2)
1318 continue;
1319 prot |= p2->flags;
1320 p2->flags &= ~PAGE_WRITE;
1322 mprotect(g2h(page_addr), qemu_host_page_size,
1323 (prot & PAGE_BITS) & ~PAGE_WRITE);
1324 #ifdef DEBUG_TB_INVALIDATE
1325 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1326 page_addr);
1327 #endif
1329 #else
1330 /* if some code is already present, then the pages are already
1331 protected. So we handle the case where only the first TB is
1332 allocated in a physical page */
1333 if (!page_already_protected) {
1334 tlb_protect_code(page_addr);
1336 #endif
1338 #endif /* TARGET_HAS_SMC */
1341 /* add a new TB and link it to the physical page tables. phys_page2 is
1342 (-1) to indicate that only one page contains the TB. */
1343 void tb_link_page(TranslationBlock *tb,
1344 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1346 unsigned int h;
1347 TranslationBlock **ptb;
1349 /* Grab the mmap lock to stop another thread invalidating this TB
1350 before we are done. */
1351 mmap_lock();
1352 /* add in the physical hash table */
1353 h = tb_phys_hash_func(phys_pc);
1354 ptb = &tb_phys_hash[h];
1355 tb->phys_hash_next = *ptb;
1356 *ptb = tb;
1358 /* add in the page list */
1359 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1360 if (phys_page2 != -1)
1361 tb_alloc_page(tb, 1, phys_page2);
1362 else
1363 tb->page_addr[1] = -1;
1365 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1366 tb->jmp_next[0] = NULL;
1367 tb->jmp_next[1] = NULL;
1369 /* init original jump addresses */
1370 if (tb->tb_next_offset[0] != 0xffff)
1371 tb_reset_jump(tb, 0);
1372 if (tb->tb_next_offset[1] != 0xffff)
1373 tb_reset_jump(tb, 1);
1375 #ifdef DEBUG_TB_CHECK
1376 tb_page_check();
1377 #endif
1378 mmap_unlock();
1381 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1382 tb[1].tc_ptr. Return NULL if not found */
1383 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1385 int m_min, m_max, m;
1386 unsigned long v;
1387 TranslationBlock *tb;
1389 if (nb_tbs <= 0)
1390 return NULL;
1391 if (tc_ptr < (unsigned long)code_gen_buffer ||
1392 tc_ptr >= (unsigned long)code_gen_ptr)
1393 return NULL;
1394 /* binary search (cf Knuth) */
1395 m_min = 0;
1396 m_max = nb_tbs - 1;
1397 while (m_min <= m_max) {
1398 m = (m_min + m_max) >> 1;
1399 tb = &tbs[m];
1400 v = (unsigned long)tb->tc_ptr;
1401 if (v == tc_ptr)
1402 return tb;
1403 else if (tc_ptr < v) {
1404 m_max = m - 1;
1405 } else {
1406 m_min = m + 1;
1409 return &tbs[m_max];
1412 static void tb_reset_jump_recursive(TranslationBlock *tb);
1414 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1416 TranslationBlock *tb1, *tb_next, **ptb;
1417 unsigned int n1;
1419 tb1 = tb->jmp_next[n];
1420 if (tb1 != NULL) {
1421 /* find head of list */
1422 for(;;) {
1423 n1 = (long)tb1 & 3;
1424 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1425 if (n1 == 2)
1426 break;
1427 tb1 = tb1->jmp_next[n1];
1429 /* we are now sure now that tb jumps to tb1 */
1430 tb_next = tb1;
1432 /* remove tb from the jmp_first list */
1433 ptb = &tb_next->jmp_first;
1434 for(;;) {
1435 tb1 = *ptb;
1436 n1 = (long)tb1 & 3;
1437 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1438 if (n1 == n && tb1 == tb)
1439 break;
1440 ptb = &tb1->jmp_next[n1];
1442 *ptb = tb->jmp_next[n];
1443 tb->jmp_next[n] = NULL;
1445 /* suppress the jump to next tb in generated code */
1446 tb_reset_jump(tb, n);
1448 /* suppress jumps in the tb on which we could have jumped */
1449 tb_reset_jump_recursive(tb_next);
1453 static void tb_reset_jump_recursive(TranslationBlock *tb)
1455 tb_reset_jump_recursive2(tb, 0);
1456 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate translated code covering the byte at 'pc'. */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate translated code covering the byte at guest virtual address
   'pc'.  Nothing is done unless the address resolves to RAM or to a
   readable ROM device. */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    target_phys_addr_t phys;
    MemoryRegionSection *sec;
    ram_addr_t code_addr;

    phys = cpu_get_phys_page_debug(env, pc);
    sec = phys_page_find(phys >> TARGET_PAGE_BITS);
    if (!memory_region_is_ram(sec->mr)
        && !(sec->mr->rom_device && sec->mr->readable)) {
        return;
    }
    code_addr = (memory_region_get_ram_addr(sec->mr) & TARGET_PAGE_MASK)
        + section_addr(sec, phys);
    tb_invalidate_phys_page_range(code_addr, code_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1485 #if defined(CONFIG_USER_ONLY)
1486 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1491 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1492 int flags, CPUWatchpoint **watchpoint)
1494 return -ENOSYS;
1496 #else
1497 /* Add a watchpoint. */
1498 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1499 int flags, CPUWatchpoint **watchpoint)
1501 target_ulong len_mask = ~(len - 1);
1502 CPUWatchpoint *wp;
1504 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1505 if ((len & (len - 1)) || (addr & ~len_mask) ||
1506 len == 0 || len > TARGET_PAGE_SIZE) {
1507 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1508 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1509 return -EINVAL;
1511 wp = g_malloc(sizeof(*wp));
1513 wp->vaddr = addr;
1514 wp->len_mask = len_mask;
1515 wp->flags = flags;
1517 /* keep all GDB-injected watchpoints in front */
1518 if (flags & BP_GDB)
1519 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1520 else
1521 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1523 tlb_flush_page(env, addr);
1525 if (watchpoint)
1526 *watchpoint = wp;
1527 return 0;
1530 /* Remove a specific watchpoint. */
1531 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1532 int flags)
1534 target_ulong len_mask = ~(len - 1);
1535 CPUWatchpoint *wp;
1537 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1538 if (addr == wp->vaddr && len_mask == wp->len_mask
1539 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1540 cpu_watchpoint_remove_by_ref(env, wp);
1541 return 0;
1544 return -ENOENT;
1547 /* Remove a specific watchpoint by reference. */
1548 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1550 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1552 tlb_flush_page(env, watchpoint->vaddr);
1554 g_free(watchpoint);
1557 /* Remove all matching watchpoints. */
1558 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1560 CPUWatchpoint *wp, *next;
1562 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1563 if (wp->flags & mask)
1564 cpu_watchpoint_remove_by_ref(env, wp);
1567 #endif
1569 /* Add a breakpoint. */
1570 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1571 CPUBreakpoint **breakpoint)
1573 #if defined(TARGET_HAS_ICE)
1574 CPUBreakpoint *bp;
1576 bp = g_malloc(sizeof(*bp));
1578 bp->pc = pc;
1579 bp->flags = flags;
1581 /* keep all GDB-injected breakpoints in front */
1582 if (flags & BP_GDB)
1583 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1584 else
1585 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1587 breakpoint_invalidate(env, pc);
1589 if (breakpoint)
1590 *breakpoint = bp;
1591 return 0;
1592 #else
1593 return -ENOSYS;
1594 #endif
1597 /* Remove a specific breakpoint. */
1598 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1600 #if defined(TARGET_HAS_ICE)
1601 CPUBreakpoint *bp;
1603 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1604 if (bp->pc == pc && bp->flags == flags) {
1605 cpu_breakpoint_remove_by_ref(env, bp);
1606 return 0;
1609 return -ENOENT;
1610 #else
1611 return -ENOSYS;
1612 #endif
1615 /* Remove a specific breakpoint by reference. */
1616 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1618 #if defined(TARGET_HAS_ICE)
1619 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1621 breakpoint_invalidate(env, breakpoint->pc);
1623 g_free(breakpoint);
1624 #endif
1627 /* Remove all matching breakpoints. */
1628 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1630 #if defined(TARGET_HAS_ICE)
1631 CPUBreakpoint *bp, *next;
1633 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1634 if (bp->flags & mask)
1635 cpu_breakpoint_remove_by_ref(env, bp);
1637 #endif
1640 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1641 CPU loop after each instruction */
1642 void cpu_single_step(CPUArchState *env, int enabled)
1644 #if defined(TARGET_HAS_ICE)
1645 if (env->singlestep_enabled != enabled) {
1646 env->singlestep_enabled = enabled;
1647 if (kvm_enabled())
1648 kvm_update_guest_debug(env, 0);
1649 else {
1650 /* must flush all the translated code to avoid inconsistencies */
1651 /* XXX: only flush what is necessary */
1652 tb_flush(env);
1655 #endif
1658 /* enable or disable low levels log */
1659 void cpu_set_log(int log_flags)
1661 loglevel = log_flags;
1662 if (loglevel && !logfile) {
1663 logfile = fopen(logfilename, log_append ? "a" : "w");
1664 if (!logfile) {
1665 perror(logfilename);
1666 _exit(1);
1668 #if !defined(CONFIG_SOFTMMU)
1669 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1671 static char logfile_buf[4096];
1672 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1674 #elif defined(_WIN32)
1675 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1676 setvbuf(logfile, NULL, _IONBF, 0);
1677 #else
1678 setvbuf(logfile, NULL, _IOLBF, 0);
1679 #endif
1680 log_append = 1;
1682 if (!loglevel && logfile) {
1683 fclose(logfile);
1684 logfile = NULL;
1688 void cpu_set_log_filename(const char *filename)
1690 logfilename = strdup(filename);
1691 if (logfile) {
1692 fclose(logfile);
1693 logfile = NULL;
1695 cpu_set_log(loglevel);
1698 static void cpu_unlink_tb(CPUArchState *env)
1700 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1701 problem and hope the cpu will stop of its own accord. For userspace
1702 emulation this often isn't actually as bad as it sounds. Often
1703 signals are used primarily to interrupt blocking syscalls. */
1704 TranslationBlock *tb;
1705 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1707 spin_lock(&interrupt_lock);
1708 tb = env->current_tb;
1709 /* if the cpu is currently executing code, we must unlink it and
1710 all the potentially executing TB */
1711 if (tb) {
1712 env->current_tb = NULL;
1713 tb_reset_jump_recursive(tb);
1715 spin_unlock(&interrupt_lock);
1718 #ifndef CONFIG_USER_ONLY
1719 /* mask must never be zero, except for A20 change call */
1720 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1722 int old_mask;
1724 old_mask = env->interrupt_request;
1725 env->interrupt_request |= mask;
1728 * If called from iothread context, wake the target cpu in
1729 * case its halted.
1731 if (!qemu_cpu_is_self(env)) {
1732 qemu_cpu_kick(env);
1733 return;
1736 if (use_icount) {
1737 env->icount_decr.u16.high = 0xffff;
1738 if (!can_do_io(env)
1739 && (mask & ~old_mask) != 0) {
1740 cpu_abort(env, "Raised interrupt while not in I/O function");
1742 } else {
1743 cpu_unlink_tb(env);
1747 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1749 #else /* CONFIG_USER_ONLY */
1751 void cpu_interrupt(CPUArchState *env, int mask)
1753 env->interrupt_request |= mask;
1754 cpu_unlink_tb(env);
1756 #endif /* CONFIG_USER_ONLY */
1758 void cpu_reset_interrupt(CPUArchState *env, int mask)
1760 env->interrupt_request &= ~mask;
1763 void cpu_exit(CPUArchState *env)
1765 env->exit_request = 1;
1766 cpu_unlink_tb(env);
1769 const CPULogItem cpu_log_items[] = {
1770 { CPU_LOG_TB_OUT_ASM, "out_asm",
1771 "show generated host assembly code for each compiled TB" },
1772 { CPU_LOG_TB_IN_ASM, "in_asm",
1773 "show target assembly code for each compiled TB" },
1774 { CPU_LOG_TB_OP, "op",
1775 "show micro ops for each compiled TB" },
1776 { CPU_LOG_TB_OP_OPT, "op_opt",
1777 "show micro ops "
1778 #ifdef TARGET_I386
1779 "before eflags optimization and "
1780 #endif
1781 "after liveness analysis" },
1782 { CPU_LOG_INT, "int",
1783 "show interrupts/exceptions in short format" },
1784 { CPU_LOG_EXEC, "exec",
1785 "show trace before each executed TB (lots of logs)" },
1786 { CPU_LOG_TB_CPU, "cpu",
1787 "show CPU state before block translation" },
1788 #ifdef TARGET_I386
1789 { CPU_LOG_PCALL, "pcall",
1790 "show protected mode far calls/returns/exceptions" },
1791 { CPU_LOG_RESET, "cpu_reset",
1792 "show CPU state before CPU resets" },
1793 #endif
1794 #ifdef DEBUG_IOPORT
1795 { CPU_LOG_IOPORT, "ioport",
1796 "show all i/o ports accesses" },
1797 #endif
1798 { 0, NULL, NULL },
/* Return 1 if the first 'n' bytes of 's1' are exactly the string 's2'
   (which must itself be 'n' characters long), 0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1808 /* takes a comma separated list of log masks. Return 0 if error. */
1809 int cpu_str_to_log_mask(const char *str)
1811 const CPULogItem *item;
1812 int mask;
1813 const char *p, *p1;
1815 p = str;
1816 mask = 0;
1817 for(;;) {
1818 p1 = strchr(p, ',');
1819 if (!p1)
1820 p1 = p + strlen(p);
1821 if(cmp1(p,p1-p,"all")) {
1822 for(item = cpu_log_items; item->mask != 0; item++) {
1823 mask |= item->mask;
1825 } else {
1826 for(item = cpu_log_items; item->mask != 0; item++) {
1827 if (cmp1(p, p1 - p, item->name))
1828 goto found;
1830 return 0;
1832 found:
1833 mask |= item->mask;
1834 if (*p1 != ',')
1835 break;
1836 p = p1 + 1;
1838 return mask;
1841 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1843 va_list ap;
1844 va_list ap2;
1846 va_start(ap, fmt);
1847 va_copy(ap2, ap);
1848 fprintf(stderr, "qemu: fatal: ");
1849 vfprintf(stderr, fmt, ap);
1850 fprintf(stderr, "\n");
1851 #ifdef TARGET_I386
1852 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1853 #else
1854 cpu_dump_state(env, stderr, fprintf, 0);
1855 #endif
1856 if (qemu_log_enabled()) {
1857 qemu_log("qemu: fatal: ");
1858 qemu_log_vprintf(fmt, ap2);
1859 qemu_log("\n");
1860 #ifdef TARGET_I386
1861 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1862 #else
1863 log_cpu_state(env, 0);
1864 #endif
1865 qemu_log_flush();
1866 qemu_log_close();
1868 va_end(ap2);
1869 va_end(ap);
1870 #if defined(CONFIG_USER_ONLY)
1872 struct sigaction act;
1873 sigfillset(&act.sa_mask);
1874 act.sa_handler = SIG_DFL;
1875 sigaction(SIGABRT, &act, NULL);
1877 #endif
1878 abort();
1881 CPUArchState *cpu_copy(CPUArchState *env)
1883 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1884 CPUArchState *next_cpu = new_env->next_cpu;
1885 int cpu_index = new_env->cpu_index;
1886 #if defined(TARGET_HAS_ICE)
1887 CPUBreakpoint *bp;
1888 CPUWatchpoint *wp;
1889 #endif
1891 memcpy(new_env, env, sizeof(CPUArchState));
1893 /* Preserve chaining and index. */
1894 new_env->next_cpu = next_cpu;
1895 new_env->cpu_index = cpu_index;
1897 /* Clone all break/watchpoints.
1898 Note: Once we support ptrace with hw-debug register access, make sure
1899 BP_CPU break/watchpoints are handled correctly on clone. */
1900 QTAILQ_INIT(&env->breakpoints);
1901 QTAILQ_INIT(&env->watchpoints);
1902 #if defined(TARGET_HAS_ICE)
1903 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1904 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1906 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1907 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1908 wp->flags, NULL);
1910 #endif
1912 return new_env;
1915 #if !defined(CONFIG_USER_ONLY)
1917 static inline void tlb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1919 unsigned int i;
1921 /* Discard jump cache entries for any tb which might potentially
1922 overlap the flushed page. */
1923 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1924 memset (&env->tb_jmp_cache[i], 0,
1925 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1927 i = tb_jmp_cache_hash_page(addr);
1928 memset (&env->tb_jmp_cache[i], 0,
1929 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1932 static CPUTLBEntry s_cputlb_empty_entry = {
1933 .addr_read = -1,
1934 .addr_write = -1,
1935 .addr_code = -1,
1936 .addend = -1,
1939 /* NOTE:
1940 * If flush_global is true (the usual case), flush all tlb entries.
1941 * If flush_global is false, flush (at least) all tlb entries not
1942 * marked global.
1944 * Since QEMU doesn't currently implement a global/not-global flag
1945 * for tlb entries, at the moment tlb_flush() will also flush all
1946 * tlb entries in the flush_global == false case. This is OK because
1947 * CPU architectures generally permit an implementation to drop
1948 * entries from the TLB at any time, so flushing more entries than
1949 * required is only an efficiency issue, not a correctness issue.
1951 void tlb_flush(CPUArchState *env, int flush_global)
1953 int i;
1955 #if defined(DEBUG_TLB)
1956 printf("tlb_flush:\n");
1957 #endif
1958 /* must reset current TB so that interrupts cannot modify the
1959 links while we are modifying them */
1960 env->current_tb = NULL;
1962 for(i = 0; i < CPU_TLB_SIZE; i++) {
1963 int mmu_idx;
1964 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1965 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1969 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1971 env->tlb_flush_addr = -1;
1972 env->tlb_flush_mask = 0;
1973 tlb_flush_count++;
1976 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1978 if (addr == (tlb_entry->addr_read &
1979 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1980 addr == (tlb_entry->addr_write &
1981 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1982 addr == (tlb_entry->addr_code &
1983 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1984 *tlb_entry = s_cputlb_empty_entry;
1988 void tlb_flush_page(CPUArchState *env, target_ulong addr)
1990 int i;
1991 int mmu_idx;
1993 #if defined(DEBUG_TLB)
1994 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1995 #endif
1996 /* Check if we need to flush due to large pages. */
1997 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1998 #if defined(DEBUG_TLB)
1999 printf("tlb_flush_page: forced full flush ("
2000 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2001 env->tlb_flush_addr, env->tlb_flush_mask);
2002 #endif
2003 tlb_flush(env, 1);
2004 return;
2006 /* must reset current TB so that interrupts cannot modify the
2007 links while we are modifying them */
2008 env->current_tb = NULL;
2010 addr &= TARGET_PAGE_MASK;
2011 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2012 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2013 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2015 tlb_flush_jmp_cache(env, addr);
2018 /* update the TLBs so that writes to code in the virtual page 'addr'
2019 can be detected */
2020 static void tlb_protect_code(ram_addr_t ram_addr)
2022 cpu_physical_memory_reset_dirty(ram_addr,
2023 ram_addr + TARGET_PAGE_SIZE,
2024 CODE_DIRTY_FLAG);
2027 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2028 tested for self modifying code */
2029 static void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
2030 target_ulong vaddr)
2032 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2035 static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
2037 return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
2040 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2041 unsigned long start, unsigned long length)
2043 unsigned long addr;
2044 if (tlb_is_dirty_ram(tlb_entry)) {
2045 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2046 if ((addr - start) < length) {
2047 tlb_entry->addr_write |= TLB_NOTDIRTY;
2052 /* Note: start and end must be within the same ram block. */
2053 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2054 int dirty_flags)
2056 CPUArchState *env;
2057 unsigned long length, start1;
2058 int i;
2060 start &= TARGET_PAGE_MASK;
2061 end = TARGET_PAGE_ALIGN(end);
2063 length = end - start;
2064 if (length == 0)
2065 return;
2066 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2068 /* we modify the TLB cache so that the dirty bit will be set again
2069 when accessing the range */
2070 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2071 /* Check that we don't span multiple blocks - this breaks the
2072 address comparisons below. */
2073 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2074 != (end - 1) - start) {
2075 abort();
2078 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2079 int mmu_idx;
2080 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2081 for(i = 0; i < CPU_TLB_SIZE; i++)
2082 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2083 start1, length);
2088 int cpu_physical_memory_set_dirty_tracking(int enable)
2090 int ret = 0;
2091 in_migration = enable;
2092 return ret;
2095 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2097 ram_addr_t ram_addr;
2098 void *p;
2100 if (tlb_is_dirty_ram(tlb_entry)) {
2101 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2102 + tlb_entry->addend);
2103 ram_addr = qemu_ram_addr_from_host_nofail(p);
2104 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2105 tlb_entry->addr_write |= TLB_NOTDIRTY;
2110 /* update the TLB according to the current state of the dirty bits */
2111 void cpu_tlb_update_dirty(CPUArchState *env)
2113 int i;
2114 int mmu_idx;
2115 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2116 for(i = 0; i < CPU_TLB_SIZE; i++)
2117 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2121 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2123 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2124 tlb_entry->addr_write = vaddr;
2127 /* update the TLB corresponding to virtual page vaddr
2128 so that it is no longer dirty */
2129 static inline void tlb_set_dirty(CPUArchState *env, target_ulong vaddr)
2131 int i;
2132 int mmu_idx;
2134 vaddr &= TARGET_PAGE_MASK;
2135 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2136 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2137 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2140 /* Our TLB does not support large pages, so remember the area covered by
2141 large pages and trigger a full TLB flush if these are invalidated. */
2142 static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
2143 target_ulong size)
2145 target_ulong mask = ~(size - 1);
2147 if (env->tlb_flush_addr == (target_ulong)-1) {
2148 env->tlb_flush_addr = vaddr & mask;
2149 env->tlb_flush_mask = mask;
2150 return;
2152 /* Extend the existing region to include the new page.
2153 This is a compromise between unnecessary flushes and the cost
2154 of maintaining a full variable size TLB. */
2155 mask &= env->tlb_flush_mask;
2156 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2157 mask <<= 1;
2159 env->tlb_flush_addr &= mask;
2160 env->tlb_flush_mask = mask;
2163 static bool is_ram_rom(MemoryRegionSection *s)
2165 return memory_region_is_ram(s->mr);
2168 static bool is_romd(MemoryRegionSection *s)
2170 MemoryRegion *mr = s->mr;
2172 return mr->rom_device && mr->readable;
2175 static bool is_ram_rom_romd(MemoryRegionSection *s)
2177 return is_ram_rom(s) || is_romd(s);
2180 /* Add a new TLB entry. At most one entry for a given virtual address
2181 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2182 supplied size is only used by tlb_flush_page. */
2183 void tlb_set_page(CPUArchState *env, target_ulong vaddr,
2184 target_phys_addr_t paddr, int prot,
2185 int mmu_idx, target_ulong size)
2187 MemoryRegionSection *section;
2188 unsigned int index;
2189 target_ulong address;
2190 target_ulong code_address;
2191 unsigned long addend;
2192 CPUTLBEntry *te;
2193 CPUWatchpoint *wp;
2194 target_phys_addr_t iotlb;
2196 assert(size >= TARGET_PAGE_SIZE);
2197 if (size != TARGET_PAGE_SIZE) {
2198 tlb_add_large_page(env, vaddr, size);
2200 section = phys_page_find(paddr >> TARGET_PAGE_BITS);
2201 #if defined(DEBUG_TLB)
2202 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2203 " prot=%x idx=%d pd=0x%08lx\n",
2204 vaddr, paddr, prot, mmu_idx, pd);
2205 #endif
2207 address = vaddr;
2208 if (!is_ram_rom_romd(section)) {
2209 /* IO memory case (romd handled later) */
2210 address |= TLB_MMIO;
2212 if (is_ram_rom_romd(section)) {
2213 addend = (unsigned long)memory_region_get_ram_ptr(section->mr)
2214 + section_addr(section, paddr);
2215 } else {
2216 addend = 0;
2218 if (is_ram_rom(section)) {
2219 /* Normal RAM. */
2220 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2221 + section_addr(section, paddr);
2222 if (!section->readonly)
2223 iotlb |= phys_section_notdirty;
2224 else
2225 iotlb |= phys_section_rom;
2226 } else {
2227 /* IO handlers are currently passed a physical address.
2228 It would be nice to pass an offset from the base address
2229 of that region. This would avoid having to special case RAM,
2230 and avoid full address decoding in every device.
2231 We can't use the high bits of pd for this because
2232 IO_MEM_ROMD uses these as a ram address. */
2233 iotlb = section - phys_sections;
2234 iotlb += section_addr(section, paddr);
2237 code_address = address;
2238 /* Make accesses to pages with watchpoints go via the
2239 watchpoint trap routines. */
2240 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2241 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2242 /* Avoid trapping reads of pages with a write breakpoint. */
2243 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2244 iotlb = phys_section_watch + paddr;
2245 address |= TLB_MMIO;
2246 break;
2251 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2252 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2253 te = &env->tlb_table[mmu_idx][index];
2254 te->addend = addend - vaddr;
2255 if (prot & PAGE_READ) {
2256 te->addr_read = address;
2257 } else {
2258 te->addr_read = -1;
2261 if (prot & PAGE_EXEC) {
2262 te->addr_code = code_address;
2263 } else {
2264 te->addr_code = -1;
2266 if (prot & PAGE_WRITE) {
2267 if ((memory_region_is_ram(section->mr) && section->readonly)
2268 || is_romd(section)) {
2269 /* Write access calls the I/O callback. */
2270 te->addr_write = address | TLB_MMIO;
2271 } else if (memory_region_is_ram(section->mr)
2272 && !cpu_physical_memory_is_dirty(
2273 section->mr->ram_addr
2274 + section_addr(section, paddr))) {
2275 te->addr_write = address | TLB_NOTDIRTY;
2276 } else {
2277 te->addr_write = address;
2279 } else {
2280 te->addr_write = -1;
2284 #else
2286 void tlb_flush(CPUArchState *env, int flush_global)
2290 void tlb_flush_page(CPUArchState *env, target_ulong addr)
2295 * Walks guest process memory "regions" one by one
2296 * and calls callback function 'fn' for each region.
2299 struct walk_memory_regions_data
2301 walk_memory_regions_fn fn;
2302 void *priv;
2303 unsigned long start;
2304 int prot;
2307 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2308 abi_ulong end, int new_prot)
2310 if (data->start != -1ul) {
2311 int rc = data->fn(data->priv, data->start, end, data->prot);
2312 if (rc != 0) {
2313 return rc;
2317 data->start = (new_prot ? end : -1ul);
2318 data->prot = new_prot;
2320 return 0;
2323 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2324 abi_ulong base, int level, void **lp)
2326 abi_ulong pa;
2327 int i, rc;
2329 if (*lp == NULL) {
2330 return walk_memory_regions_end(data, base, 0);
2333 if (level == 0) {
2334 PageDesc *pd = *lp;
2335 for (i = 0; i < L2_SIZE; ++i) {
2336 int prot = pd[i].flags;
2338 pa = base | (i << TARGET_PAGE_BITS);
2339 if (prot != data->prot) {
2340 rc = walk_memory_regions_end(data, pa, prot);
2341 if (rc != 0) {
2342 return rc;
2346 } else {
2347 void **pp = *lp;
2348 for (i = 0; i < L2_SIZE; ++i) {
2349 pa = base | ((abi_ulong)i <<
2350 (TARGET_PAGE_BITS + L2_BITS * level));
2351 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2352 if (rc != 0) {
2353 return rc;
2358 return 0;
2361 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2363 struct walk_memory_regions_data data;
2364 unsigned long i;
2366 data.fn = fn;
2367 data.priv = priv;
2368 data.start = -1ul;
2369 data.prot = 0;
2371 for (i = 0; i < V_L1_SIZE; i++) {
2372 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2373 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2374 if (rc != 0) {
2375 return rc;
2379 return walk_memory_regions_end(&data, 0, 0);
2382 static int dump_region(void *priv, abi_ulong start,
2383 abi_ulong end, unsigned long prot)
2385 FILE *f = (FILE *)priv;
2387 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2388 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2389 start, end, end - start,
2390 ((prot & PAGE_READ) ? 'r' : '-'),
2391 ((prot & PAGE_WRITE) ? 'w' : '-'),
2392 ((prot & PAGE_EXEC) ? 'x' : '-'));
2394 return (0);
2397 /* dump memory mappings */
2398 void page_dump(FILE *f)
2400 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2401 "start", "end", "size", "prot");
2402 walk_memory_regions(f, dump_region);
2405 int page_get_flags(target_ulong address)
2407 PageDesc *p;
2409 p = page_find(address >> TARGET_PAGE_BITS);
2410 if (!p)
2411 return 0;
2412 return p->flags;
2415 /* Modify the flags of a page and invalidate the code if necessary.
2416 The flag PAGE_WRITE_ORG is positioned automatically depending
2417 on PAGE_WRITE. The mmap_lock should already be held. */
2418 void page_set_flags(target_ulong start, target_ulong end, int flags)
2420 target_ulong addr, len;
2422 /* This function should never be called with addresses outside the
2423 guest address space. If this assert fires, it probably indicates
2424 a missing call to h2g_valid. */
2425 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2426 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2427 #endif
2428 assert(start < end);
2430 start = start & TARGET_PAGE_MASK;
2431 end = TARGET_PAGE_ALIGN(end);
2433 if (flags & PAGE_WRITE) {
2434 flags |= PAGE_WRITE_ORG;
2437 for (addr = start, len = end - start;
2438 len != 0;
2439 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2440 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2442 /* If the write protection bit is set, then we invalidate
2443 the code inside. */
2444 if (!(p->flags & PAGE_WRITE) &&
2445 (flags & PAGE_WRITE) &&
2446 p->first_tb) {
2447 tb_invalidate_phys_page(addr, 0, NULL);
2449 p->flags = flags;
2453 int page_check_range(target_ulong start, target_ulong len, int flags)
2455 PageDesc *p;
2456 target_ulong end;
2457 target_ulong addr;
2459 /* This function should never be called with addresses outside the
2460 guest address space. If this assert fires, it probably indicates
2461 a missing call to h2g_valid. */
2462 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2463 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2464 #endif
2466 if (len == 0) {
2467 return 0;
2469 if (start + len - 1 < start) {
2470 /* We've wrapped around. */
2471 return -1;
2474 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2475 start = start & TARGET_PAGE_MASK;
2477 for (addr = start, len = end - start;
2478 len != 0;
2479 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2480 p = page_find(addr >> TARGET_PAGE_BITS);
2481 if( !p )
2482 return -1;
2483 if( !(p->flags & PAGE_VALID) )
2484 return -1;
2486 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2487 return -1;
2488 if (flags & PAGE_WRITE) {
2489 if (!(p->flags & PAGE_WRITE_ORG))
2490 return -1;
2491 /* unprotect the page if it was put read-only because it
2492 contains translated code */
2493 if (!(p->flags & PAGE_WRITE)) {
2494 if (!page_unprotect(addr, 0, NULL))
2495 return -1;
2497 return 0;
2500 return 0;
2503 /* called from signal handler: invalidate the code and unprotect the
2504 page. Return TRUE if the fault was successfully handled. */
2505 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2507 unsigned int prot;
2508 PageDesc *p;
2509 target_ulong host_start, host_end, addr;
2511 /* Technically this isn't safe inside a signal handler. However we
2512 know this only ever happens in a synchronous SEGV handler, so in
2513 practice it seems to be ok. */
2514 mmap_lock();
2516 p = page_find(address >> TARGET_PAGE_BITS);
2517 if (!p) {
2518 mmap_unlock();
2519 return 0;
2522 /* if the page was really writable, then we change its
2523 protection back to writable */
2524 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2525 host_start = address & qemu_host_page_mask;
2526 host_end = host_start + qemu_host_page_size;
2528 prot = 0;
2529 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2530 p = page_find(addr >> TARGET_PAGE_BITS);
2531 p->flags |= PAGE_WRITE;
2532 prot |= p->flags;
2534 /* and since the content will be modified, we must invalidate
2535 the corresponding translated code. */
2536 tb_invalidate_phys_page(addr, pc, puc);
2537 #ifdef DEBUG_TB_CHECK
2538 tb_invalidate_check(addr);
2539 #endif
2541 mprotect((void *)g2h(host_start), qemu_host_page_size,
2542 prot & PAGE_BITS);
2544 mmap_unlock();
2545 return 1;
2547 mmap_unlock();
2548 return 0;
2551 static inline void tlb_set_dirty(CPUArchState *env,
2552 unsigned long addr, target_ulong vaddr)
2555 #endif /* defined(CONFIG_USER_ONLY) */
2557 #if !defined(CONFIG_USER_ONLY)
2559 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2560 typedef struct subpage_t {
2561 MemoryRegion iomem;
2562 target_phys_addr_t base;
2563 uint16_t sub_section[TARGET_PAGE_SIZE];
2564 } subpage_t;
2566 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2567 uint16_t section);
2568 static subpage_t *subpage_init(target_phys_addr_t base);
2569 static void destroy_page_desc(uint16_t section_index)
2571 MemoryRegionSection *section = &phys_sections[section_index];
2572 MemoryRegion *mr = section->mr;
2574 if (mr->subpage) {
2575 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2576 memory_region_destroy(&subpage->iomem);
2577 g_free(subpage);
2581 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2583 unsigned i;
2584 PhysPageEntry *p;
2586 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2587 return;
2590 p = phys_map_nodes[lp->ptr];
2591 for (i = 0; i < L2_SIZE; ++i) {
2592 if (!p[i].is_leaf) {
2593 destroy_l2_mapping(&p[i], level - 1);
2594 } else {
2595 destroy_page_desc(p[i].ptr);
2598 lp->is_leaf = 0;
2599 lp->ptr = PHYS_MAP_NODE_NIL;
2602 static void destroy_all_mappings(void)
2604 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2605 phys_map_nodes_reset();
2608 static uint16_t phys_section_add(MemoryRegionSection *section)
2610 if (phys_sections_nb == phys_sections_nb_alloc) {
2611 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2612 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2613 phys_sections_nb_alloc);
2615 phys_sections[phys_sections_nb] = *section;
2616 return phys_sections_nb++;
2619 static void phys_sections_clear(void)
2621 phys_sections_nb = 0;
2624 /* register physical memory.
2625 For RAM, 'size' must be a multiple of the target page size.
2626 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2627 io memory page. The address used when calling the IO function is
2628 the offset from the start of the region, plus region_offset. Both
2629 start_addr and region_offset are rounded down to a page boundary
2630 before calculating this offset. This should not be a problem unless
2631 the low bits of start_addr and region_offset differ. */
2632 static void register_subpage(MemoryRegionSection *section)
2634 subpage_t *subpage;
2635 target_phys_addr_t base = section->offset_within_address_space
2636 & TARGET_PAGE_MASK;
2637 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2638 MemoryRegionSection subsection = {
2639 .offset_within_address_space = base,
2640 .size = TARGET_PAGE_SIZE,
2642 target_phys_addr_t start, end;
2644 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2646 if (!(existing->mr->subpage)) {
2647 subpage = subpage_init(base);
2648 subsection.mr = &subpage->iomem;
2649 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2650 phys_section_add(&subsection));
2651 } else {
2652 subpage = container_of(existing->mr, subpage_t, iomem);
2654 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2655 end = start + section->size;
2656 subpage_register(subpage, start, end, phys_section_add(section));
2660 static void register_multipage(MemoryRegionSection *section)
2662 target_phys_addr_t start_addr = section->offset_within_address_space;
2663 ram_addr_t size = section->size;
2664 target_phys_addr_t addr;
2665 uint16_t section_index = phys_section_add(section);
2667 assert(size);
2669 addr = start_addr;
2670 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2671 section_index);
2674 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2675 bool readonly)
2677 MemoryRegionSection now = *section, remain = *section;
2679 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2680 || (now.size < TARGET_PAGE_SIZE)) {
2681 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2682 - now.offset_within_address_space,
2683 now.size);
2684 register_subpage(&now);
2685 remain.size -= now.size;
2686 remain.offset_within_address_space += now.size;
2687 remain.offset_within_region += now.size;
2689 now = remain;
2690 now.size &= TARGET_PAGE_MASK;
2691 if (now.size) {
2692 register_multipage(&now);
2693 remain.size -= now.size;
2694 remain.offset_within_address_space += now.size;
2695 remain.offset_within_region += now.size;
2697 now = remain;
2698 if (now.size) {
2699 register_subpage(&now);
2704 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2706 if (kvm_enabled())
2707 kvm_coalesce_mmio_region(addr, size);
2710 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2712 if (kvm_enabled())
2713 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced-MMIO buffer; a no-op when KVM is off. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2722 #if defined(__linux__) && !defined(TARGET_S390X)
2724 #include <sys/vfs.h>
2726 #define HUGETLBFS_MAGIC 0x958458f6
2728 static long gethugepagesize(const char *path)
2730 struct statfs fs;
2731 int ret;
2733 do {
2734 ret = statfs(path, &fs);
2735 } while (ret != 0 && errno == EINTR);
2737 if (ret != 0) {
2738 perror(path);
2739 return 0;
2742 if (fs.f_type != HUGETLBFS_MAGIC)
2743 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2745 return fs.f_bsize;
2748 static void *file_ram_alloc(RAMBlock *block,
2749 ram_addr_t memory,
2750 const char *path)
2752 char *filename;
2753 void *area;
2754 int fd;
2755 #ifdef MAP_POPULATE
2756 int flags;
2757 #endif
2758 unsigned long hpagesize;
2760 hpagesize = gethugepagesize(path);
2761 if (!hpagesize) {
2762 return NULL;
2765 if (memory < hpagesize) {
2766 return NULL;
2769 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2770 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2771 return NULL;
2774 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2775 return NULL;
2778 fd = mkstemp(filename);
2779 if (fd < 0) {
2780 perror("unable to create backing store for hugepages");
2781 free(filename);
2782 return NULL;
2784 unlink(filename);
2785 free(filename);
2787 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2790 * ftruncate is not supported by hugetlbfs in older
2791 * hosts, so don't bother bailing out on errors.
2792 * If anything goes wrong with it under other filesystems,
2793 * mmap will fail.
2795 if (ftruncate(fd, memory))
2796 perror("ftruncate");
2798 #ifdef MAP_POPULATE
2799 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2800 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2801 * to sidestep this quirk.
2803 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2804 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2805 #else
2806 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2807 #endif
2808 if (area == MAP_FAILED) {
2809 perror("file_ram_alloc: can't mmap RAM pages");
2810 close(fd);
2811 return (NULL);
2813 block->fd = fd;
2814 return area;
2816 #endif
2818 static ram_addr_t find_ram_offset(ram_addr_t size)
2820 RAMBlock *block, *next_block;
2821 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2823 if (QLIST_EMPTY(&ram_list.blocks))
2824 return 0;
2826 QLIST_FOREACH(block, &ram_list.blocks, next) {
2827 ram_addr_t end, next = RAM_ADDR_MAX;
2829 end = block->offset + block->length;
2831 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2832 if (next_block->offset >= end) {
2833 next = MIN(next, next_block->offset);
2836 if (next - end >= size && next - end < mingap) {
2837 offset = end;
2838 mingap = next - end;
2842 if (offset == RAM_ADDR_MAX) {
2843 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2844 (uint64_t)size);
2845 abort();
2848 return offset;
2851 static ram_addr_t last_ram_offset(void)
2853 RAMBlock *block;
2854 ram_addr_t last = 0;
2856 QLIST_FOREACH(block, &ram_list.blocks, next)
2857 last = MAX(last, block->offset + block->length);
2859 return last;
2862 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2864 RAMBlock *new_block, *block;
2866 new_block = NULL;
2867 QLIST_FOREACH(block, &ram_list.blocks, next) {
2868 if (block->offset == addr) {
2869 new_block = block;
2870 break;
2873 assert(new_block);
2874 assert(!new_block->idstr[0]);
2876 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2877 char *id = dev->parent_bus->info->get_dev_path(dev);
2878 if (id) {
2879 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2880 g_free(id);
2883 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2885 QLIST_FOREACH(block, &ram_list.blocks, next) {
2886 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2887 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2888 new_block->idstr);
2889 abort();
2894 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2895 MemoryRegion *mr)
2897 RAMBlock *new_block;
2899 size = TARGET_PAGE_ALIGN(size);
2900 new_block = g_malloc0(sizeof(*new_block));
2902 new_block->mr = mr;
2903 new_block->offset = find_ram_offset(size);
2904 if (host) {
2905 new_block->host = host;
2906 new_block->flags |= RAM_PREALLOC_MASK;
2907 } else {
2908 if (mem_path) {
2909 #if defined (__linux__) && !defined(TARGET_S390X)
2910 new_block->host = file_ram_alloc(new_block, size, mem_path);
2911 if (!new_block->host) {
2912 new_block->host = qemu_vmalloc(size);
2913 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2915 #else
2916 fprintf(stderr, "-mem-path option unsupported\n");
2917 exit(1);
2918 #endif
2919 } else {
2920 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2921 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2922 an system defined value, which is at least 256GB. Larger systems
2923 have larger values. We put the guest between the end of data
2924 segment (system break) and this value. We use 32GB as a base to
2925 have enough room for the system break to grow. */
2926 new_block->host = mmap((void*)0x800000000, size,
2927 PROT_EXEC|PROT_READ|PROT_WRITE,
2928 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2929 if (new_block->host == MAP_FAILED) {
2930 fprintf(stderr, "Allocating RAM failed\n");
2931 abort();
2933 #else
2934 if (xen_enabled()) {
2935 xen_ram_alloc(new_block->offset, size, mr);
2936 } else {
2937 new_block->host = qemu_vmalloc(size);
2939 #endif
2940 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2943 new_block->length = size;
2945 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2947 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2948 last_ram_offset() >> TARGET_PAGE_BITS);
2949 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2950 0xff, size >> TARGET_PAGE_BITS);
2952 if (kvm_enabled())
2953 kvm_setup_guest_memory(new_block->host, size);
2955 return new_block->offset;
2958 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2960 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2963 void qemu_ram_free_from_ptr(ram_addr_t addr)
2965 RAMBlock *block;
2967 QLIST_FOREACH(block, &ram_list.blocks, next) {
2968 if (addr == block->offset) {
2969 QLIST_REMOVE(block, next);
2970 g_free(block);
2971 return;
2976 void qemu_ram_free(ram_addr_t addr)
2978 RAMBlock *block;
2980 QLIST_FOREACH(block, &ram_list.blocks, next) {
2981 if (addr == block->offset) {
2982 QLIST_REMOVE(block, next);
2983 if (block->flags & RAM_PREALLOC_MASK) {
2985 } else if (mem_path) {
2986 #if defined (__linux__) && !defined(TARGET_S390X)
2987 if (block->fd) {
2988 munmap(block->host, block->length);
2989 close(block->fd);
2990 } else {
2991 qemu_vfree(block->host);
2993 #else
2994 abort();
2995 #endif
2996 } else {
2997 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2998 munmap(block->host, block->length);
2999 #else
3000 if (xen_enabled()) {
3001 xen_invalidate_map_cache_entry(block->host);
3002 } else {
3003 qemu_vfree(block->host);
3005 #endif
3007 g_free(block);
3008 return;
3014 #ifndef _WIN32
3015 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3017 RAMBlock *block;
3018 ram_addr_t offset;
3019 int flags;
3020 void *area, *vaddr;
3022 QLIST_FOREACH(block, &ram_list.blocks, next) {
3023 offset = addr - block->offset;
3024 if (offset < block->length) {
3025 vaddr = block->host + offset;
3026 if (block->flags & RAM_PREALLOC_MASK) {
3028 } else {
3029 flags = MAP_FIXED;
3030 munmap(vaddr, length);
3031 if (mem_path) {
3032 #if defined(__linux__) && !defined(TARGET_S390X)
3033 if (block->fd) {
3034 #ifdef MAP_POPULATE
3035 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3036 MAP_PRIVATE;
3037 #else
3038 flags |= MAP_PRIVATE;
3039 #endif
3040 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3041 flags, block->fd, offset);
3042 } else {
3043 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3044 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3045 flags, -1, 0);
3047 #else
3048 abort();
3049 #endif
3050 } else {
3051 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3052 flags |= MAP_SHARED | MAP_ANONYMOUS;
3053 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3054 flags, -1, 0);
3055 #else
3056 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3057 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3058 flags, -1, 0);
3059 #endif
3061 if (area != vaddr) {
3062 fprintf(stderr, "Could not remap addr: "
3063 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3064 length, addr);
3065 exit(1);
3067 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3069 return;
3073 #endif /* !_WIN32 */
3075 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3076 With the exception of the softmmu code in this file, this should
3077 only be used for local memory (e.g. video ram) that the device owns,
3078 and knows it isn't going to access beyond the end of the block.
3080 It should not be used for general purpose DMA.
3081 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3083 void *qemu_get_ram_ptr(ram_addr_t addr)
3085 RAMBlock *block;
3087 QLIST_FOREACH(block, &ram_list.blocks, next) {
3088 if (addr - block->offset < block->length) {
3089 /* Move this entry to to start of the list. */
3090 if (block != QLIST_FIRST(&ram_list.blocks)) {
3091 QLIST_REMOVE(block, next);
3092 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3094 if (xen_enabled()) {
3095 /* We need to check if the requested address is in the RAM
3096 * because we don't want to map the entire memory in QEMU.
3097 * In that case just map until the end of the page.
3099 if (block->offset == 0) {
3100 return xen_map_cache(addr, 0, 0);
3101 } else if (block->host == NULL) {
3102 block->host =
3103 xen_map_cache(block->offset, block->length, 1);
3106 return block->host + (addr - block->offset);
3110 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3111 abort();
3113 return NULL;
3116 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3117 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3119 void *qemu_safe_ram_ptr(ram_addr_t addr)
3121 RAMBlock *block;
3123 QLIST_FOREACH(block, &ram_list.blocks, next) {
3124 if (addr - block->offset < block->length) {
3125 if (xen_enabled()) {
3126 /* We need to check if the requested address is in the RAM
3127 * because we don't want to map the entire memory in QEMU.
3128 * In that case just map until the end of the page.
3130 if (block->offset == 0) {
3131 return xen_map_cache(addr, 0, 0);
3132 } else if (block->host == NULL) {
3133 block->host =
3134 xen_map_cache(block->offset, block->length, 1);
3137 return block->host + (addr - block->offset);
3141 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3142 abort();
3144 return NULL;
3147 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3148 * but takes a size argument */
3149 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3151 if (*size == 0) {
3152 return NULL;
3154 if (xen_enabled()) {
3155 return xen_map_cache(addr, *size, 1);
3156 } else {
3157 RAMBlock *block;
3159 QLIST_FOREACH(block, &ram_list.blocks, next) {
3160 if (addr - block->offset < block->length) {
3161 if (addr - block->offset + *size > block->length)
3162 *size = block->length - addr + block->offset;
3163 return block->host + (addr - block->offset);
3167 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3168 abort();
/* Counterpart of qemu_get_ram_ptr(); here it only emits a trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3177 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3179 RAMBlock *block;
3180 uint8_t *host = ptr;
3182 if (xen_enabled()) {
3183 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3184 return 0;
3187 QLIST_FOREACH(block, &ram_list.blocks, next) {
3188 /* This case append when the block is not mapped. */
3189 if (block->host == NULL) {
3190 continue;
3192 if (host - block->host < block->length) {
3193 *ram_addr = block->offset + (host - block->host);
3194 return 0;
3198 return -1;
3201 /* Some of the softmmu routines need to translate from a host pointer
3202 (typically a TLB entry) back to a ram offset. */
3203 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3205 ram_addr_t ram_addr;
3207 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3208 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3209 abort();
3211 return ram_addr;
3214 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3215 unsigned size)
3217 #ifdef DEBUG_UNASSIGNED
3218 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3219 #endif
3220 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3221 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3222 #endif
3223 return 0;
3226 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3227 uint64_t val, unsigned size)
3229 #ifdef DEBUG_UNASSIGNED
3230 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3231 #endif
3232 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3233 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3234 #endif
3237 static const MemoryRegionOps unassigned_mem_ops = {
3238 .read = unassigned_mem_read,
3239 .write = unassigned_mem_write,
3240 .endianness = DEVICE_NATIVE_ENDIAN,
3243 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3244 unsigned size)
3246 abort();
3249 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3250 uint64_t value, unsigned size)
3252 abort();
3255 static const MemoryRegionOps error_mem_ops = {
3256 .read = error_mem_read,
3257 .write = error_mem_write,
3258 .endianness = DEVICE_NATIVE_ENDIAN,
3261 static const MemoryRegionOps rom_mem_ops = {
3262 .read = error_mem_read,
3263 .write = unassigned_mem_write,
3264 .endianness = DEVICE_NATIVE_ENDIAN,
3267 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3268 uint64_t val, unsigned size)
3270 int dirty_flags;
3271 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3272 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3273 #if !defined(CONFIG_USER_ONLY)
3274 tb_invalidate_phys_page_fast(ram_addr, size);
3275 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3276 #endif
3278 switch (size) {
3279 case 1:
3280 stb_p(qemu_get_ram_ptr(ram_addr), val);
3281 break;
3282 case 2:
3283 stw_p(qemu_get_ram_ptr(ram_addr), val);
3284 break;
3285 case 4:
3286 stl_p(qemu_get_ram_ptr(ram_addr), val);
3287 break;
3288 default:
3289 abort();
3291 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3292 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3293 /* we remove the notdirty callback only if the code has been
3294 flushed */
3295 if (dirty_flags == 0xff)
3296 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3299 static const MemoryRegionOps notdirty_mem_ops = {
3300 .read = error_mem_read,
3301 .write = notdirty_mem_write,
3302 .endianness = DEVICE_NATIVE_ENDIAN,
3305 /* Generate a debug exception if a watchpoint has been hit. */
3306 static void check_watchpoint(int offset, int len_mask, int flags)
3308 CPUArchState *env = cpu_single_env;
3309 target_ulong pc, cs_base;
3310 TranslationBlock *tb;
3311 target_ulong vaddr;
3312 CPUWatchpoint *wp;
3313 int cpu_flags;
3315 if (env->watchpoint_hit) {
3316 /* We re-entered the check after replacing the TB. Now raise
3317 * the debug interrupt so that is will trigger after the
3318 * current instruction. */
3319 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3320 return;
3322 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3323 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3324 if ((vaddr == (wp->vaddr & len_mask) ||
3325 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3326 wp->flags |= BP_WATCHPOINT_HIT;
3327 if (!env->watchpoint_hit) {
3328 env->watchpoint_hit = wp;
3329 tb = tb_find_pc(env->mem_io_pc);
3330 if (!tb) {
3331 cpu_abort(env, "check_watchpoint: could not find TB for "
3332 "pc=%p", (void *)env->mem_io_pc);
3334 cpu_restore_state(tb, env, env->mem_io_pc);
3335 tb_phys_invalidate(tb, -1);
3336 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3337 env->exception_index = EXCP_DEBUG;
3338 cpu_loop_exit(env);
3339 } else {
3340 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3341 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3342 cpu_resume_from_signal(env, NULL);
3345 } else {
3346 wp->flags &= ~BP_WATCHPOINT_HIT;
3351 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3352 so these check for a hit then pass through to the normal out-of-line
3353 phys routines. */
3354 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3355 unsigned size)
3357 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3358 switch (size) {
3359 case 1: return ldub_phys(addr);
3360 case 2: return lduw_phys(addr);
3361 case 4: return ldl_phys(addr);
3362 default: abort();
3366 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3367 uint64_t val, unsigned size)
3369 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3370 switch (size) {
3371 case 1:
3372 stb_phys(addr, val);
3373 break;
3374 case 2:
3375 stw_phys(addr, val);
3376 break;
3377 case 4:
3378 stl_phys(addr, val);
3379 break;
3380 default: abort();
3384 static const MemoryRegionOps watch_mem_ops = {
3385 .read = watch_mem_read,
3386 .write = watch_mem_write,
3387 .endianness = DEVICE_NATIVE_ENDIAN,
3390 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3391 unsigned len)
3393 subpage_t *mmio = opaque;
3394 unsigned int idx = SUBPAGE_IDX(addr);
3395 MemoryRegionSection *section;
3396 #if defined(DEBUG_SUBPAGE)
3397 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3398 mmio, len, addr, idx);
3399 #endif
3401 section = &phys_sections[mmio->sub_section[idx]];
3402 addr += mmio->base;
3403 addr -= section->offset_within_address_space;
3404 addr += section->offset_within_region;
3405 return io_mem_read(section->mr, addr, len);
3408 static void subpage_write(void *opaque, target_phys_addr_t addr,
3409 uint64_t value, unsigned len)
3411 subpage_t *mmio = opaque;
3412 unsigned int idx = SUBPAGE_IDX(addr);
3413 MemoryRegionSection *section;
3414 #if defined(DEBUG_SUBPAGE)
3415 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3416 " idx %d value %"PRIx64"\n",
3417 __func__, mmio, len, addr, idx, value);
3418 #endif
3420 section = &phys_sections[mmio->sub_section[idx]];
3421 addr += mmio->base;
3422 addr -= section->offset_within_address_space;
3423 addr += section->offset_within_region;
3424 io_mem_write(section->mr, addr, value, len);
3427 static const MemoryRegionOps subpage_ops = {
3428 .read = subpage_read,
3429 .write = subpage_write,
3430 .endianness = DEVICE_NATIVE_ENDIAN,
3433 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3434 unsigned size)
3436 ram_addr_t raddr = addr;
3437 void *ptr = qemu_get_ram_ptr(raddr);
3438 switch (size) {
3439 case 1: return ldub_p(ptr);
3440 case 2: return lduw_p(ptr);
3441 case 4: return ldl_p(ptr);
3442 default: abort();
3446 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3447 uint64_t value, unsigned size)
3449 ram_addr_t raddr = addr;
3450 void *ptr = qemu_get_ram_ptr(raddr);
3451 switch (size) {
3452 case 1: return stb_p(ptr, value);
3453 case 2: return stw_p(ptr, value);
3454 case 4: return stl_p(ptr, value);
3455 default: abort();
3459 static const MemoryRegionOps subpage_ram_ops = {
3460 .read = subpage_ram_read,
3461 .write = subpage_ram_write,
3462 .endianness = DEVICE_NATIVE_ENDIAN,
3465 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3466 uint16_t section)
3468 int idx, eidx;
3470 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3471 return -1;
3472 idx = SUBPAGE_IDX(start);
3473 eidx = SUBPAGE_IDX(end);
3474 #if defined(DEBUG_SUBPAGE)
3475 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3476 mmio, start, end, idx, eidx, memory);
3477 #endif
3478 if (memory_region_is_ram(phys_sections[section].mr)) {
3479 MemoryRegionSection new_section = phys_sections[section];
3480 new_section.mr = &io_mem_subpage_ram;
3481 section = phys_section_add(&new_section);
3483 for (; idx <= eidx; idx++) {
3484 mmio->sub_section[idx] = section;
3487 return 0;
3490 static subpage_t *subpage_init(target_phys_addr_t base)
3492 subpage_t *mmio;
3494 mmio = g_malloc0(sizeof(subpage_t));
3496 mmio->base = base;
3497 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3498 "subpage", TARGET_PAGE_SIZE);
3499 mmio->iomem.subpage = true;
3500 #if defined(DEBUG_SUBPAGE)
3501 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3502 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3503 #endif
3504 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3506 return mmio;
3509 static uint16_t dummy_section(MemoryRegion *mr)
3511 MemoryRegionSection section = {
3512 .mr = mr,
3513 .offset_within_address_space = 0,
3514 .offset_within_region = 0,
3515 .size = UINT64_MAX,
3518 return phys_section_add(&section);
3521 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3523 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3526 static void io_mem_init(void)
3528 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3529 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3530 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3531 "unassigned", UINT64_MAX);
3532 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3533 "notdirty", UINT64_MAX);
3534 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3535 "subpage-ram", UINT64_MAX);
3536 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3537 "watch", UINT64_MAX);
3540 static void core_begin(MemoryListener *listener)
3542 destroy_all_mappings();
3543 phys_sections_clear();
3544 phys_map.ptr = PHYS_MAP_NODE_NIL;
3545 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3546 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3547 phys_section_rom = dummy_section(&io_mem_rom);
3548 phys_section_watch = dummy_section(&io_mem_watch);
3551 static void core_commit(MemoryListener *listener)
3553 CPUArchState *env;
3555 /* since each CPU stores ram addresses in its TLB cache, we must
3556 reset the modified entries */
3557 /* XXX: slow ! */
3558 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3559 tlb_flush(env, 1);
3563 static void core_region_add(MemoryListener *listener,
3564 MemoryRegionSection *section)
3566 cpu_register_physical_memory_log(section, section->readonly);
3569 static void core_region_del(MemoryListener *listener,
3570 MemoryRegionSection *section)
3574 static void core_region_nop(MemoryListener *listener,
3575 MemoryRegionSection *section)
3577 cpu_register_physical_memory_log(section, section->readonly);
3580 static void core_log_start(MemoryListener *listener,
3581 MemoryRegionSection *section)
3585 static void core_log_stop(MemoryListener *listener,
3586 MemoryRegionSection *section)
3590 static void core_log_sync(MemoryListener *listener,
3591 MemoryRegionSection *section)
3595 static void core_log_global_start(MemoryListener *listener)
3597 cpu_physical_memory_set_dirty_tracking(1);
3600 static void core_log_global_stop(MemoryListener *listener)
3602 cpu_physical_memory_set_dirty_tracking(0);
3605 static void core_eventfd_add(MemoryListener *listener,
3606 MemoryRegionSection *section,
3607 bool match_data, uint64_t data, int fd)
3611 static void core_eventfd_del(MemoryListener *listener,
3612 MemoryRegionSection *section,
3613 bool match_data, uint64_t data, int fd)
3617 static void io_begin(MemoryListener *listener)
3621 static void io_commit(MemoryListener *listener)
3625 static void io_region_add(MemoryListener *listener,
3626 MemoryRegionSection *section)
3628 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3630 mrio->mr = section->mr;
3631 mrio->offset = section->offset_within_region;
3632 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3633 section->offset_within_address_space, section->size);
3634 ioport_register(&mrio->iorange);
3637 static void io_region_del(MemoryListener *listener,
3638 MemoryRegionSection *section)
3640 isa_unassign_ioport(section->offset_within_address_space, section->size);
3643 static void io_region_nop(MemoryListener *listener,
3644 MemoryRegionSection *section)
3648 static void io_log_start(MemoryListener *listener,
3649 MemoryRegionSection *section)
3653 static void io_log_stop(MemoryListener *listener,
3654 MemoryRegionSection *section)
3658 static void io_log_sync(MemoryListener *listener,
3659 MemoryRegionSection *section)
3663 static void io_log_global_start(MemoryListener *listener)
3667 static void io_log_global_stop(MemoryListener *listener)
3671 static void io_eventfd_add(MemoryListener *listener,
3672 MemoryRegionSection *section,
3673 bool match_data, uint64_t data, int fd)
3677 static void io_eventfd_del(MemoryListener *listener,
3678 MemoryRegionSection *section,
3679 bool match_data, uint64_t data, int fd)
3683 static MemoryListener core_memory_listener = {
3684 .begin = core_begin,
3685 .commit = core_commit,
3686 .region_add = core_region_add,
3687 .region_del = core_region_del,
3688 .region_nop = core_region_nop,
3689 .log_start = core_log_start,
3690 .log_stop = core_log_stop,
3691 .log_sync = core_log_sync,
3692 .log_global_start = core_log_global_start,
3693 .log_global_stop = core_log_global_stop,
3694 .eventfd_add = core_eventfd_add,
3695 .eventfd_del = core_eventfd_del,
3696 .priority = 0,
3699 static MemoryListener io_memory_listener = {
3700 .begin = io_begin,
3701 .commit = io_commit,
3702 .region_add = io_region_add,
3703 .region_del = io_region_del,
3704 .region_nop = io_region_nop,
3705 .log_start = io_log_start,
3706 .log_stop = io_log_stop,
3707 .log_sync = io_log_sync,
3708 .log_global_start = io_log_global_start,
3709 .log_global_stop = io_log_global_stop,
3710 .eventfd_add = io_eventfd_add,
3711 .eventfd_del = io_eventfd_del,
3712 .priority = 0,
3715 static void memory_map_init(void)
3717 system_memory = g_malloc(sizeof(*system_memory));
3718 memory_region_init(system_memory, "system", INT64_MAX);
3719 set_system_memory_map(system_memory);
3721 system_io = g_malloc(sizeof(*system_io));
3722 memory_region_init(system_io, "io", 65536);
3723 set_system_io_map(system_io);
3725 memory_listener_register(&core_memory_listener, system_memory);
3726 memory_listener_register(&io_memory_listener, system_io);
3729 MemoryRegion *get_system_memory(void)
3731 return system_memory;
3734 MemoryRegion *get_system_io(void)
3736 return system_io;
3739 #endif /* !defined(CONFIG_USER_ONLY) */
3741 /* physical memory access (slow version, mainly for debug) */
3742 #if defined(CONFIG_USER_ONLY)
3743 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3744 uint8_t *buf, int len, int is_write)
3746 int l, flags;
3747 target_ulong page;
3748 void * p;
3750 while (len > 0) {
3751 page = addr & TARGET_PAGE_MASK;
3752 l = (page + TARGET_PAGE_SIZE) - addr;
3753 if (l > len)
3754 l = len;
3755 flags = page_get_flags(page);
3756 if (!(flags & PAGE_VALID))
3757 return -1;
3758 if (is_write) {
3759 if (!(flags & PAGE_WRITE))
3760 return -1;
3761 /* XXX: this code should not depend on lock_user */
3762 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3763 return -1;
3764 memcpy(p, buf, l);
3765 unlock_user(p, addr, l);
3766 } else {
3767 if (!(flags & PAGE_READ))
3768 return -1;
3769 /* XXX: this code should not depend on lock_user */
3770 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3771 return -1;
3772 memcpy(buf, p, l);
3773 unlock_user(p, addr, 0);
3775 len -= l;
3776 buf += l;
3777 addr += l;
3779 return 0;
3782 #else
3783 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3784 int len, int is_write)
3786 int l;
3787 uint8_t *ptr;
3788 uint32_t val;
3789 target_phys_addr_t page;
3790 MemoryRegionSection *section;
3792 while (len > 0) {
3793 page = addr & TARGET_PAGE_MASK;
3794 l = (page + TARGET_PAGE_SIZE) - addr;
3795 if (l > len)
3796 l = len;
3797 section = phys_page_find(page >> TARGET_PAGE_BITS);
3799 if (is_write) {
3800 if (!memory_region_is_ram(section->mr)) {
3801 target_phys_addr_t addr1;
3802 addr1 = section_addr(section, addr);
3803 /* XXX: could force cpu_single_env to NULL to avoid
3804 potential bugs */
3805 if (l >= 4 && ((addr1 & 3) == 0)) {
3806 /* 32 bit write access */
3807 val = ldl_p(buf);
3808 io_mem_write(section->mr, addr1, val, 4);
3809 l = 4;
3810 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3811 /* 16 bit write access */
3812 val = lduw_p(buf);
3813 io_mem_write(section->mr, addr1, val, 2);
3814 l = 2;
3815 } else {
3816 /* 8 bit write access */
3817 val = ldub_p(buf);
3818 io_mem_write(section->mr, addr1, val, 1);
3819 l = 1;
3821 } else if (!section->readonly) {
3822 ram_addr_t addr1;
3823 addr1 = memory_region_get_ram_addr(section->mr)
3824 + section_addr(section, addr);
3825 /* RAM case */
3826 ptr = qemu_get_ram_ptr(addr1);
3827 memcpy(ptr, buf, l);
3828 if (!cpu_physical_memory_is_dirty(addr1)) {
3829 /* invalidate code */
3830 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3831 /* set dirty bit */
3832 cpu_physical_memory_set_dirty_flags(
3833 addr1, (0xff & ~CODE_DIRTY_FLAG));
3835 qemu_put_ram_ptr(ptr);
3837 } else {
3838 if (!is_ram_rom_romd(section)) {
3839 target_phys_addr_t addr1;
3840 /* I/O case */
3841 addr1 = section_addr(section, addr);
3842 if (l >= 4 && ((addr1 & 3) == 0)) {
3843 /* 32 bit read access */
3844 val = io_mem_read(section->mr, addr1, 4);
3845 stl_p(buf, val);
3846 l = 4;
3847 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3848 /* 16 bit read access */
3849 val = io_mem_read(section->mr, addr1, 2);
3850 stw_p(buf, val);
3851 l = 2;
3852 } else {
3853 /* 8 bit read access */
3854 val = io_mem_read(section->mr, addr1, 1);
3855 stb_p(buf, val);
3856 l = 1;
3858 } else {
3859 /* RAM case */
3860 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3861 + section_addr(section, addr));
3862 memcpy(buf, ptr, l);
3863 qemu_put_ram_ptr(ptr);
3866 len -= l;
3867 buf += l;
3868 addr += l;
3872 /* used for ROM loading : can write in RAM and ROM */
3873 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3874 const uint8_t *buf, int len)
3876 int l;
3877 uint8_t *ptr;
3878 target_phys_addr_t page;
3879 MemoryRegionSection *section;
3881 while (len > 0) {
3882 page = addr & TARGET_PAGE_MASK;
3883 l = (page + TARGET_PAGE_SIZE) - addr;
3884 if (l > len)
3885 l = len;
3886 section = phys_page_find(page >> TARGET_PAGE_BITS);
3888 if (!is_ram_rom_romd(section)) {
3889 /* do nothing */
3890 } else {
3891 unsigned long addr1;
3892 addr1 = memory_region_get_ram_addr(section->mr)
3893 + section_addr(section, addr);
3894 /* ROM/RAM case */
3895 ptr = qemu_get_ram_ptr(addr1);
3896 memcpy(ptr, buf, l);
3897 qemu_put_ram_ptr(ptr);
3899 len -= l;
3900 buf += l;
3901 addr += l;
3905 typedef struct {
3906 void *buffer;
3907 target_phys_addr_t addr;
3908 target_phys_addr_t len;
3909 } BounceBuffer;
3911 static BounceBuffer bounce;
3913 typedef struct MapClient {
3914 void *opaque;
3915 void (*callback)(void *opaque);
3916 QLIST_ENTRY(MapClient) link;
3917 } MapClient;
3919 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3920 = QLIST_HEAD_INITIALIZER(map_client_list);
3922 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3924 MapClient *client = g_malloc(sizeof(*client));
3926 client->opaque = opaque;
3927 client->callback = callback;
3928 QLIST_INSERT_HEAD(&map_client_list, client, link);
3929 return client;
3932 void cpu_unregister_map_client(void *_client)
3934 MapClient *client = (MapClient *)_client;
3936 QLIST_REMOVE(client, link);
3937 g_free(client);
3940 static void cpu_notify_map_clients(void)
3942 MapClient *client;
3944 while (!QLIST_EMPTY(&map_client_list)) {
3945 client = QLIST_FIRST(&map_client_list);
3946 client->callback(client->opaque);
3947 cpu_unregister_map_client(client);
3951 /* Map a physical memory region into a host virtual address.
3952 * May map a subset of the requested range, given by and returned in *plen.
3953 * May return NULL if resources needed to perform the mapping are exhausted.
3954 * Use only for reads OR writes - not for read-modify-write operations.
3955 * Use cpu_register_map_client() to know when retrying the map operation is
3956 * likely to succeed.
3958 void *cpu_physical_memory_map(target_phys_addr_t addr,
3959 target_phys_addr_t *plen,
3960 int is_write)
3962 target_phys_addr_t len = *plen;
3963 target_phys_addr_t todo = 0;
3964 int l;
3965 target_phys_addr_t page;
3966 MemoryRegionSection *section;
3967 ram_addr_t raddr = RAM_ADDR_MAX;
3968 ram_addr_t rlen;
3969 void *ret;
3971 while (len > 0) {
3972 page = addr & TARGET_PAGE_MASK;
3973 l = (page + TARGET_PAGE_SIZE) - addr;
3974 if (l > len)
3975 l = len;
3976 section = phys_page_find(page >> TARGET_PAGE_BITS);
3978 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3979 if (todo || bounce.buffer) {
3980 break;
3982 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3983 bounce.addr = addr;
3984 bounce.len = l;
3985 if (!is_write) {
3986 cpu_physical_memory_read(addr, bounce.buffer, l);
3989 *plen = l;
3990 return bounce.buffer;
3992 if (!todo) {
3993 raddr = memory_region_get_ram_addr(section->mr)
3994 + section_addr(section, addr);
3997 len -= l;
3998 addr += l;
3999 todo += l;
4001 rlen = todo;
4002 ret = qemu_ram_ptr_length(raddr, &rlen);
4003 *plen = rlen;
4004 return ret;
4007 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4008 * Will also mark the memory as dirty if is_write == 1. access_len gives
4009 * the amount of memory that was actually read or written by the caller.
4011 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4012 int is_write, target_phys_addr_t access_len)
4014 if (buffer != bounce.buffer) {
4015 if (is_write) {
4016 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4017 while (access_len) {
4018 unsigned l;
4019 l = TARGET_PAGE_SIZE;
4020 if (l > access_len)
4021 l = access_len;
4022 if (!cpu_physical_memory_is_dirty(addr1)) {
4023 /* invalidate code */
4024 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4025 /* set dirty bit */
4026 cpu_physical_memory_set_dirty_flags(
4027 addr1, (0xff & ~CODE_DIRTY_FLAG));
4029 addr1 += l;
4030 access_len -= l;
4033 if (xen_enabled()) {
4034 xen_invalidate_map_cache_entry(buffer);
4036 return;
4038 if (is_write) {
4039 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4041 qemu_vfree(bounce.buffer);
4042 bounce.buffer = NULL;
4043 cpu_notify_map_clients();
4046 /* warning: addr must be aligned */
4047 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4048 enum device_endian endian)
4050 uint8_t *ptr;
4051 uint32_t val;
4052 MemoryRegionSection *section;
4054 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4056 if (!is_ram_rom_romd(section)) {
4057 /* I/O case */
4058 addr = section_addr(section, addr);
4059 val = io_mem_read(section->mr, addr, 4);
4060 #if defined(TARGET_WORDS_BIGENDIAN)
4061 if (endian == DEVICE_LITTLE_ENDIAN) {
4062 val = bswap32(val);
4064 #else
4065 if (endian == DEVICE_BIG_ENDIAN) {
4066 val = bswap32(val);
4068 #endif
4069 } else {
4070 /* RAM case */
4071 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4072 & TARGET_PAGE_MASK)
4073 + section_addr(section, addr));
4074 switch (endian) {
4075 case DEVICE_LITTLE_ENDIAN:
4076 val = ldl_le_p(ptr);
4077 break;
4078 case DEVICE_BIG_ENDIAN:
4079 val = ldl_be_p(ptr);
4080 break;
4081 default:
4082 val = ldl_p(ptr);
4083 break;
4086 return val;
4089 uint32_t ldl_phys(target_phys_addr_t addr)
4091 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4094 uint32_t ldl_le_phys(target_phys_addr_t addr)
4096 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4099 uint32_t ldl_be_phys(target_phys_addr_t addr)
4101 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4104 /* warning: addr must be aligned */
4105 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4106 enum device_endian endian)
4108 uint8_t *ptr;
4109 uint64_t val;
4110 MemoryRegionSection *section;
4112 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4114 if (!is_ram_rom_romd(section)) {
4115 /* I/O case */
4116 addr = section_addr(section, addr);
4118 /* XXX This is broken when device endian != cpu endian.
4119 Fix and add "endian" variable check */
4120 #ifdef TARGET_WORDS_BIGENDIAN
4121 val = io_mem_read(section->mr, addr, 4) << 32;
4122 val |= io_mem_read(section->mr, addr + 4, 4);
4123 #else
4124 val = io_mem_read(section->mr, addr, 4);
4125 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
4126 #endif
4127 } else {
4128 /* RAM case */
4129 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4130 & TARGET_PAGE_MASK)
4131 + section_addr(section, addr));
4132 switch (endian) {
4133 case DEVICE_LITTLE_ENDIAN:
4134 val = ldq_le_p(ptr);
4135 break;
4136 case DEVICE_BIG_ENDIAN:
4137 val = ldq_be_p(ptr);
4138 break;
4139 default:
4140 val = ldq_p(ptr);
4141 break;
4144 return val;
4147 uint64_t ldq_phys(target_phys_addr_t addr)
4149 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4152 uint64_t ldq_le_phys(target_phys_addr_t addr)
4154 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4157 uint64_t ldq_be_phys(target_phys_addr_t addr)
4159 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4162 /* XXX: optimize */
4163 uint32_t ldub_phys(target_phys_addr_t addr)
4165 uint8_t val;
4166 cpu_physical_memory_read(addr, &val, 1);
4167 return val;
4170 /* warning: addr must be aligned */
4171 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4172 enum device_endian endian)
4174 uint8_t *ptr;
4175 uint64_t val;
4176 MemoryRegionSection *section;
4178 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4180 if (!is_ram_rom_romd(section)) {
4181 /* I/O case */
4182 addr = section_addr(section, addr);
4183 val = io_mem_read(section->mr, addr, 2);
4184 #if defined(TARGET_WORDS_BIGENDIAN)
4185 if (endian == DEVICE_LITTLE_ENDIAN) {
4186 val = bswap16(val);
4188 #else
4189 if (endian == DEVICE_BIG_ENDIAN) {
4190 val = bswap16(val);
4192 #endif
4193 } else {
4194 /* RAM case */
4195 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4196 & TARGET_PAGE_MASK)
4197 + section_addr(section, addr));
4198 switch (endian) {
4199 case DEVICE_LITTLE_ENDIAN:
4200 val = lduw_le_p(ptr);
4201 break;
4202 case DEVICE_BIG_ENDIAN:
4203 val = lduw_be_p(ptr);
4204 break;
4205 default:
4206 val = lduw_p(ptr);
4207 break;
4210 return val;
4213 uint32_t lduw_phys(target_phys_addr_t addr)
4215 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4218 uint32_t lduw_le_phys(target_phys_addr_t addr)
4220 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4223 uint32_t lduw_be_phys(target_phys_addr_t addr)
4225 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4228 /* warning: addr must be aligned. The ram page is not masked as dirty
4229 and the code inside is not invalidated. It is useful if the dirty
4230 bits are used to track modified PTEs */
4231 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4233 uint8_t *ptr;
4234 MemoryRegionSection *section;
4236 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4238 if (!memory_region_is_ram(section->mr) || section->readonly) {
4239 addr = section_addr(section, addr);
4240 if (memory_region_is_ram(section->mr)) {
4241 section = &phys_sections[phys_section_rom];
4243 io_mem_write(section->mr, addr, val, 4);
4244 } else {
4245 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
4246 & TARGET_PAGE_MASK)
4247 + section_addr(section, addr);
4248 ptr = qemu_get_ram_ptr(addr1);
4249 stl_p(ptr, val);
4251 if (unlikely(in_migration)) {
4252 if (!cpu_physical_memory_is_dirty(addr1)) {
4253 /* invalidate code */
4254 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4255 /* set dirty bit */
4256 cpu_physical_memory_set_dirty_flags(
4257 addr1, (0xff & ~CODE_DIRTY_FLAG));
4263 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4265 uint8_t *ptr;
4266 MemoryRegionSection *section;
4268 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4270 if (!memory_region_is_ram(section->mr) || section->readonly) {
4271 addr = section_addr(section, addr);
4272 if (memory_region_is_ram(section->mr)) {
4273 section = &phys_sections[phys_section_rom];
4275 #ifdef TARGET_WORDS_BIGENDIAN
4276 io_mem_write(section->mr, addr, val >> 32, 4);
4277 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
4278 #else
4279 io_mem_write(section->mr, addr, (uint32_t)val, 4);
4280 io_mem_write(section->mr, addr + 4, val >> 32, 4);
4281 #endif
4282 } else {
4283 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4284 & TARGET_PAGE_MASK)
4285 + section_addr(section, addr));
4286 stq_p(ptr, val);
4290 /* warning: addr must be aligned */
4291 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4292 enum device_endian endian)
4294 uint8_t *ptr;
4295 MemoryRegionSection *section;
4297 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4299 if (!memory_region_is_ram(section->mr) || section->readonly) {
4300 addr = section_addr(section, addr);
4301 if (memory_region_is_ram(section->mr)) {
4302 section = &phys_sections[phys_section_rom];
4304 #if defined(TARGET_WORDS_BIGENDIAN)
4305 if (endian == DEVICE_LITTLE_ENDIAN) {
4306 val = bswap32(val);
4308 #else
4309 if (endian == DEVICE_BIG_ENDIAN) {
4310 val = bswap32(val);
4312 #endif
4313 io_mem_write(section->mr, addr, val, 4);
4314 } else {
4315 unsigned long addr1;
4316 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4317 + section_addr(section, addr);
4318 /* RAM case */
4319 ptr = qemu_get_ram_ptr(addr1);
4320 switch (endian) {
4321 case DEVICE_LITTLE_ENDIAN:
4322 stl_le_p(ptr, val);
4323 break;
4324 case DEVICE_BIG_ENDIAN:
4325 stl_be_p(ptr, val);
4326 break;
4327 default:
4328 stl_p(ptr, val);
4329 break;
4331 if (!cpu_physical_memory_is_dirty(addr1)) {
4332 /* invalidate code */
4333 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4334 /* set dirty bit */
4335 cpu_physical_memory_set_dirty_flags(addr1,
4336 (0xff & ~CODE_DIRTY_FLAG));
4341 void stl_phys(target_phys_addr_t addr, uint32_t val)
4343 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4346 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4348 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4351 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4353 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4356 /* XXX: optimize */
4357 void stb_phys(target_phys_addr_t addr, uint32_t val)
4359 uint8_t v = val;
4360 cpu_physical_memory_write(addr, &v, 1);
4363 /* warning: addr must be aligned */
4364 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4365 enum device_endian endian)
4367 uint8_t *ptr;
4368 MemoryRegionSection *section;
4370 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4372 if (!memory_region_is_ram(section->mr) || section->readonly) {
4373 addr = section_addr(section, addr);
4374 if (memory_region_is_ram(section->mr)) {
4375 section = &phys_sections[phys_section_rom];
4377 #if defined(TARGET_WORDS_BIGENDIAN)
4378 if (endian == DEVICE_LITTLE_ENDIAN) {
4379 val = bswap16(val);
4381 #else
4382 if (endian == DEVICE_BIG_ENDIAN) {
4383 val = bswap16(val);
4385 #endif
4386 io_mem_write(section->mr, addr, val, 2);
4387 } else {
4388 unsigned long addr1;
4389 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4390 + section_addr(section, addr);
4391 /* RAM case */
4392 ptr = qemu_get_ram_ptr(addr1);
4393 switch (endian) {
4394 case DEVICE_LITTLE_ENDIAN:
4395 stw_le_p(ptr, val);
4396 break;
4397 case DEVICE_BIG_ENDIAN:
4398 stw_be_p(ptr, val);
4399 break;
4400 default:
4401 stw_p(ptr, val);
4402 break;
4404 if (!cpu_physical_memory_is_dirty(addr1)) {
4405 /* invalidate code */
4406 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4407 /* set dirty bit */
4408 cpu_physical_memory_set_dirty_flags(addr1,
4409 (0xff & ~CODE_DIRTY_FLAG));
4414 void stw_phys(target_phys_addr_t addr, uint32_t val)
4416 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4419 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4421 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4424 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4426 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4429 /* XXX: optimize */
4430 void stq_phys(target_phys_addr_t addr, uint64_t val)
4432 val = tswap64(val);
4433 cpu_physical_memory_write(addr, &val, 8);
4436 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4438 val = cpu_to_le64(val);
4439 cpu_physical_memory_write(addr, &val, 8);
4442 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4444 val = cpu_to_be64(val);
4445 cpu_physical_memory_write(addr, &val, 8);
4448 /* virtual memory access for debug (includes writing to ROM) */
4449 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4450 uint8_t *buf, int len, int is_write)
4452 int l;
4453 target_phys_addr_t phys_addr;
4454 target_ulong page;
4456 while (len > 0) {
4457 page = addr & TARGET_PAGE_MASK;
4458 phys_addr = cpu_get_phys_page_debug(env, page);
4459 /* if no physical page mapped, return an error */
4460 if (phys_addr == -1)
4461 return -1;
4462 l = (page + TARGET_PAGE_SIZE) - addr;
4463 if (l > len)
4464 l = len;
4465 phys_addr += (addr & ~TARGET_PAGE_MASK);
4466 if (is_write)
4467 cpu_physical_memory_write_rom(phys_addr, buf, l);
4468 else
4469 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4470 len -= l;
4471 buf += l;
4472 addr += l;
4474 return 0;
4476 #endif
4478 /* in deterministic execution mode, instructions doing device I/Os
4479 must be at the end of the TB */
4480 void cpu_io_recompile(CPUArchState *env, void *retaddr)
4482 TranslationBlock *tb;
4483 uint32_t n, cflags;
4484 target_ulong pc, cs_base;
4485 uint64_t flags;
4487 tb = tb_find_pc((uintptr_t)retaddr);
4488 if (!tb) {
4489 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4490 retaddr);
4492 n = env->icount_decr.u16.low + tb->icount;
4493 cpu_restore_state(tb, env, (unsigned long)retaddr);
4494 /* Calculate how many instructions had been executed before the fault
4495 occurred. */
4496 n = n - env->icount_decr.u16.low;
4497 /* Generate a new TB ending on the I/O insn. */
4498 n++;
4499 /* On MIPS and SH, delay slot instructions can only be restarted if
4500 they were already the first instruction in the TB. If this is not
4501 the first instruction in a TB then re-execute the preceding
4502 branch. */
4503 #if defined(TARGET_MIPS)
4504 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4505 env->active_tc.PC -= 4;
4506 env->icount_decr.u16.low++;
4507 env->hflags &= ~MIPS_HFLAG_BMASK;
4509 #elif defined(TARGET_SH4)
4510 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4511 && n > 1) {
4512 env->pc -= 2;
4513 env->icount_decr.u16.low++;
4514 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4516 #endif
4517 /* This should never happen. */
4518 if (n > CF_COUNT_MASK)
4519 cpu_abort(env, "TB too big during recompile");
4521 cflags = n | CF_LAST_IO;
4522 pc = tb->pc;
4523 cs_base = tb->cs_base;
4524 flags = tb->flags;
4525 tb_phys_invalidate(tb, -1);
4526 /* FIXME: In theory this could raise an exception. In practice
4527 we have already translated the block once so it's probably ok. */
4528 tb_gen_code(env, pc, cs_base, flags, cflags);
4529 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4530 the first in the TB) then we end up generating a whole new TB and
4531 repeating the fault, which is horribly inefficient.
4532 Better would be to execute just this insn uncached, or generate a
4533 second new TB. */
4534 cpu_resume_from_signal(env, NULL);
4537 #if !defined(CONFIG_USER_ONLY)
4539 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4541 int i, target_code_size, max_target_code_size;
4542 int direct_jmp_count, direct_jmp2_count, cross_page;
4543 TranslationBlock *tb;
4545 target_code_size = 0;
4546 max_target_code_size = 0;
4547 cross_page = 0;
4548 direct_jmp_count = 0;
4549 direct_jmp2_count = 0;
4550 for(i = 0; i < nb_tbs; i++) {
4551 tb = &tbs[i];
4552 target_code_size += tb->size;
4553 if (tb->size > max_target_code_size)
4554 max_target_code_size = tb->size;
4555 if (tb->page_addr[1] != -1)
4556 cross_page++;
4557 if (tb->tb_next_offset[0] != 0xffff) {
4558 direct_jmp_count++;
4559 if (tb->tb_next_offset[1] != 0xffff) {
4560 direct_jmp2_count++;
4564 /* XXX: avoid using doubles ? */
4565 cpu_fprintf(f, "Translation buffer state:\n");
4566 cpu_fprintf(f, "gen code size %td/%ld\n",
4567 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4568 cpu_fprintf(f, "TB count %d/%d\n",
4569 nb_tbs, code_gen_max_blocks);
4570 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4571 nb_tbs ? target_code_size / nb_tbs : 0,
4572 max_target_code_size);
4573 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4574 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4575 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4576 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4577 cross_page,
4578 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4579 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4580 direct_jmp_count,
4581 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4582 direct_jmp2_count,
4583 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4584 cpu_fprintf(f, "\nStatistics:\n");
4585 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4586 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4587 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4588 tcg_dump_info(f, cpu_fprintf);
4591 /* NOTE: this function can trigger an exception */
4592 /* NOTE2: the returned address is not exactly the physical address: it
4593 is the offset relative to phys_ram_base */
4594 tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
4596 int mmu_idx, page_index, pd;
4597 void *p;
4598 MemoryRegion *mr;
4600 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
4601 mmu_idx = cpu_mmu_index(env1);
4602 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
4603 (addr & TARGET_PAGE_MASK))) {
4604 #ifdef CONFIG_TCG_PASS_AREG0
4605 cpu_ldub_code(env1, addr);
4606 #else
4607 ldub_code(addr);
4608 #endif
4610 pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
4611 mr = iotlb_to_region(pd);
4612 if (mr != &io_mem_ram && mr != &io_mem_rom
4613 && mr != &io_mem_notdirty && !mr->rom_device
4614 && mr != &io_mem_watch) {
4615 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
4616 cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
4617 #else
4618 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
4619 #endif
4621 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
4622 return qemu_ram_addr_from_host_nofail(p);
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
4639 #define MMUSUFFIX _cmmu
4640 #undef GETPC
4641 #define GETPC() NULL
4642 #define env cpu_single_env
4643 #define SOFTMMU_CODE_ACCESS
4645 #define SHIFT 0
4646 #include "softmmu_template.h"
4648 #define SHIFT 1
4649 #include "softmmu_template.h"
4651 #define SHIFT 2
4652 #include "softmmu_template.h"
4654 #define SHIFT 3
4655 #include "softmmu_template.h"
4657 #undef env
4659 #endif