apci: switch piix4 to memory api
[qemu/opensuse.git] / exec.c
blob8435de0bd2f9248e9050db27ff7fbd78eb4351f8
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "dma.h"
38 #include "exec-memory.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
42 #include <sys/param.h>
43 #if __FreeBSD_version >= 700104
44 #define HAVE_KINFO_GETVMMAP
45 #define sigqueue sigqueue_freebsd /* avoid redefinition */
46 #include <sys/time.h>
47 #include <sys/proc.h>
48 #include <machine/profile.h>
49 #define _KERNEL
50 #include <sys/user.h>
51 #undef _KERNEL
52 #undef sigqueue
53 #include <libutil.h>
54 #endif
55 #endif
56 #else /* !CONFIG_USER_ONLY */
57 #include "xen-mapcache.h"
58 #include "trace.h"
59 #endif
61 #include "cputlb.h"
63 #include "memory-internal.h"
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 uint8_t *code_gen_prologue;
90 static uint8_t *code_gen_buffer;
91 static size_t code_gen_buffer_size;
92 /* threshold to flush the translated code buffer */
93 static size_t code_gen_buffer_max_size;
94 static uint8_t *code_gen_ptr;
96 #if !defined(CONFIG_USER_ONLY)
97 int phys_ram_fd;
98 static int in_migration;
100 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
102 static MemoryRegion *system_memory;
103 static MemoryRegion *system_io;
105 AddressSpace address_space_io;
106 AddressSpace address_space_memory;
107 DMAContext dma_context_memory;
109 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
110 static MemoryRegion io_mem_subpage_ram;
112 #endif
114 CPUArchState *first_cpu;
115 /* current CPU in the current thread. It is only valid inside
116 cpu_exec() */
117 DEFINE_TLS(CPUArchState *,cpu_single_env);
118 /* 0 = Do not count executed instructions.
119 1 = Precise instruction counting.
120 2 = Adaptive rate instruction counting. */
121 int use_icount = 0;
/* Per-page bookkeeping for translated code.  Descriptors are handed out by
   page_find_alloc() in arrays of L2_SIZE entries. */
123 typedef struct PageDesc {
124 /* list of TBs intersecting this ram page */
/* Links in this chain are tagged: the low two bits of each pointer select
   which page_next[] slot of the TB continues the chain (see the "& 3"
   decoding in build_page_bitmap()). */
125 TranslationBlock *first_tb;
126 /* in order to optimize self modifying code, we count the number
127 of lookups we do to a given page to use a bitmap */
/* Incremented in tb_invalidate_phys_page_range() while no bitmap exists,
   compared against SMC_BITMAP_USE_THRESHOLD, and cleared again by
   invalidate_page_bitmap(). */
128 unsigned int code_write_count;
/* NULL, or a TARGET_PAGE_SIZE / 8 byte bitmap allocated by
   build_page_bitmap() with one bit set per page byte covered by a TB. */
129 uint8_t *code_bitmap;
130 #if defined(CONFIG_USER_ONLY)
/* User-mode only.  NOTE(review): presumably the PAGE_* bits handled by
   page_set_flags()/page_get_flags(); those are not visible here. */
131 unsigned long flags;
132 #endif
133 } PageDesc;
135 /* In system mode we want L1_MAP to be based on ram offsets,
136 while in user mode we want it to be based on virtual addresses. */
137 #if !defined(CONFIG_USER_ONLY)
138 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
139 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
140 #else
141 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
142 #endif
143 #else
144 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
145 #endif
147 /* Size of the L2 (and L3, etc) page tables. */
148 #define L2_BITS 10
149 #define L2_SIZE (1 << L2_BITS)
151 #define P_L2_LEVELS \
152 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
154 /* The bits remaining after N lower levels of page tables. */
155 #define V_L1_BITS_REM \
156 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
158 #if V_L1_BITS_REM < 4
159 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
160 #else
161 #define V_L1_BITS V_L1_BITS_REM
162 #endif
164 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
166 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
168 uintptr_t qemu_real_host_page_size;
169 uintptr_t qemu_host_page_size;
170 uintptr_t qemu_host_page_mask;
172 /* This is a multi-level map on the virtual address space.
173 The bottom level has pointers to PageDesc. */
174 static void *l1_map[V_L1_SIZE];
176 #if !defined(CONFIG_USER_ONLY)
178 static MemoryRegionSection *phys_sections;
179 static unsigned phys_sections_nb, phys_sections_nb_alloc;
180 static uint16_t phys_section_unassigned;
181 static uint16_t phys_section_notdirty;
182 static uint16_t phys_section_rom;
183 static uint16_t phys_section_watch;
185 /* Simple allocator for PhysPageEntry nodes */
186 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
187 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
189 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
191 static void io_mem_init(void);
192 static void memory_map_init(void);
193 static void *qemu_safe_ram_ptr(ram_addr_t addr);
195 static MemoryRegion io_mem_watch;
196 #endif
197 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
198 tb_page_addr_t phys_page2);
200 /* statistics */
201 static int tb_flush_count;
202 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Mark [addr, addr + size) as readable, writable and executable.
   The result of VirtualProtect() is not examined. */
static inline void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE,
                   &previous_protection);
}
#else
/* Mark every host page touched by [addr, addr + size) as readable,
   writable and executable.  The range is widened to host page boundaries
   first; the result of mprotect() is not examined. */
static inline void map_exec(void *addr, long size)
{
    const unsigned long host_page = getpagesize();
    const unsigned long align_mask = ~(host_page - 1);
    /* round the first byte down, and one-past-the-last byte up */
    const unsigned long first = (unsigned long)addr & align_mask;
    const unsigned long limit =
        ((unsigned long)addr + size + host_page - 1) & align_mask;

    mprotect((void *)first, limit - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
230 static void page_init(void)
232 /* NOTE: we can always suppose that qemu_host_page_size >=
233 TARGET_PAGE_SIZE */
234 #ifdef _WIN32
236 SYSTEM_INFO system_info;
238 GetSystemInfo(&system_info);
239 qemu_real_host_page_size = system_info.dwPageSize;
241 #else
242 qemu_real_host_page_size = getpagesize();
243 #endif
244 if (qemu_host_page_size == 0)
245 qemu_host_page_size = qemu_real_host_page_size;
246 if (qemu_host_page_size < TARGET_PAGE_SIZE)
247 qemu_host_page_size = TARGET_PAGE_SIZE;
248 qemu_host_page_mask = ~(qemu_host_page_size - 1);
250 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
252 #ifdef HAVE_KINFO_GETVMMAP
253 struct kinfo_vmentry *freep;
254 int i, cnt;
256 freep = kinfo_getvmmap(getpid(), &cnt);
257 if (freep) {
258 mmap_lock();
259 for (i = 0; i < cnt; i++) {
260 unsigned long startaddr, endaddr;
262 startaddr = freep[i].kve_start;
263 endaddr = freep[i].kve_end;
264 if (h2g_valid(startaddr)) {
265 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
267 if (h2g_valid(endaddr)) {
268 endaddr = h2g(endaddr);
269 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
270 } else {
271 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
272 endaddr = ~0ul;
273 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
274 #endif
278 free(freep);
279 mmap_unlock();
281 #else
282 FILE *f;
284 last_brk = (unsigned long)sbrk(0);
286 f = fopen("/compat/linux/proc/self/maps", "r");
287 if (f) {
288 mmap_lock();
290 do {
291 unsigned long startaddr, endaddr;
292 int n;
294 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
296 if (n == 2 && h2g_valid(startaddr)) {
297 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
299 if (h2g_valid(endaddr)) {
300 endaddr = h2g(endaddr);
301 } else {
302 endaddr = ~0ul;
304 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
306 } while (!feof(f));
308 fclose(f);
309 mmap_unlock();
311 #endif
313 #endif
316 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
318 PageDesc *pd;
319 void **lp;
320 int i;
322 #if defined(CONFIG_USER_ONLY)
323 /* We can't use g_malloc because it may recurse into a locked mutex. */
324 # define ALLOC(P, SIZE) \
325 do { \
326 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
327 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
328 } while (0)
329 #else
330 # define ALLOC(P, SIZE) \
331 do { P = g_malloc0(SIZE); } while (0)
332 #endif
334 /* Level 1. Always allocated. */
335 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
337 /* Level 2..N-1. */
338 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
339 void **p = *lp;
341 if (p == NULL) {
342 if (!alloc) {
343 return NULL;
345 ALLOC(p, sizeof(void *) * L2_SIZE);
346 *lp = p;
349 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
352 pd = *lp;
353 if (pd == NULL) {
354 if (!alloc) {
355 return NULL;
357 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
358 *lp = pd;
361 #undef ALLOC
363 return pd + (index & (L2_SIZE - 1));
366 static inline PageDesc *page_find(tb_page_addr_t index)
368 return page_find_alloc(index, 0);
371 #if !defined(CONFIG_USER_ONLY)
373 static void phys_map_node_reserve(unsigned nodes)
375 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
376 typedef PhysPageEntry Node[L2_SIZE];
377 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
378 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
379 phys_map_nodes_nb + nodes);
380 phys_map_nodes = g_renew(Node, phys_map_nodes,
381 phys_map_nodes_nb_alloc);
385 static uint16_t phys_map_node_alloc(void)
387 unsigned i;
388 uint16_t ret;
390 ret = phys_map_nodes_nb++;
391 assert(ret != PHYS_MAP_NODE_NIL);
392 assert(ret != phys_map_nodes_nb_alloc);
393 for (i = 0; i < L2_SIZE; ++i) {
394 phys_map_nodes[ret][i].is_leaf = 0;
395 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
397 return ret;
400 static void phys_map_nodes_reset(void)
402 phys_map_nodes_nb = 0;
406 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
407 hwaddr *nb, uint16_t leaf,
408 int level)
410 PhysPageEntry *p;
411 int i;
412 hwaddr step = (hwaddr)1 << (level * L2_BITS);
414 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
415 lp->ptr = phys_map_node_alloc();
416 p = phys_map_nodes[lp->ptr];
417 if (level == 0) {
418 for (i = 0; i < L2_SIZE; i++) {
419 p[i].is_leaf = 1;
420 p[i].ptr = phys_section_unassigned;
423 } else {
424 p = phys_map_nodes[lp->ptr];
426 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
428 while (*nb && lp < &p[L2_SIZE]) {
429 if ((*index & (step - 1)) == 0 && *nb >= step) {
430 lp->is_leaf = true;
431 lp->ptr = leaf;
432 *index += step;
433 *nb -= step;
434 } else {
435 phys_page_set_level(lp, index, nb, leaf, level - 1);
437 ++lp;
441 static void phys_page_set(AddressSpaceDispatch *d,
442 hwaddr index, hwaddr nb,
443 uint16_t leaf)
445 /* Wildly overreserve - it doesn't matter much. */
446 phys_map_node_reserve(3 * P_L2_LEVELS);
448 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
451 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
453 PhysPageEntry lp = d->phys_map;
454 PhysPageEntry *p;
455 int i;
456 uint16_t s_index = phys_section_unassigned;
458 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
459 if (lp.ptr == PHYS_MAP_NODE_NIL) {
460 goto not_found;
462 p = phys_map_nodes[lp.ptr];
463 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
466 s_index = lp.ptr;
467 not_found:
468 return &phys_sections[s_index];
471 bool memory_region_is_unassigned(MemoryRegion *mr)
473 return mr != &io_mem_ram && mr != &io_mem_rom
474 && mr != &io_mem_notdirty && !mr->rom_device
475 && mr != &io_mem_watch;
478 #define mmap_lock() do { } while(0)
479 #define mmap_unlock() do { } while(0)
480 #endif
482 #if defined(CONFIG_USER_ONLY)
483 /* Currently it is not recommended to allocate big chunks of data in
484 user mode. It will change when a dedicated libc will be used. */
485 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
486 region in which the guest needs to run. Revisit this. */
487 #define USE_STATIC_CODE_GEN_BUFFER
488 #endif
490 /* ??? Should configure for this, not list operating systems here. */
491 #if (defined(__linux__) \
492 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
493 || defined(__DragonFly__) || defined(__OpenBSD__) \
494 || defined(__NetBSD__))
495 # define USE_MMAP
496 #endif
498 /* Minimum size of the code gen buffer. This number is randomly chosen,
499 but not so small that we can't have a fair number of TB's live. */
500 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
502 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
503 indicated, this is constrained by the range of direct branches on the
504 host cpu, as used by the TCG implementation of goto_tb. */
505 #if defined(__x86_64__)
506 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
507 #elif defined(__sparc__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
509 #elif defined(__arm__)
510 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
511 #elif defined(__s390x__)
512 /* We have a +- 4GB range on the branches; leave some slop. */
513 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
514 #else
515 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
516 #endif
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
520 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
521 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
522 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
524 static inline size_t size_code_gen_buffer(size_t tb_size)
526 /* Size the buffer. */
527 if (tb_size == 0) {
528 #ifdef USE_STATIC_CODE_GEN_BUFFER
529 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
530 #else
531 /* ??? Needs adjustments. */
532 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
533 static buffer, we could size this on RESERVED_VA, on the text
534 segment size of the executable, or continue to use the default. */
535 tb_size = (unsigned long)(ram_size / 4);
536 #endif
538 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
539 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
541 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
542 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
544 code_gen_buffer_size = tb_size;
545 return tb_size;
548 #ifdef USE_STATIC_CODE_GEN_BUFFER
549 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
550 __attribute__((aligned(CODE_GEN_ALIGN)));
552 static inline void *alloc_code_gen_buffer(void)
554 map_exec(static_code_gen_buffer, code_gen_buffer_size);
555 return static_code_gen_buffer;
557 #elif defined(USE_MMAP)
558 static inline void *alloc_code_gen_buffer(void)
560 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
561 uintptr_t start = 0;
562 void *buf;
564 /* Constrain the position of the buffer based on the host cpu.
565 Note that these addresses are chosen in concert with the
566 addresses assigned in the relevant linker script file. */
567 # if defined(__PIE__) || defined(__PIC__)
568 /* Don't bother setting a preferred location if we're building
569 a position-independent executable. We're more likely to get
570 an address near the main executable if we let the kernel
571 choose the address. */
572 # elif defined(__x86_64__) && defined(MAP_32BIT)
573 /* Force the memory down into low memory with the executable.
574 Leave the choice of exact location with the kernel. */
575 flags |= MAP_32BIT;
576 /* Cannot expect to map more than 800MB in low memory. */
577 if (code_gen_buffer_size > 800u * 1024 * 1024) {
578 code_gen_buffer_size = 800u * 1024 * 1024;
580 # elif defined(__sparc__)
581 start = 0x40000000ul;
582 # elif defined(__s390x__)
583 start = 0x90000000ul;
584 # endif
586 buf = mmap((void *)start, code_gen_buffer_size,
587 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
588 return buf == MAP_FAILED ? NULL : buf;
590 #else
591 static inline void *alloc_code_gen_buffer(void)
593 void *buf = g_malloc(code_gen_buffer_size);
594 if (buf) {
595 map_exec(buf, code_gen_buffer_size);
597 return buf;
599 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
601 static inline void code_gen_alloc(size_t tb_size)
603 code_gen_buffer_size = size_code_gen_buffer(tb_size);
604 code_gen_buffer = alloc_code_gen_buffer();
605 if (code_gen_buffer == NULL) {
606 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
607 exit(1);
610 /* Steal room for the prologue at the end of the buffer. This ensures
611 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
612 from TB's to the prologue are going to be in range. It also means
613 that we don't need to mark (additional) portions of the data segment
614 as executable. */
615 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
616 code_gen_buffer_size -= 1024;
618 code_gen_buffer_max_size = code_gen_buffer_size -
619 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
620 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
621 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
624 /* Must be called before using the QEMU cpus. 'tb_size' is the size
625 (in bytes) allocated to the translation buffer. Zero means default
626 size. */
627 void tcg_exec_init(unsigned long tb_size)
629 cpu_gen_init();
630 code_gen_alloc(tb_size);
631 code_gen_ptr = code_gen_buffer;
632 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
633 page_init();
634 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
635 /* There's no guest base to take into account, so go ahead and
636 initialize the prologue now. */
637 tcg_prologue_init(&tcg_ctx);
638 #endif
641 bool tcg_enabled(void)
643 return code_gen_buffer != NULL;
/* Global initialisation hook.  System emulation sets up the memory map
   and then the I/O memory regions; user-mode builds have nothing to do. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
654 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
656 static int cpu_common_post_load(void *opaque, int version_id)
658 CPUArchState *env = opaque;
660 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
661 version_id is increased. */
662 env->interrupt_request &= ~0x01;
663 tlb_flush(env, 1);
665 return 0;
668 static const VMStateDescription vmstate_cpu_common = {
669 .name = "cpu_common",
670 .version_id = 1,
671 .minimum_version_id = 1,
672 .minimum_version_id_old = 1,
673 .post_load = cpu_common_post_load,
674 .fields = (VMStateField []) {
675 VMSTATE_UINT32(halted, CPUArchState),
676 VMSTATE_UINT32(interrupt_request, CPUArchState),
677 VMSTATE_END_OF_LIST()
680 #endif
682 CPUArchState *qemu_get_cpu(int cpu)
684 CPUArchState *env = first_cpu;
686 while (env) {
687 if (env->cpu_index == cpu)
688 break;
689 env = env->next_cpu;
692 return env;
695 void cpu_exec_init(CPUArchState *env)
697 #ifndef CONFIG_USER_ONLY
698 CPUState *cpu = ENV_GET_CPU(env);
699 #endif
700 CPUArchState **penv;
701 int cpu_index;
703 #if defined(CONFIG_USER_ONLY)
704 cpu_list_lock();
705 #endif
706 env->next_cpu = NULL;
707 penv = &first_cpu;
708 cpu_index = 0;
709 while (*penv != NULL) {
710 penv = &(*penv)->next_cpu;
711 cpu_index++;
713 env->cpu_index = cpu_index;
714 env->numa_node = 0;
715 QTAILQ_INIT(&env->breakpoints);
716 QTAILQ_INIT(&env->watchpoints);
717 #ifndef CONFIG_USER_ONLY
718 cpu->thread_id = qemu_get_thread_id();
719 #endif
720 *penv = env;
721 #if defined(CONFIG_USER_ONLY)
722 cpu_list_unlock();
723 #endif
724 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
725 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
726 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
727 cpu_save, cpu_load, env);
728 #endif
731 /* Allocate a new translation block. Flush the translation buffer if
732 too many translation blocks or too much generated code. */
733 static TranslationBlock *tb_alloc(target_ulong pc)
735 TranslationBlock *tb;
737 if (nb_tbs >= code_gen_max_blocks ||
738 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
739 return NULL;
740 tb = &tbs[nb_tbs++];
741 tb->pc = pc;
742 tb->cflags = 0;
743 return tb;
746 void tb_free(TranslationBlock *tb)
748 /* In practice this is mostly used for single use temporary TB
749 Ignore the hard cases and just back up if this TB happens to
750 be the last one generated. */
751 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
752 code_gen_ptr = tb->tc_ptr;
753 nb_tbs--;
757 static inline void invalidate_page_bitmap(PageDesc *p)
759 if (p->code_bitmap) {
760 g_free(p->code_bitmap);
761 p->code_bitmap = NULL;
763 p->code_write_count = 0;
766 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
768 static void page_flush_tb_1 (int level, void **lp)
770 int i;
772 if (*lp == NULL) {
773 return;
775 if (level == 0) {
776 PageDesc *pd = *lp;
777 for (i = 0; i < L2_SIZE; ++i) {
778 pd[i].first_tb = NULL;
779 invalidate_page_bitmap(pd + i);
781 } else {
782 void **pp = *lp;
783 for (i = 0; i < L2_SIZE; ++i) {
784 page_flush_tb_1 (level - 1, pp + i);
789 static void page_flush_tb(void)
791 int i;
792 for (i = 0; i < V_L1_SIZE; i++) {
793 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
797 /* flush all the translation blocks */
798 /* XXX: tb_flush is currently not thread safe */
799 void tb_flush(CPUArchState *env1)
801 CPUArchState *env;
802 #if defined(DEBUG_FLUSH)
803 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
804 (unsigned long)(code_gen_ptr - code_gen_buffer),
805 nb_tbs, nb_tbs > 0 ?
806 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
807 #endif
808 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
809 cpu_abort(env1, "Internal error: code buffer overflow\n");
811 nb_tbs = 0;
813 for(env = first_cpu; env != NULL; env = env->next_cpu) {
814 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
817 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
818 page_flush_tb();
820 code_gen_ptr = code_gen_buffer;
821 /* XXX: flush processor icache at this point if cache flush is
822 expensive */
823 tb_flush_count++;
826 #ifdef DEBUG_TB_CHECK
828 static void tb_invalidate_check(target_ulong address)
830 TranslationBlock *tb;
831 int i;
832 address &= TARGET_PAGE_MASK;
833 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
834 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
835 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
836 address >= tb->pc + tb->size)) {
837 printf("ERROR invalidate: address=" TARGET_FMT_lx
838 " PC=%08lx size=%04x\n",
839 address, (long)tb->pc, tb->size);
845 /* verify that all the pages have correct rights for code */
846 static void tb_page_check(void)
848 TranslationBlock *tb;
849 int i, flags1, flags2;
851 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
852 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
853 flags1 = page_get_flags(tb->pc);
854 flags2 = page_get_flags(tb->pc + tb->size - 1);
855 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
856 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
857 (long)tb->pc, tb->size, flags1, flags2);
863 #endif
865 /* invalidate one TB */
866 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
867 int next_offset)
869 TranslationBlock *tb1;
870 for(;;) {
871 tb1 = *ptb;
872 if (tb1 == tb) {
873 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
874 break;
876 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
880 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
882 TranslationBlock *tb1;
883 unsigned int n1;
885 for(;;) {
886 tb1 = *ptb;
887 n1 = (uintptr_t)tb1 & 3;
888 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
889 if (tb1 == tb) {
890 *ptb = tb1->page_next[n1];
891 break;
893 ptb = &tb1->page_next[n1];
897 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
899 TranslationBlock *tb1, **ptb;
900 unsigned int n1;
902 ptb = &tb->jmp_next[n];
903 tb1 = *ptb;
904 if (tb1) {
905 /* find tb(n) in circular list */
906 for(;;) {
907 tb1 = *ptb;
908 n1 = (uintptr_t)tb1 & 3;
909 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
910 if (n1 == n && tb1 == tb)
911 break;
912 if (n1 == 2) {
913 ptb = &tb1->jmp_first;
914 } else {
915 ptb = &tb1->jmp_next[n1];
918 /* now we can suppress tb(n) from the list */
919 *ptb = tb->jmp_next[n];
921 tb->jmp_next[n] = NULL;
925 /* reset the jump entry 'n' of a TB so that it is not chained to
926 another TB */
927 static inline void tb_reset_jump(TranslationBlock *tb, int n)
929 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
932 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
934 CPUArchState *env;
935 PageDesc *p;
936 unsigned int h, n1;
937 tb_page_addr_t phys_pc;
938 TranslationBlock *tb1, *tb2;
940 /* remove the TB from the hash list */
941 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
942 h = tb_phys_hash_func(phys_pc);
943 tb_remove(&tb_phys_hash[h], tb,
944 offsetof(TranslationBlock, phys_hash_next));
946 /* remove the TB from the page list */
947 if (tb->page_addr[0] != page_addr) {
948 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
952 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
953 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
954 tb_page_remove(&p->first_tb, tb);
955 invalidate_page_bitmap(p);
958 tb_invalidated_flag = 1;
960 /* remove the TB from the hash list */
961 h = tb_jmp_cache_hash_func(tb->pc);
962 for(env = first_cpu; env != NULL; env = env->next_cpu) {
963 if (env->tb_jmp_cache[h] == tb)
964 env->tb_jmp_cache[h] = NULL;
967 /* suppress this TB from the two jump lists */
968 tb_jmp_remove(tb, 0);
969 tb_jmp_remove(tb, 1);
971 /* suppress any remaining jumps to this TB */
972 tb1 = tb->jmp_first;
973 for(;;) {
974 n1 = (uintptr_t)tb1 & 3;
975 if (n1 == 2)
976 break;
977 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
978 tb2 = tb1->jmp_next[n1];
979 tb_reset_jump(tb1, n1);
980 tb1->jmp_next[n1] = NULL;
981 tb1 = tb2;
983 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
985 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab'.  Bit k lives in byte
   k / 8 at position k % 8, least significant bit first.  An empty range
   inside a single byte leaves the bitmap unchanged. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *cursor = tab + (start >> 3);
    int pos;

    if ((start & ~7) == (end & ~7)) {
        /* first and last bit share one byte */
        if (start < end) {
            *cursor |= (0xff << (start & 7)) & ~(0xff << (end & 7));
        }
        return;
    }

    /* partial first byte */
    *cursor++ |= 0xff << (start & 7);

    /* whole bytes in the middle */
    for (pos = (start + 8) & ~7; pos < (end & ~7); pos += 8) {
        *cursor++ = 0xff;
    }

    /* partial last byte */
    if (pos < end) {
        *cursor |= ~(0xff << (end & 7));
    }
}
1015 static void build_page_bitmap(PageDesc *p)
1017 int n, tb_start, tb_end;
1018 TranslationBlock *tb;
1020 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1022 tb = p->first_tb;
1023 while (tb != NULL) {
1024 n = (uintptr_t)tb & 3;
1025 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1026 /* NOTE: this is subtle as a TB may span two physical pages */
1027 if (n == 0) {
1028 /* NOTE: tb_end may be after the end of the page, but
1029 it is not a problem */
1030 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1031 tb_end = tb_start + tb->size;
1032 if (tb_end > TARGET_PAGE_SIZE)
1033 tb_end = TARGET_PAGE_SIZE;
1034 } else {
1035 tb_start = 0;
1036 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1038 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1039 tb = tb->page_next[n];
1043 TranslationBlock *tb_gen_code(CPUArchState *env,
1044 target_ulong pc, target_ulong cs_base,
1045 int flags, int cflags)
1047 TranslationBlock *tb;
1048 uint8_t *tc_ptr;
1049 tb_page_addr_t phys_pc, phys_page2;
1050 target_ulong virt_page2;
1051 int code_gen_size;
1053 phys_pc = get_page_addr_code(env, pc);
1054 tb = tb_alloc(pc);
1055 if (!tb) {
1056 /* flush must be done */
1057 tb_flush(env);
1058 /* cannot fail at this point */
1059 tb = tb_alloc(pc);
1060 /* Don't forget to invalidate previous TB info. */
1061 tb_invalidated_flag = 1;
1063 tc_ptr = code_gen_ptr;
1064 tb->tc_ptr = tc_ptr;
1065 tb->cs_base = cs_base;
1066 tb->flags = flags;
1067 tb->cflags = cflags;
1068 cpu_gen_code(env, tb, &code_gen_size);
1069 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1070 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1072 /* check next page if needed */
1073 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1074 phys_page2 = -1;
1075 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1076 phys_page2 = get_page_addr_code(env, virt_page2);
1078 tb_link_page(tb, phys_pc, phys_page2);
1079 return tb;
1083 * Invalidate all TBs which intersect with the target physical address range
1084 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1085 * 'is_cpu_write_access' should be true if called from a real cpu write
1086 * access: the virtual CPU will exit the current TB if code is modified inside
1087 * this TB.
1089 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1090 int is_cpu_write_access)
1092 while (start < end) {
1093 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1094 start &= TARGET_PAGE_MASK;
1095 start += TARGET_PAGE_SIZE;
/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
/* Invalidate every TB on the page containing 'start' that overlaps
   [start, end).  Only the page descriptor of 'start' is consulted.
   'is_cpu_write_access' is non-zero when the call comes from a guest CPU
   store; it enables building the code bitmap, un-protecting the page once
   it holds no more TBs, and (with TARGET_HAS_PRECISE_SMC) restarting the
   CPU when the TB being executed is itself modified. */
1106 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1107 int is_cpu_write_access)
1109 TranslationBlock *tb, *tb_next, *saved_tb;
/* CPU running on this thread; the "if (env)" tests below allow for NULL */
1110 CPUArchState *env = cpu_single_env;
1111 tb_page_addr_t tb_start, tb_end;
1112 PageDesc *p;
1113 int n;
1114 #ifdef TARGET_HAS_PRECISE_SMC
/* the currently executing TB is only searched for on a CPU write */
1115 int current_tb_not_found = is_cpu_write_access;
1116 TranslationBlock *current_tb = NULL;
1117 int current_tb_modified = 0;
1118 target_ulong current_pc = 0;
1119 target_ulong current_cs_base = 0;
1120 int current_flags = 0;
1121 #endif /* TARGET_HAS_PRECISE_SMC */
/* a page without a descriptor holds no translated code */
1123 p = page_find(start >> TARGET_PAGE_BITS);
1124 if (!p)
1125 return;
/* The write counter is bumped on every call while no bitmap exists, but
   the bitmap itself is only built for CPU writes once the counter has
   reached SMC_BITMAP_USE_THRESHOLD. */
1126 if (!p->code_bitmap &&
1127 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1128 is_cpu_write_access) {
1129 /* build code bitmap */
1130 build_page_bitmap(p);
1133 /* we remove all the TBs in the range [start, end[ */
1134 /* XXX: see if in some cases it could be faster to invalidate all the code */
1135 tb = p->first_tb;
1136 while (tb != NULL) {
/* the low two bits of the link select which of the TB's pages this is */
1137 n = (uintptr_t)tb & 3;
1138 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
/* fetch the successor first: tb is unlinked further down */
1139 tb_next = tb->page_next[n];
1140 /* NOTE: this is subtle as a TB may span two physical pages */
1141 if (n == 0) {
1142 /* NOTE: tb_end may be after the end of the page, but
1143 it is not a problem */
1144 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1145 tb_end = tb_start + tb->size;
1146 } else {
1147 tb_start = tb->page_addr[1];
1148 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
/* true when [tb_start, tb_end) and [start, end) overlap */
1150 if (!(tb_end <= start || tb_start >= end)) {
1151 #ifdef TARGET_HAS_PRECISE_SMC
/* NOTE(review): env is dereferenced here without the NULL test used
   further down; presumably a CPU write access implies a current CPU. */
1152 if (current_tb_not_found) {
1153 current_tb_not_found = 0;
1154 current_tb = NULL;
1155 if (env->mem_io_pc) {
1156 /* now we have a real cpu fault */
1157 current_tb = tb_find_pc(env->mem_io_pc);
1160 if (current_tb == tb &&
1161 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1162 /* If we are modifying the current TB, we must stop
1163 its execution. We could be more precise by checking
1164 that the modification is after the current PC, but it
1165 would require a specialized function to partially
1166 restore the CPU state */
1168 current_tb_modified = 1;
1169 cpu_restore_state(current_tb, env, env->mem_io_pc);
1170 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1171 &current_flags);
1173 #endif /* TARGET_HAS_PRECISE_SMC */
1174 /* we need to do that to handle the case where a signal
1175 occurs while doing tb_phys_invalidate() */
1176 saved_tb = NULL;
1177 if (env) {
1178 saved_tb = env->current_tb;
1179 env->current_tb = NULL;
/* -1 as page address: unlink the TB from both of its pages */
1181 tb_phys_invalidate(tb, -1);
1182 if (env) {
1183 env->current_tb = saved_tb;
/* re-raise interrupt requests that are pending while a TB is current */
1184 if (env->interrupt_request && env->current_tb)
1185 cpu_interrupt(env, env->interrupt_request);
1188 tb = tb_next;
1190 #if !defined(CONFIG_USER_ONLY)
1191 /* if no code remaining, no need to continue to use slow writes */
1192 if (!p->first_tb) {
1193 invalidate_page_bitmap(p);
1194 if (is_cpu_write_access) {
/* NOTE(review): env is dereferenced without a NULL test here as well */
1195 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1198 #endif
1199 #ifdef TARGET_HAS_PRECISE_SMC
1200 if (current_tb_modified) {
1201 /* we generate a block containing just the instruction
1202 modifying the memory. It will ensure that it cannot modify
1203 itself */
1204 env->current_tb = NULL;
/* the last argument is cflags; 1 matches the CF_COUNT_MASK test above */
1205 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
/* NOTE(review): presumably does not return to this function - confirm */
1206 cpu_resume_from_signal(env, NULL);
1208 #endif
1211 /* len must be <= 8 and start must be a multiple of len */
1212 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1214 PageDesc *p;
1215 int offset, b;
1216 #if 0
1217 if (1) {
1218 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1219 cpu_single_env->mem_io_vaddr, len,
1220 cpu_single_env->eip,
1221 cpu_single_env->eip +
1222 (intptr_t)cpu_single_env->segs[R_CS].base);
1224 #endif
1225 p = page_find(start >> TARGET_PAGE_BITS);
1226 if (!p)
1227 return;
1228 if (p->code_bitmap) {
1229 offset = start & ~TARGET_PAGE_MASK;
1230 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1231 if (b & ((1 << len) - 1))
1232 goto do_invalidate;
1233 } else {
1234 do_invalidate:
1235 tb_invalidate_phys_page_range(start, start + len, 1);
1239 #if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB linked on the page containing 'addr' and empty the
   page's TB list.
     addr - address inside the page; rounded down to the page start.
     pc   - when non-zero (precise SMC only), passed to tb_find_pc() to
            identify the TB that was executing.
     puc  - forwarded unchanged to cpu_resume_from_signal().
   With TARGET_HAS_PRECISE_SMC, if the executing TB is on this page the
   function does not return: it regenerates a one-instruction TB and resumes
   through cpu_resume_from_signal(). */
1240 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1241 uintptr_t pc, void *puc)
1243 TranslationBlock *tb;
1244 PageDesc *p;
1245 int n;
1246 #ifdef TARGET_HAS_PRECISE_SMC
1247 TranslationBlock *current_tb = NULL;
1248 CPUArchState *env = cpu_single_env;
1249 int current_tb_modified = 0;
1250 target_ulong current_pc = 0;
1251 target_ulong current_cs_base = 0;
1252 int current_flags = 0;
1253 #endif
1255 addr &= TARGET_PAGE_MASK;
1256 p = page_find(addr >> TARGET_PAGE_BITS);
1257 if (!p)
1258 return;
1259 tb = p->first_tb;
1260 #ifdef TARGET_HAS_PRECISE_SMC
1261 if (tb && pc != 0) {
1262 current_tb = tb_find_pc(pc);
1264 #endif
1265 while (tb != NULL) {
/* tagged pointer: low two bits = page slot of the TB on this page */
1266 n = (uintptr_t)tb & 3;
1267 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1268 #ifdef TARGET_HAS_PRECISE_SMC
1269 if (current_tb == tb &&
1270 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1271 /* If we are modifying the current TB, we must stop
1272 its execution. We could be more precise by checking
1273 that the modification is after the current PC, but it
1274 would require a specialized function to partially
1275 restore the CPU state */
1277 current_tb_modified = 1;
1278 cpu_restore_state(current_tb, env, pc);
1279 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1280 &current_flags);
1282 #endif /* TARGET_HAS_PRECISE_SMC */
/* NOTE(review): page_next[n] is read after the TB was invalidated; this
   presumably relies on tb_phys_invalidate() leaving the list of the page
   passed as second argument untouched - confirm against its definition */
1283 tb_phys_invalidate(tb, addr);
1284 tb = tb->page_next[n];
/* every TB of the page has been processed: drop the whole list at once */
1286 p->first_tb = NULL;
1287 #ifdef TARGET_HAS_PRECISE_SMC
1288 if (current_tb_modified) {
1289 /* we generate a block containing just the instruction
1290 modifying the memory. It will ensure that it cannot modify
1291 itself */
1292 env->current_tb = NULL;
1293 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1294 cpu_resume_from_signal(env, puc);
1296 #endif
1298 #endif
1300 /* add the tb in the target page and protect it if necessary */
1301 static inline void tb_alloc_page(TranslationBlock *tb,
1302 unsigned int n, tb_page_addr_t page_addr)
1304 PageDesc *p;
1305 #ifndef CONFIG_USER_ONLY
1306 bool page_already_protected;
1307 #endif
1309 tb->page_addr[n] = page_addr;
1310 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1311 tb->page_next[n] = p->first_tb;
1312 #ifndef CONFIG_USER_ONLY
1313 page_already_protected = p->first_tb != NULL;
1314 #endif
1315 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1316 invalidate_page_bitmap(p);
1318 #if defined(TARGET_HAS_SMC) || 1
1320 #if defined(CONFIG_USER_ONLY)
1321 if (p->flags & PAGE_WRITE) {
1322 target_ulong addr;
1323 PageDesc *p2;
1324 int prot;
1326 /* force the host page as non writable (writes will have a
1327 page fault + mprotect overhead) */
1328 page_addr &= qemu_host_page_mask;
1329 prot = 0;
1330 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1331 addr += TARGET_PAGE_SIZE) {
1333 p2 = page_find (addr >> TARGET_PAGE_BITS);
1334 if (!p2)
1335 continue;
1336 prot |= p2->flags;
1337 p2->flags &= ~PAGE_WRITE;
1339 mprotect(g2h(page_addr), qemu_host_page_size,
1340 (prot & PAGE_BITS) & ~PAGE_WRITE);
1341 #ifdef DEBUG_TB_INVALIDATE
1342 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1343 page_addr);
1344 #endif
1346 #else
1347 /* if some code is already present, then the pages are already
1348 protected. So we handle the case where only the first TB is
1349 allocated in a physical page */
1350 if (!page_already_protected) {
1351 tlb_protect_code(page_addr);
1353 #endif
1355 #endif /* TARGET_HAS_SMC */
1358 /* add a new TB and link it to the physical page tables. phys_page2 is
1359 (-1) to indicate that only one page contains the TB. */
1360 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1361 tb_page_addr_t phys_page2)
1363 unsigned int h;
1364 TranslationBlock **ptb;
1366 /* Grab the mmap lock to stop another thread invalidating this TB
1367 before we are done. */
1368 mmap_lock();
1369 /* add in the physical hash table */
1370 h = tb_phys_hash_func(phys_pc);
1371 ptb = &tb_phys_hash[h];
1372 tb->phys_hash_next = *ptb;
1373 *ptb = tb;
1375 /* add in the page list */
1376 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1377 if (phys_page2 != -1)
1378 tb_alloc_page(tb, 1, phys_page2);
1379 else
1380 tb->page_addr[1] = -1;
1382 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1383 tb->jmp_next[0] = NULL;
1384 tb->jmp_next[1] = NULL;
1386 /* init original jump addresses */
1387 if (tb->tb_next_offset[0] != 0xffff)
1388 tb_reset_jump(tb, 0);
1389 if (tb->tb_next_offset[1] != 0xffff)
1390 tb_reset_jump(tb, 1);
1392 #ifdef DEBUG_TB_CHECK
1393 tb_page_check();
1394 #endif
1395 mmap_unlock();
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
/* Return true when tc_ptr points inside the TCG generated-code buffer. */
bool is_tcg_gen_code(uintptr_t tc_ptr)
{
    /* This may run while code is being generated, hence the upper bound is
       the buffer's maximum size rather than code_gen_ptr. */
    const uintptr_t lower = (uintptr_t)code_gen_buffer;
    const uintptr_t upper =
        (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size);

    return tc_ptr >= lower && tc_ptr < upper;
}
#endif
1409 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1410 tb[1].tc_ptr. Return NULL if not found */
1411 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1413 int m_min, m_max, m;
1414 uintptr_t v;
1415 TranslationBlock *tb;
1417 if (nb_tbs <= 0)
1418 return NULL;
1419 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1420 tc_ptr >= (uintptr_t)code_gen_ptr) {
1421 return NULL;
1423 /* binary search (cf Knuth) */
1424 m_min = 0;
1425 m_max = nb_tbs - 1;
1426 while (m_min <= m_max) {
1427 m = (m_min + m_max) >> 1;
1428 tb = &tbs[m];
1429 v = (uintptr_t)tb->tc_ptr;
1430 if (v == tc_ptr)
1431 return tb;
1432 else if (tc_ptr < v) {
1433 m_max = m - 1;
1434 } else {
1435 m_min = m + 1;
1438 return &tbs[m_max];
1441 static void tb_reset_jump_recursive(TranslationBlock *tb);
/* Undo the direct jump number 'n' of 'tb', if it is chained: unlink 'tb'
   from the list of incoming jumps of its destination TB, reset the jump in
   the generated code, then do the same recursively for the destination.
   List pointers are tagged in their low two bits: 0/1 give the jump slot of
   the TB they point to, 2 marks the list head (the destination TB itself). */
1443 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1445 TranslationBlock *tb1, *tb_next, **ptb;
1446 unsigned int n1;
1448 tb1 = tb->jmp_next[n];
/* NULL means jump 'n' is not chained to any TB: nothing to do */
1449 if (tb1 != NULL) {
1450 /* find head of list */
1451 for(;;) {
1452 n1 = (uintptr_t)tb1 & 3;
1453 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1454 if (n1 == 2)
1455 break;
1456 tb1 = tb1->jmp_next[n1];
1458 /* we are now sure that tb jumps to tb1 */
1459 tb_next = tb1;
1461 /* remove tb from the jmp_first list */
1462 ptb = &tb_next->jmp_first;
1463 for(;;) {
1464 tb1 = *ptb;
1465 n1 = (uintptr_t)tb1 & 3;
1466 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
/* the entry to unlink is the one naming both this TB and this jump slot */
1467 if (n1 == n && tb1 == tb)
1468 break;
1469 ptb = &tb1->jmp_next[n1];
1471 *ptb = tb->jmp_next[n];
1472 tb->jmp_next[n] = NULL;
1474 /* suppress the jump to next tb in generated code */
1475 tb_reset_jump(tb, n);
1477 /* suppress jumps in the tb on which we could have jumped */
1478 tb_reset_jump_recursive(tb_next);
1482 static void tb_reset_jump_recursive(TranslationBlock *tb)
1484 tb_reset_jump_recursive2(tb, 0);
1485 tb_reset_jump_recursive2(tb, 1);
1488 #if defined(TARGET_HAS_ICE)
1489 #if defined(CONFIG_USER_ONLY)
1490 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1492 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1494 #else
1495 void tb_invalidate_phys_addr(hwaddr addr)
1497 ram_addr_t ram_addr;
1498 MemoryRegionSection *section;
1500 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1501 if (!(memory_region_is_ram(section->mr)
1502 || (section->mr->rom_device && section->mr->readable))) {
1503 return;
1505 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1506 + memory_region_section_addr(section, addr);
1507 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1510 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1512 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1513 (pc & ~TARGET_PAGE_MASK));
1515 #endif
1516 #endif /* TARGET_HAS_ICE */
1518 #if defined(CONFIG_USER_ONLY)
1519 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1524 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1525 int flags, CPUWatchpoint **watchpoint)
1527 return -ENOSYS;
1529 #else
1530 /* Add a watchpoint. */
1531 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1532 int flags, CPUWatchpoint **watchpoint)
1534 target_ulong len_mask = ~(len - 1);
1535 CPUWatchpoint *wp;
1537 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1538 if ((len & (len - 1)) || (addr & ~len_mask) ||
1539 len == 0 || len > TARGET_PAGE_SIZE) {
1540 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1541 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1542 return -EINVAL;
1544 wp = g_malloc(sizeof(*wp));
1546 wp->vaddr = addr;
1547 wp->len_mask = len_mask;
1548 wp->flags = flags;
1550 /* keep all GDB-injected watchpoints in front */
1551 if (flags & BP_GDB)
1552 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1553 else
1554 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1556 tlb_flush_page(env, addr);
1558 if (watchpoint)
1559 *watchpoint = wp;
1560 return 0;
1563 /* Remove a specific watchpoint. */
1564 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1565 int flags)
1567 target_ulong len_mask = ~(len - 1);
1568 CPUWatchpoint *wp;
1570 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1571 if (addr == wp->vaddr && len_mask == wp->len_mask
1572 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1573 cpu_watchpoint_remove_by_ref(env, wp);
1574 return 0;
1577 return -ENOENT;
1580 /* Remove a specific watchpoint by reference. */
1581 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1583 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1585 tlb_flush_page(env, watchpoint->vaddr);
1587 g_free(watchpoint);
1590 /* Remove all matching watchpoints. */
1591 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1593 CPUWatchpoint *wp, *next;
1595 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1596 if (wp->flags & mask)
1597 cpu_watchpoint_remove_by_ref(env, wp);
1600 #endif
1602 /* Add a breakpoint. */
1603 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1604 CPUBreakpoint **breakpoint)
1606 #if defined(TARGET_HAS_ICE)
1607 CPUBreakpoint *bp;
1609 bp = g_malloc(sizeof(*bp));
1611 bp->pc = pc;
1612 bp->flags = flags;
1614 /* keep all GDB-injected breakpoints in front */
1615 if (flags & BP_GDB)
1616 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1617 else
1618 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1620 breakpoint_invalidate(env, pc);
1622 if (breakpoint)
1623 *breakpoint = bp;
1624 return 0;
1625 #else
1626 return -ENOSYS;
1627 #endif
1630 /* Remove a specific breakpoint. */
1631 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1633 #if defined(TARGET_HAS_ICE)
1634 CPUBreakpoint *bp;
1636 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1637 if (bp->pc == pc && bp->flags == flags) {
1638 cpu_breakpoint_remove_by_ref(env, bp);
1639 return 0;
1642 return -ENOENT;
1643 #else
1644 return -ENOSYS;
1645 #endif
1648 /* Remove a specific breakpoint by reference. */
1649 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1651 #if defined(TARGET_HAS_ICE)
1652 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1654 breakpoint_invalidate(env, breakpoint->pc);
1656 g_free(breakpoint);
1657 #endif
1660 /* Remove all matching breakpoints. */
1661 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1663 #if defined(TARGET_HAS_ICE)
1664 CPUBreakpoint *bp, *next;
1666 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1667 if (bp->flags & mask)
1668 cpu_breakpoint_remove_by_ref(env, bp);
1670 #endif
1673 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1674 CPU loop after each instruction */
1675 void cpu_single_step(CPUArchState *env, int enabled)
1677 #if defined(TARGET_HAS_ICE)
1678 if (env->singlestep_enabled != enabled) {
1679 env->singlestep_enabled = enabled;
1680 if (kvm_enabled())
1681 kvm_update_guest_debug(env, 0);
1682 else {
1683 /* must flush all the translated code to avoid inconsistencies */
1684 /* XXX: only flush what is necessary */
1685 tb_flush(env);
1688 #endif
1691 static void cpu_unlink_tb(CPUArchState *env)
1693 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1694 problem and hope the cpu will stop of its own accord. For userspace
1695 emulation this often isn't actually as bad as it sounds. Often
1696 signals are used primarily to interrupt blocking syscalls. */
1697 TranslationBlock *tb;
1698 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1700 spin_lock(&interrupt_lock);
1701 tb = env->current_tb;
1702 /* if the cpu is currently executing code, we must unlink it and
1703 all the potentially executing TB */
1704 if (tb) {
1705 env->current_tb = NULL;
1706 tb_reset_jump_recursive(tb);
1708 spin_unlock(&interrupt_lock);
1711 #ifndef CONFIG_USER_ONLY
1712 /* mask must never be zero, except for A20 change call */
1713 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1715 CPUState *cpu = ENV_GET_CPU(env);
1716 int old_mask;
1718 old_mask = env->interrupt_request;
1719 env->interrupt_request |= mask;
1722 * If called from iothread context, wake the target cpu in
1723 * case its halted.
1725 if (!qemu_cpu_is_self(cpu)) {
1726 qemu_cpu_kick(cpu);
1727 return;
1730 if (use_icount) {
1731 env->icount_decr.u16.high = 0xffff;
1732 if (!can_do_io(env)
1733 && (mask & ~old_mask) != 0) {
1734 cpu_abort(env, "Raised interrupt while not in I/O function");
1736 } else {
1737 cpu_unlink_tb(env);
1741 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1743 #else /* CONFIG_USER_ONLY */
1745 void cpu_interrupt(CPUArchState *env, int mask)
1747 env->interrupt_request |= mask;
1748 cpu_unlink_tb(env);
1750 #endif /* CONFIG_USER_ONLY */
1752 void cpu_reset_interrupt(CPUArchState *env, int mask)
1754 env->interrupt_request &= ~mask;
1757 void cpu_exit(CPUArchState *env)
1759 env->exit_request = 1;
1760 cpu_unlink_tb(env);
1763 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1765 va_list ap;
1766 va_list ap2;
1768 va_start(ap, fmt);
1769 va_copy(ap2, ap);
1770 fprintf(stderr, "qemu: fatal: ");
1771 vfprintf(stderr, fmt, ap);
1772 fprintf(stderr, "\n");
1773 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1774 if (qemu_log_enabled()) {
1775 qemu_log("qemu: fatal: ");
1776 qemu_log_vprintf(fmt, ap2);
1777 qemu_log("\n");
1778 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1779 qemu_log_flush();
1780 qemu_log_close();
1782 va_end(ap2);
1783 va_end(ap);
1784 #if defined(CONFIG_USER_ONLY)
1786 struct sigaction act;
1787 sigfillset(&act.sa_mask);
1788 act.sa_handler = SIG_DFL;
1789 sigaction(SIGABRT, &act, NULL);
1791 #endif
1792 abort();
1795 CPUArchState *cpu_copy(CPUArchState *env)
1797 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1798 CPUArchState *next_cpu = new_env->next_cpu;
1799 int cpu_index = new_env->cpu_index;
1800 #if defined(TARGET_HAS_ICE)
1801 CPUBreakpoint *bp;
1802 CPUWatchpoint *wp;
1803 #endif
1805 memcpy(new_env, env, sizeof(CPUArchState));
1807 /* Preserve chaining and index. */
1808 new_env->next_cpu = next_cpu;
1809 new_env->cpu_index = cpu_index;
1811 /* Clone all break/watchpoints.
1812 Note: Once we support ptrace with hw-debug register access, make sure
1813 BP_CPU break/watchpoints are handled correctly on clone. */
1814 QTAILQ_INIT(&env->breakpoints);
1815 QTAILQ_INIT(&env->watchpoints);
1816 #if defined(TARGET_HAS_ICE)
1817 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1818 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1820 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1821 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1822 wp->flags, NULL);
1824 #endif
1826 return new_env;
1829 #if !defined(CONFIG_USER_ONLY)
1830 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1832 unsigned int i;
1834 /* Discard jump cache entries for any tb which might potentially
1835 overlap the flushed page. */
1836 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1837 memset (&env->tb_jmp_cache[i], 0,
1838 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1840 i = tb_jmp_cache_hash_page(addr);
1841 memset (&env->tb_jmp_cache[i], 0,
1842 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1845 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1846 uintptr_t length)
1848 uintptr_t start1;
1850 /* we modify the TLB cache so that the dirty bit will be set again
1851 when accessing the range */
1852 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1853 /* Check that we don't span multiple blocks - this breaks the
1854 address comparisons below. */
1855 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1856 != (end - 1) - start) {
1857 abort();
1859 cpu_tlb_reset_dirty_all(start1, length);
1863 /* Note: start and end must be within the same ram block. */
1864 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1865 int dirty_flags)
1867 uintptr_t length;
1869 start &= TARGET_PAGE_MASK;
1870 end = TARGET_PAGE_ALIGN(end);
1872 length = end - start;
1873 if (length == 0)
1874 return;
1875 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1877 if (tcg_enabled()) {
1878 tlb_reset_dirty_range_all(start, end, length);
1882 static int cpu_physical_memory_set_dirty_tracking(int enable)
1884 int ret = 0;
1885 in_migration = enable;
1886 return ret;
1889 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1890 MemoryRegionSection *section,
1891 target_ulong vaddr,
1892 hwaddr paddr,
1893 int prot,
1894 target_ulong *address)
1896 hwaddr iotlb;
1897 CPUWatchpoint *wp;
1899 if (memory_region_is_ram(section->mr)) {
1900 /* Normal RAM. */
1901 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1902 + memory_region_section_addr(section, paddr);
1903 if (!section->readonly) {
1904 iotlb |= phys_section_notdirty;
1905 } else {
1906 iotlb |= phys_section_rom;
1908 } else {
1909 /* IO handlers are currently passed a physical address.
1910 It would be nice to pass an offset from the base address
1911 of that region. This would avoid having to special case RAM,
1912 and avoid full address decoding in every device.
1913 We can't use the high bits of pd for this because
1914 IO_MEM_ROMD uses these as a ram address. */
1915 iotlb = section - phys_sections;
1916 iotlb += memory_region_section_addr(section, paddr);
1919 /* Make accesses to pages with watchpoints go via the
1920 watchpoint trap routines. */
1921 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1922 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1923 /* Avoid trapping reads of pages with a write breakpoint. */
1924 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1925 iotlb = phys_section_watch + paddr;
1926 *address |= TLB_MMIO;
1927 break;
1932 return iotlb;
1935 #else
1937 * Walks guest process memory "regions" one by one
1938 * and calls callback function 'fn' for each region.
1941 struct walk_memory_regions_data
1943 walk_memory_regions_fn fn;
1944 void *priv;
1945 uintptr_t start;
1946 int prot;
1949 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1950 abi_ulong end, int new_prot)
1952 if (data->start != -1ul) {
1953 int rc = data->fn(data->priv, data->start, end, data->prot);
1954 if (rc != 0) {
1955 return rc;
1959 data->start = (new_prot ? end : -1ul);
1960 data->prot = new_prot;
1962 return 0;
1965 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1966 abi_ulong base, int level, void **lp)
1968 abi_ulong pa;
1969 int i, rc;
1971 if (*lp == NULL) {
1972 return walk_memory_regions_end(data, base, 0);
1975 if (level == 0) {
1976 PageDesc *pd = *lp;
1977 for (i = 0; i < L2_SIZE; ++i) {
1978 int prot = pd[i].flags;
1980 pa = base | (i << TARGET_PAGE_BITS);
1981 if (prot != data->prot) {
1982 rc = walk_memory_regions_end(data, pa, prot);
1983 if (rc != 0) {
1984 return rc;
1988 } else {
1989 void **pp = *lp;
1990 for (i = 0; i < L2_SIZE; ++i) {
1991 pa = base | ((abi_ulong)i <<
1992 (TARGET_PAGE_BITS + L2_BITS * level));
1993 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1994 if (rc != 0) {
1995 return rc;
2000 return 0;
2003 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2005 struct walk_memory_regions_data data;
2006 uintptr_t i;
2008 data.fn = fn;
2009 data.priv = priv;
2010 data.start = -1ul;
2011 data.prot = 0;
2013 for (i = 0; i < V_L1_SIZE; i++) {
2014 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2015 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2016 if (rc != 0) {
2017 return rc;
2021 return walk_memory_regions_end(&data, 0, 0);
2024 static int dump_region(void *priv, abi_ulong start,
2025 abi_ulong end, unsigned long prot)
2027 FILE *f = (FILE *)priv;
2029 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2030 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2031 start, end, end - start,
2032 ((prot & PAGE_READ) ? 'r' : '-'),
2033 ((prot & PAGE_WRITE) ? 'w' : '-'),
2034 ((prot & PAGE_EXEC) ? 'x' : '-'));
2036 return (0);
2039 /* dump memory mappings */
2040 void page_dump(FILE *f)
2042 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2043 "start", "end", "size", "prot");
2044 walk_memory_regions(f, dump_region);
2047 int page_get_flags(target_ulong address)
2049 PageDesc *p;
2051 p = page_find(address >> TARGET_PAGE_BITS);
2052 if (!p)
2053 return 0;
2054 return p->flags;
2057 /* Modify the flags of a page and invalidate the code if necessary.
2058 The flag PAGE_WRITE_ORG is positioned automatically depending
2059 on PAGE_WRITE. The mmap_lock should already be held. */
2060 void page_set_flags(target_ulong start, target_ulong end, int flags)
2062 target_ulong addr, len;
2064 /* This function should never be called with addresses outside the
2065 guest address space. If this assert fires, it probably indicates
2066 a missing call to h2g_valid. */
2067 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2068 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2069 #endif
2070 assert(start < end);
2072 start = start & TARGET_PAGE_MASK;
2073 end = TARGET_PAGE_ALIGN(end);
2075 if (flags & PAGE_WRITE) {
2076 flags |= PAGE_WRITE_ORG;
2079 for (addr = start, len = end - start;
2080 len != 0;
2081 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2082 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2084 /* If the write protection bit is set, then we invalidate
2085 the code inside. */
2086 if (!(p->flags & PAGE_WRITE) &&
2087 (flags & PAGE_WRITE) &&
2088 p->first_tb) {
2089 tb_invalidate_phys_page(addr, 0, NULL);
2091 p->flags = flags;
2095 int page_check_range(target_ulong start, target_ulong len, int flags)
2097 PageDesc *p;
2098 target_ulong end;
2099 target_ulong addr;
2101 /* This function should never be called with addresses outside the
2102 guest address space. If this assert fires, it probably indicates
2103 a missing call to h2g_valid. */
2104 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2105 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2106 #endif
2108 if (len == 0) {
2109 return 0;
2111 if (start + len - 1 < start) {
2112 /* We've wrapped around. */
2113 return -1;
2116 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2117 start = start & TARGET_PAGE_MASK;
2119 for (addr = start, len = end - start;
2120 len != 0;
2121 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2122 p = page_find(addr >> TARGET_PAGE_BITS);
2123 if( !p )
2124 return -1;
2125 if( !(p->flags & PAGE_VALID) )
2126 return -1;
2128 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2129 return -1;
2130 if (flags & PAGE_WRITE) {
2131 if (!(p->flags & PAGE_WRITE_ORG))
2132 return -1;
2133 /* unprotect the page if it was put read-only because it
2134 contains translated code */
2135 if (!(p->flags & PAGE_WRITE)) {
2136 if (!page_unprotect(addr, 0, NULL))
2137 return -1;
2139 return 0;
2142 return 0;
2145 /* called from signal handler: invalidate the code and unprotect the
2146 page. Return TRUE if the fault was successfully handled. */
2147 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2149 unsigned int prot;
2150 PageDesc *p;
2151 target_ulong host_start, host_end, addr;
2153 /* Technically this isn't safe inside a signal handler. However we
2154 know this only ever happens in a synchronous SEGV handler, so in
2155 practice it seems to be ok. */
2156 mmap_lock();
2158 p = page_find(address >> TARGET_PAGE_BITS);
2159 if (!p) {
2160 mmap_unlock();
2161 return 0;
2164 /* if the page was really writable, then we change its
2165 protection back to writable */
2166 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2167 host_start = address & qemu_host_page_mask;
2168 host_end = host_start + qemu_host_page_size;
2170 prot = 0;
2171 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2172 p = page_find(addr >> TARGET_PAGE_BITS);
2173 p->flags |= PAGE_WRITE;
2174 prot |= p->flags;
2176 /* and since the content will be modified, we must invalidate
2177 the corresponding translated code. */
2178 tb_invalidate_phys_page(addr, pc, puc);
2179 #ifdef DEBUG_TB_CHECK
2180 tb_invalidate_check(addr);
2181 #endif
2183 mprotect((void *)g2h(host_start), qemu_host_page_size,
2184 prot & PAGE_BITS);
2186 mmap_unlock();
2187 return 1;
2189 mmap_unlock();
2190 return 0;
2192 #endif /* defined(CONFIG_USER_ONLY) */
2194 #if !defined(CONFIG_USER_ONLY)
2196 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2197 typedef struct subpage_t {
2198 MemoryRegion iomem;
2199 hwaddr base;
2200 uint16_t sub_section[TARGET_PAGE_SIZE];
2201 } subpage_t;
2203 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2204 uint16_t section);
2205 static subpage_t *subpage_init(hwaddr base);
2206 static void destroy_page_desc(uint16_t section_index)
2208 MemoryRegionSection *section = &phys_sections[section_index];
2209 MemoryRegion *mr = section->mr;
2211 if (mr->subpage) {
2212 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2213 memory_region_destroy(&subpage->iomem);
2214 g_free(subpage);
2218 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2220 unsigned i;
2221 PhysPageEntry *p;
2223 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2224 return;
2227 p = phys_map_nodes[lp->ptr];
2228 for (i = 0; i < L2_SIZE; ++i) {
2229 if (!p[i].is_leaf) {
2230 destroy_l2_mapping(&p[i], level - 1);
2231 } else {
2232 destroy_page_desc(p[i].ptr);
2235 lp->is_leaf = 0;
2236 lp->ptr = PHYS_MAP_NODE_NIL;
2239 static void destroy_all_mappings(AddressSpaceDispatch *d)
2241 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2242 phys_map_nodes_reset();
2245 static uint16_t phys_section_add(MemoryRegionSection *section)
2247 if (phys_sections_nb == phys_sections_nb_alloc) {
2248 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2249 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2250 phys_sections_nb_alloc);
2252 phys_sections[phys_sections_nb] = *section;
2253 return phys_sections_nb++;
2256 static void phys_sections_clear(void)
2258 phys_sections_nb = 0;
2261 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2263 subpage_t *subpage;
2264 hwaddr base = section->offset_within_address_space
2265 & TARGET_PAGE_MASK;
2266 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2267 MemoryRegionSection subsection = {
2268 .offset_within_address_space = base,
2269 .size = TARGET_PAGE_SIZE,
2271 hwaddr start, end;
2273 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2275 if (!(existing->mr->subpage)) {
2276 subpage = subpage_init(base);
2277 subsection.mr = &subpage->iomem;
2278 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2279 phys_section_add(&subsection));
2280 } else {
2281 subpage = container_of(existing->mr, subpage_t, iomem);
2283 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2284 end = start + section->size - 1;
2285 subpage_register(subpage, start, end, phys_section_add(section));
2289 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2291 hwaddr start_addr = section->offset_within_address_space;
2292 ram_addr_t size = section->size;
2293 hwaddr addr;
2294 uint16_t section_index = phys_section_add(section);
2296 assert(size);
2298 addr = start_addr;
2299 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2300 section_index);
/* Memory listener callback: enter 'section' into the dispatch that owns
   'listener'.  The section is consumed front to back in up to three steps:
   an optional leading piece, a run of page-sized-or-larger pieces, and an
   optional trailing piece.  'now' is the piece being registered, 'remain'
   what is left after it. */
2303 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2305 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2306 MemoryRegionSection now = *section, remain = *section;
/* leading piece: taken when the section starts off a page boundary or is
   smaller than a page; it extends to the next page boundary at most */
2308 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2309 || (now.size < TARGET_PAGE_SIZE)) {
/* NOTE(review): for a page-aligned start with size < TARGET_PAGE_SIZE the
   first MIN() operand is 0, so a zero-sized piece is registered here and
   the real data is picked up by the trailing step - confirm this is
   intended */
2310 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2311 - now.offset_within_address_space,
2312 now.size);
2313 register_subpage(d, &now);
2314 remain.size -= now.size;
2315 remain.offset_within_address_space += now.size;
2316 remain.offset_within_region += now.size;
/* middle: while at least one full page remains */
2318 while (remain.size >= TARGET_PAGE_SIZE) {
2319 now = remain;
/* when the offset inside the region is not page aligned, go one page at a
   time through the subpage path; otherwise register all remaining whole
   pages at once */
2320 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2321 now.size = TARGET_PAGE_SIZE;
2322 register_subpage(d, &now);
2323 } else {
2324 now.size &= TARGET_PAGE_MASK;
2325 register_multipage(d, &now);
2327 remain.size -= now.size;
2328 remain.offset_within_address_space += now.size;
2329 remain.offset_within_region += now.size;
/* trailing piece: whatever is left is smaller than a page */
2331 now = remain;
2332 if (now.size) {
2333 register_subpage(d, &now);
/* Flush the coalesced MMIO buffer; only does something when KVM is
   enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2343 #if defined(__linux__) && !defined(TARGET_S390X)
2345 #include <sys/vfs.h>
2347 #define HUGETLBFS_MAGIC 0x958458f6
2349 static long gethugepagesize(const char *path)
2351 struct statfs fs;
2352 int ret;
2354 do {
2355 ret = statfs(path, &fs);
2356 } while (ret != 0 && errno == EINTR);
2358 if (ret != 0) {
2359 perror(path);
2360 return 0;
2363 if (fs.f_type != HUGETLBFS_MAGIC)
2364 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2366 return fs.f_bsize;
2369 static void *file_ram_alloc(RAMBlock *block,
2370 ram_addr_t memory,
2371 const char *path)
2373 char *filename;
2374 void *area;
2375 int fd;
2376 #ifdef MAP_POPULATE
2377 int flags;
2378 #endif
2379 unsigned long hpagesize;
2381 hpagesize = gethugepagesize(path);
2382 if (!hpagesize) {
2383 return NULL;
2386 if (memory < hpagesize) {
2387 return NULL;
2390 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2391 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2392 return NULL;
2395 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2396 return NULL;
2399 fd = mkstemp(filename);
2400 if (fd < 0) {
2401 perror("unable to create backing store for hugepages");
2402 free(filename);
2403 return NULL;
2405 unlink(filename);
2406 free(filename);
2408 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2411 * ftruncate is not supported by hugetlbfs in older
2412 * hosts, so don't bother bailing out on errors.
2413 * If anything goes wrong with it under other filesystems,
2414 * mmap will fail.
2416 if (ftruncate(fd, memory))
2417 perror("ftruncate");
2419 #ifdef MAP_POPULATE
2420 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2421 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2422 * to sidestep this quirk.
2424 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2425 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2426 #else
2427 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2428 #endif
2429 if (area == MAP_FAILED) {
2430 perror("file_ram_alloc: can't mmap RAM pages");
2431 close(fd);
2432 return (NULL);
2434 block->fd = fd;
2435 return area;
2437 #endif
2439 static ram_addr_t find_ram_offset(ram_addr_t size)
2441 RAMBlock *block, *next_block;
2442 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2444 if (QLIST_EMPTY(&ram_list.blocks))
2445 return 0;
2447 QLIST_FOREACH(block, &ram_list.blocks, next) {
2448 ram_addr_t end, next = RAM_ADDR_MAX;
2450 end = block->offset + block->length;
2452 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2453 if (next_block->offset >= end) {
2454 next = MIN(next, next_block->offset);
2457 if (next - end >= size && next - end < mingap) {
2458 offset = end;
2459 mingap = next - end;
2463 if (offset == RAM_ADDR_MAX) {
2464 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2465 (uint64_t)size);
2466 abort();
2469 return offset;
2472 ram_addr_t last_ram_offset(void)
2474 RAMBlock *block;
2475 ram_addr_t last = 0;
2477 QLIST_FOREACH(block, &ram_list.blocks, next)
2478 last = MAX(last, block->offset + block->length);
2480 return last;
2483 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2485 int ret;
2486 QemuOpts *machine_opts;
2488 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2489 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2490 if (machine_opts &&
2491 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2492 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2493 if (ret) {
2494 perror("qemu_madvise");
2495 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2496 "but dump_guest_core=off specified\n");
2501 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2503 RAMBlock *new_block, *block;
2505 new_block = NULL;
2506 QLIST_FOREACH(block, &ram_list.blocks, next) {
2507 if (block->offset == addr) {
2508 new_block = block;
2509 break;
2512 assert(new_block);
2513 assert(!new_block->idstr[0]);
2515 if (dev) {
2516 char *id = qdev_get_dev_path(dev);
2517 if (id) {
2518 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2519 g_free(id);
2522 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2524 QLIST_FOREACH(block, &ram_list.blocks, next) {
2525 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2526 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2527 new_block->idstr);
2528 abort();
2533 static int memory_try_enable_merging(void *addr, size_t len)
2535 QemuOpts *opts;
2537 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2538 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2539 /* disabled by the user */
2540 return 0;
2543 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2546 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2547 MemoryRegion *mr)
2549 RAMBlock *new_block;
2551 size = TARGET_PAGE_ALIGN(size);
2552 new_block = g_malloc0(sizeof(*new_block));
2554 new_block->mr = mr;
2555 new_block->offset = find_ram_offset(size);
2556 if (host) {
2557 new_block->host = host;
2558 new_block->flags |= RAM_PREALLOC_MASK;
2559 } else {
2560 if (mem_path) {
2561 #if defined (__linux__) && !defined(TARGET_S390X)
2562 new_block->host = file_ram_alloc(new_block, size, mem_path);
2563 if (!new_block->host) {
2564 new_block->host = qemu_vmalloc(size);
2565 memory_try_enable_merging(new_block->host, size);
2567 #else
2568 fprintf(stderr, "-mem-path option unsupported\n");
2569 exit(1);
2570 #endif
2571 } else {
2572 if (xen_enabled()) {
2573 xen_ram_alloc(new_block->offset, size, mr);
2574 } else if (kvm_enabled()) {
2575 /* some s390/kvm configurations have special constraints */
2576 new_block->host = kvm_vmalloc(size);
2577 } else {
2578 new_block->host = qemu_vmalloc(size);
2580 memory_try_enable_merging(new_block->host, size);
2583 new_block->length = size;
2585 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2587 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2588 last_ram_offset() >> TARGET_PAGE_BITS);
2589 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2590 0, size >> TARGET_PAGE_BITS);
2591 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2593 qemu_ram_setup_dump(new_block->host, size);
2594 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2596 if (kvm_enabled())
2597 kvm_setup_guest_memory(new_block->host, size);
2599 return new_block->offset;
2602 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2604 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2607 void qemu_ram_free_from_ptr(ram_addr_t addr)
2609 RAMBlock *block;
2611 QLIST_FOREACH(block, &ram_list.blocks, next) {
2612 if (addr == block->offset) {
2613 QLIST_REMOVE(block, next);
2614 g_free(block);
2615 return;
2620 void qemu_ram_free(ram_addr_t addr)
2622 RAMBlock *block;
2624 QLIST_FOREACH(block, &ram_list.blocks, next) {
2625 if (addr == block->offset) {
2626 QLIST_REMOVE(block, next);
2627 if (block->flags & RAM_PREALLOC_MASK) {
2629 } else if (mem_path) {
2630 #if defined (__linux__) && !defined(TARGET_S390X)
2631 if (block->fd) {
2632 munmap(block->host, block->length);
2633 close(block->fd);
2634 } else {
2635 qemu_vfree(block->host);
2637 #else
2638 abort();
2639 #endif
2640 } else {
2641 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2642 munmap(block->host, block->length);
2643 #else
2644 if (xen_enabled()) {
2645 xen_invalidate_map_cache_entry(block->host);
2646 } else {
2647 qemu_vfree(block->host);
2649 #endif
2651 g_free(block);
2652 return;
2658 #ifndef _WIN32
2659 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2661 RAMBlock *block;
2662 ram_addr_t offset;
2663 int flags;
2664 void *area, *vaddr;
2666 QLIST_FOREACH(block, &ram_list.blocks, next) {
2667 offset = addr - block->offset;
2668 if (offset < block->length) {
2669 vaddr = block->host + offset;
2670 if (block->flags & RAM_PREALLOC_MASK) {
2672 } else {
2673 flags = MAP_FIXED;
2674 munmap(vaddr, length);
2675 if (mem_path) {
2676 #if defined(__linux__) && !defined(TARGET_S390X)
2677 if (block->fd) {
2678 #ifdef MAP_POPULATE
2679 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2680 MAP_PRIVATE;
2681 #else
2682 flags |= MAP_PRIVATE;
2683 #endif
2684 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2685 flags, block->fd, offset);
2686 } else {
2687 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2688 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2689 flags, -1, 0);
2691 #else
2692 abort();
2693 #endif
2694 } else {
2695 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2696 flags |= MAP_SHARED | MAP_ANONYMOUS;
2697 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2698 flags, -1, 0);
2699 #else
2700 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2701 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2702 flags, -1, 0);
2703 #endif
2705 if (area != vaddr) {
2706 fprintf(stderr, "Could not remap addr: "
2707 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2708 length, addr);
2709 exit(1);
2711 memory_try_enable_merging(vaddr, length);
2712 qemu_ram_setup_dump(vaddr, length);
2714 return;
2718 #endif /* !_WIN32 */
2720 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2721 With the exception of the softmmu code in this file, this should
2722 only be used for local memory (e.g. video ram) that the device owns,
2723 and knows it isn't going to access beyond the end of the block.
2725 It should not be used for general purpose DMA.
2726 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2728 void *qemu_get_ram_ptr(ram_addr_t addr)
2730 RAMBlock *block;
2732 QLIST_FOREACH(block, &ram_list.blocks, next) {
2733 if (addr - block->offset < block->length) {
2734 /* Move this entry to to start of the list. */
2735 if (block != QLIST_FIRST(&ram_list.blocks)) {
2736 QLIST_REMOVE(block, next);
2737 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2739 if (xen_enabled()) {
2740 /* We need to check if the requested address is in the RAM
2741 * because we don't want to map the entire memory in QEMU.
2742 * In that case just map until the end of the page.
2744 if (block->offset == 0) {
2745 return xen_map_cache(addr, 0, 0);
2746 } else if (block->host == NULL) {
2747 block->host =
2748 xen_map_cache(block->offset, block->length, 1);
2751 return block->host + (addr - block->offset);
2755 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2756 abort();
2758 return NULL;
2761 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2762 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2764 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2766 RAMBlock *block;
2768 QLIST_FOREACH(block, &ram_list.blocks, next) {
2769 if (addr - block->offset < block->length) {
2770 if (xen_enabled()) {
2771 /* We need to check if the requested address is in the RAM
2772 * because we don't want to map the entire memory in QEMU.
2773 * In that case just map until the end of the page.
2775 if (block->offset == 0) {
2776 return xen_map_cache(addr, 0, 0);
2777 } else if (block->host == NULL) {
2778 block->host =
2779 xen_map_cache(block->offset, block->length, 1);
2782 return block->host + (addr - block->offset);
2786 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2787 abort();
2789 return NULL;
2792 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2793 * but takes a size argument */
2794 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2796 if (*size == 0) {
2797 return NULL;
2799 if (xen_enabled()) {
2800 return xen_map_cache(addr, *size, 1);
2801 } else {
2802 RAMBlock *block;
2804 QLIST_FOREACH(block, &ram_list.blocks, next) {
2805 if (addr - block->offset < block->length) {
2806 if (addr - block->offset + *size > block->length)
2807 *size = block->length - addr + block->offset;
2808 return block->host + (addr - block->offset);
2812 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2813 abort();
/* Counterpart of qemu_get_ram_ptr(); here it only emits a trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2822 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2824 RAMBlock *block;
2825 uint8_t *host = ptr;
2827 if (xen_enabled()) {
2828 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2829 return 0;
2832 QLIST_FOREACH(block, &ram_list.blocks, next) {
2833 /* This case append when the block is not mapped. */
2834 if (block->host == NULL) {
2835 continue;
2837 if (host - block->host < block->length) {
2838 *ram_addr = block->offset + (host - block->host);
2839 return 0;
2843 return -1;
2846 /* Some of the softmmu routines need to translate from a host pointer
2847 (typically a TLB entry) back to a ram offset. */
2848 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2850 ram_addr_t ram_addr;
2852 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2853 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2854 abort();
2856 return ram_addr;
2859 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2860 unsigned size)
2862 #ifdef DEBUG_UNASSIGNED
2863 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2864 #endif
2865 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2866 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2867 #endif
2868 return 0;
2871 static void unassigned_mem_write(void *opaque, hwaddr addr,
2872 uint64_t val, unsigned size)
2874 #ifdef DEBUG_UNASSIGNED
2875 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2876 #endif
2877 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2878 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2879 #endif
2882 static const MemoryRegionOps unassigned_mem_ops = {
2883 .read = unassigned_mem_read,
2884 .write = unassigned_mem_write,
2885 .endianness = DEVICE_NATIVE_ENDIAN,
2888 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2889 unsigned size)
2891 abort();
2894 static void error_mem_write(void *opaque, hwaddr addr,
2895 uint64_t value, unsigned size)
2897 abort();
2900 static const MemoryRegionOps error_mem_ops = {
2901 .read = error_mem_read,
2902 .write = error_mem_write,
2903 .endianness = DEVICE_NATIVE_ENDIAN,
2906 static const MemoryRegionOps rom_mem_ops = {
2907 .read = error_mem_read,
2908 .write = unassigned_mem_write,
2909 .endianness = DEVICE_NATIVE_ENDIAN,
2912 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2913 uint64_t val, unsigned size)
2915 int dirty_flags;
2916 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2917 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2918 #if !defined(CONFIG_USER_ONLY)
2919 tb_invalidate_phys_page_fast(ram_addr, size);
2920 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2921 #endif
2923 switch (size) {
2924 case 1:
2925 stb_p(qemu_get_ram_ptr(ram_addr), val);
2926 break;
2927 case 2:
2928 stw_p(qemu_get_ram_ptr(ram_addr), val);
2929 break;
2930 case 4:
2931 stl_p(qemu_get_ram_ptr(ram_addr), val);
2932 break;
2933 default:
2934 abort();
2936 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2937 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2938 /* we remove the notdirty callback only if the code has been
2939 flushed */
2940 if (dirty_flags == 0xff)
2941 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2944 static const MemoryRegionOps notdirty_mem_ops = {
2945 .read = error_mem_read,
2946 .write = notdirty_mem_write,
2947 .endianness = DEVICE_NATIVE_ENDIAN,
2950 /* Generate a debug exception if a watchpoint has been hit. */
2951 static void check_watchpoint(int offset, int len_mask, int flags)
2953 CPUArchState *env = cpu_single_env;
2954 target_ulong pc, cs_base;
2955 TranslationBlock *tb;
2956 target_ulong vaddr;
2957 CPUWatchpoint *wp;
2958 int cpu_flags;
2960 if (env->watchpoint_hit) {
2961 /* We re-entered the check after replacing the TB. Now raise
2962 * the debug interrupt so that is will trigger after the
2963 * current instruction. */
2964 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2965 return;
2967 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2968 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2969 if ((vaddr == (wp->vaddr & len_mask) ||
2970 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2971 wp->flags |= BP_WATCHPOINT_HIT;
2972 if (!env->watchpoint_hit) {
2973 env->watchpoint_hit = wp;
2974 tb = tb_find_pc(env->mem_io_pc);
2975 if (!tb) {
2976 cpu_abort(env, "check_watchpoint: could not find TB for "
2977 "pc=%p", (void *)env->mem_io_pc);
2979 cpu_restore_state(tb, env, env->mem_io_pc);
2980 tb_phys_invalidate(tb, -1);
2981 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2982 env->exception_index = EXCP_DEBUG;
2983 cpu_loop_exit(env);
2984 } else {
2985 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2986 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2987 cpu_resume_from_signal(env, NULL);
2990 } else {
2991 wp->flags &= ~BP_WATCHPOINT_HIT;
2996 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2997 so these check for a hit then pass through to the normal out-of-line
2998 phys routines. */
2999 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
3000 unsigned size)
3002 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3003 switch (size) {
3004 case 1: return ldub_phys(addr);
3005 case 2: return lduw_phys(addr);
3006 case 4: return ldl_phys(addr);
3007 default: abort();
3011 static void watch_mem_write(void *opaque, hwaddr addr,
3012 uint64_t val, unsigned size)
3014 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3015 switch (size) {
3016 case 1:
3017 stb_phys(addr, val);
3018 break;
3019 case 2:
3020 stw_phys(addr, val);
3021 break;
3022 case 4:
3023 stl_phys(addr, val);
3024 break;
3025 default: abort();
3029 static const MemoryRegionOps watch_mem_ops = {
3030 .read = watch_mem_read,
3031 .write = watch_mem_write,
3032 .endianness = DEVICE_NATIVE_ENDIAN,
3035 static uint64_t subpage_read(void *opaque, hwaddr addr,
3036 unsigned len)
3038 subpage_t *mmio = opaque;
3039 unsigned int idx = SUBPAGE_IDX(addr);
3040 MemoryRegionSection *section;
3041 #if defined(DEBUG_SUBPAGE)
3042 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3043 mmio, len, addr, idx);
3044 #endif
3046 section = &phys_sections[mmio->sub_section[idx]];
3047 addr += mmio->base;
3048 addr -= section->offset_within_address_space;
3049 addr += section->offset_within_region;
3050 return io_mem_read(section->mr, addr, len);
3053 static void subpage_write(void *opaque, hwaddr addr,
3054 uint64_t value, unsigned len)
3056 subpage_t *mmio = opaque;
3057 unsigned int idx = SUBPAGE_IDX(addr);
3058 MemoryRegionSection *section;
3059 #if defined(DEBUG_SUBPAGE)
3060 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3061 " idx %d value %"PRIx64"\n",
3062 __func__, mmio, len, addr, idx, value);
3063 #endif
3065 section = &phys_sections[mmio->sub_section[idx]];
3066 addr += mmio->base;
3067 addr -= section->offset_within_address_space;
3068 addr += section->offset_within_region;
3069 io_mem_write(section->mr, addr, value, len);
3072 static const MemoryRegionOps subpage_ops = {
3073 .read = subpage_read,
3074 .write = subpage_write,
3075 .endianness = DEVICE_NATIVE_ENDIAN,
3078 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3079 unsigned size)
3081 ram_addr_t raddr = addr;
3082 void *ptr = qemu_get_ram_ptr(raddr);
3083 switch (size) {
3084 case 1: return ldub_p(ptr);
3085 case 2: return lduw_p(ptr);
3086 case 4: return ldl_p(ptr);
3087 default: abort();
3091 static void subpage_ram_write(void *opaque, hwaddr addr,
3092 uint64_t value, unsigned size)
3094 ram_addr_t raddr = addr;
3095 void *ptr = qemu_get_ram_ptr(raddr);
3096 switch (size) {
3097 case 1: return stb_p(ptr, value);
3098 case 2: return stw_p(ptr, value);
3099 case 4: return stl_p(ptr, value);
3100 default: abort();
3104 static const MemoryRegionOps subpage_ram_ops = {
3105 .read = subpage_ram_read,
3106 .write = subpage_ram_write,
3107 .endianness = DEVICE_NATIVE_ENDIAN,
3110 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3111 uint16_t section)
3113 int idx, eidx;
3115 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3116 return -1;
3117 idx = SUBPAGE_IDX(start);
3118 eidx = SUBPAGE_IDX(end);
3119 #if defined(DEBUG_SUBPAGE)
3120 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3121 mmio, start, end, idx, eidx, memory);
3122 #endif
3123 if (memory_region_is_ram(phys_sections[section].mr)) {
3124 MemoryRegionSection new_section = phys_sections[section];
3125 new_section.mr = &io_mem_subpage_ram;
3126 section = phys_section_add(&new_section);
3128 for (; idx <= eidx; idx++) {
3129 mmio->sub_section[idx] = section;
3132 return 0;
3135 static subpage_t *subpage_init(hwaddr base)
3137 subpage_t *mmio;
3139 mmio = g_malloc0(sizeof(subpage_t));
3141 mmio->base = base;
3142 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3143 "subpage", TARGET_PAGE_SIZE);
3144 mmio->iomem.subpage = true;
3145 #if defined(DEBUG_SUBPAGE)
3146 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3147 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3148 #endif
3149 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3151 return mmio;
3154 static uint16_t dummy_section(MemoryRegion *mr)
3156 MemoryRegionSection section = {
3157 .mr = mr,
3158 .offset_within_address_space = 0,
3159 .offset_within_region = 0,
3160 .size = UINT64_MAX,
3163 return phys_section_add(&section);
3166 MemoryRegion *iotlb_to_region(hwaddr index)
3168 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3171 static void io_mem_init(void)
3173 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3174 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3175 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3176 "unassigned", UINT64_MAX);
3177 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3178 "notdirty", UINT64_MAX);
3179 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3180 "subpage-ram", UINT64_MAX);
3181 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3182 "watch", UINT64_MAX);
3185 static void mem_begin(MemoryListener *listener)
3187 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3189 destroy_all_mappings(d);
3190 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3193 static void core_begin(MemoryListener *listener)
3195 phys_sections_clear();
3196 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3197 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3198 phys_section_rom = dummy_section(&io_mem_rom);
3199 phys_section_watch = dummy_section(&io_mem_watch);
3202 static void tcg_commit(MemoryListener *listener)
3204 CPUArchState *env;
3206 /* since each CPU stores ram addresses in its TLB cache, we must
3207 reset the modified entries */
3208 /* XXX: slow ! */
3209 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3210 tlb_flush(env, 1);
3214 static void core_log_global_start(MemoryListener *listener)
3216 cpu_physical_memory_set_dirty_tracking(1);
3219 static void core_log_global_stop(MemoryListener *listener)
3221 cpu_physical_memory_set_dirty_tracking(0);
3224 static void io_region_add(MemoryListener *listener,
3225 MemoryRegionSection *section)
3227 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3229 mrio->mr = section->mr;
3230 mrio->offset = section->offset_within_region;
3231 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3232 section->offset_within_address_space, section->size);
3233 ioport_register(&mrio->iorange);
3236 static void io_region_del(MemoryListener *listener,
3237 MemoryRegionSection *section)
3239 isa_unassign_ioport(section->offset_within_address_space, section->size);
3242 static MemoryListener core_memory_listener = {
3243 .begin = core_begin,
3244 .log_global_start = core_log_global_start,
3245 .log_global_stop = core_log_global_stop,
3246 .priority = 1,
3249 static MemoryListener io_memory_listener = {
3250 .region_add = io_region_add,
3251 .region_del = io_region_del,
3252 .priority = 0,
3255 static MemoryListener tcg_memory_listener = {
3256 .commit = tcg_commit,
3259 void address_space_init_dispatch(AddressSpace *as)
3261 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3263 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3264 d->listener = (MemoryListener) {
3265 .begin = mem_begin,
3266 .region_add = mem_add,
3267 .region_nop = mem_add,
3268 .priority = 0,
3270 as->dispatch = d;
3271 memory_listener_register(&d->listener, as);
3274 void address_space_destroy_dispatch(AddressSpace *as)
3276 AddressSpaceDispatch *d = as->dispatch;
3278 memory_listener_unregister(&d->listener);
3279 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3280 g_free(d);
3281 as->dispatch = NULL;
3284 static void memory_map_init(void)
3286 system_memory = g_malloc(sizeof(*system_memory));
3287 memory_region_init(system_memory, "system", INT64_MAX);
3288 address_space_init(&address_space_memory, system_memory);
3289 address_space_memory.name = "memory";
3291 system_io = g_malloc(sizeof(*system_io));
3292 memory_region_init(system_io, "io", 65536);
3293 address_space_init(&address_space_io, system_io);
3294 address_space_io.name = "I/O";
3296 memory_listener_register(&core_memory_listener, &address_space_memory);
3297 memory_listener_register(&io_memory_listener, &address_space_io);
3298 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3300 dma_context_init(&dma_context_memory, &address_space_memory,
3301 NULL, NULL, NULL);
3304 MemoryRegion *get_system_memory(void)
3306 return system_memory;
3309 MemoryRegion *get_system_io(void)
3311 return system_io;
3314 #endif /* !defined(CONFIG_USER_ONLY) */
3316 /* physical memory access (slow version, mainly for debug) */
3317 #if defined(CONFIG_USER_ONLY)
3318 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3319 uint8_t *buf, int len, int is_write)
3321 int l, flags;
3322 target_ulong page;
3323 void * p;
3325 while (len > 0) {
3326 page = addr & TARGET_PAGE_MASK;
3327 l = (page + TARGET_PAGE_SIZE) - addr;
3328 if (l > len)
3329 l = len;
3330 flags = page_get_flags(page);
3331 if (!(flags & PAGE_VALID))
3332 return -1;
3333 if (is_write) {
3334 if (!(flags & PAGE_WRITE))
3335 return -1;
3336 /* XXX: this code should not depend on lock_user */
3337 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3338 return -1;
3339 memcpy(p, buf, l);
3340 unlock_user(p, addr, l);
3341 } else {
3342 if (!(flags & PAGE_READ))
3343 return -1;
3344 /* XXX: this code should not depend on lock_user */
3345 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3346 return -1;
3347 memcpy(buf, p, l);
3348 unlock_user(p, addr, 0);
3350 len -= l;
3351 buf += l;
3352 addr += l;
3354 return 0;
3357 #else
3359 static void invalidate_and_set_dirty(hwaddr addr,
3360 hwaddr length)
3362 if (!cpu_physical_memory_is_dirty(addr)) {
3363 /* invalidate code */
3364 tb_invalidate_phys_page_range(addr, addr + length, 0);
3365 /* set dirty bit */
3366 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3368 xen_modified_memory(addr, length);
3371 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3372 int len, bool is_write)
3374 AddressSpaceDispatch *d = as->dispatch;
3375 int l;
3376 uint8_t *ptr;
3377 uint32_t val;
3378 hwaddr page;
3379 MemoryRegionSection *section;
3381 while (len > 0) {
3382 page = addr & TARGET_PAGE_MASK;
3383 l = (page + TARGET_PAGE_SIZE) - addr;
3384 if (l > len)
3385 l = len;
3386 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3388 if (is_write) {
3389 if (!memory_region_is_ram(section->mr)) {
3390 hwaddr addr1;
3391 addr1 = memory_region_section_addr(section, addr);
3392 /* XXX: could force cpu_single_env to NULL to avoid
3393 potential bugs */
3394 if (l >= 4 && ((addr1 & 3) == 0)) {
3395 /* 32 bit write access */
3396 val = ldl_p(buf);
3397 io_mem_write(section->mr, addr1, val, 4);
3398 l = 4;
3399 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3400 /* 16 bit write access */
3401 val = lduw_p(buf);
3402 io_mem_write(section->mr, addr1, val, 2);
3403 l = 2;
3404 } else {
3405 /* 8 bit write access */
3406 val = ldub_p(buf);
3407 io_mem_write(section->mr, addr1, val, 1);
3408 l = 1;
3410 } else if (!section->readonly) {
3411 ram_addr_t addr1;
3412 addr1 = memory_region_get_ram_addr(section->mr)
3413 + memory_region_section_addr(section, addr);
3414 /* RAM case */
3415 ptr = qemu_get_ram_ptr(addr1);
3416 memcpy(ptr, buf, l);
3417 invalidate_and_set_dirty(addr1, l);
3418 qemu_put_ram_ptr(ptr);
3420 } else {
3421 if (!(memory_region_is_ram(section->mr) ||
3422 memory_region_is_romd(section->mr))) {
3423 hwaddr addr1;
3424 /* I/O case */
3425 addr1 = memory_region_section_addr(section, addr);
3426 if (l >= 4 && ((addr1 & 3) == 0)) {
3427 /* 32 bit read access */
3428 val = io_mem_read(section->mr, addr1, 4);
3429 stl_p(buf, val);
3430 l = 4;
3431 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3432 /* 16 bit read access */
3433 val = io_mem_read(section->mr, addr1, 2);
3434 stw_p(buf, val);
3435 l = 2;
3436 } else {
3437 /* 8 bit read access */
3438 val = io_mem_read(section->mr, addr1, 1);
3439 stb_p(buf, val);
3440 l = 1;
3442 } else {
3443 /* RAM case */
3444 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3445 + memory_region_section_addr(section,
3446 addr));
3447 memcpy(buf, ptr, l);
3448 qemu_put_ram_ptr(ptr);
3451 len -= l;
3452 buf += l;
3453 addr += l;
3457 void address_space_write(AddressSpace *as, hwaddr addr,
3458 const uint8_t *buf, int len)
3460 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3464 * address_space_read: read from an address space.
3466 * @as: #AddressSpace to be accessed
3467 * @addr: address within that address space
3468 * @buf: buffer with the data transferred
3470 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3472 address_space_rw(as, addr, buf, len, false);
3476 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3477 int len, int is_write)
3479 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3482 /* used for ROM loading : can write in RAM and ROM */
3483 void cpu_physical_memory_write_rom(hwaddr addr,
3484 const uint8_t *buf, int len)
3486 AddressSpaceDispatch *d = address_space_memory.dispatch;
3487 int l;
3488 uint8_t *ptr;
3489 hwaddr page;
3490 MemoryRegionSection *section;
3492 while (len > 0) {
3493 page = addr & TARGET_PAGE_MASK;
3494 l = (page + TARGET_PAGE_SIZE) - addr;
3495 if (l > len)
3496 l = len;
3497 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3499 if (!(memory_region_is_ram(section->mr) ||
3500 memory_region_is_romd(section->mr))) {
3501 /* do nothing */
3502 } else {
3503 unsigned long addr1;
3504 addr1 = memory_region_get_ram_addr(section->mr)
3505 + memory_region_section_addr(section, addr);
3506 /* ROM/RAM case */
3507 ptr = qemu_get_ram_ptr(addr1);
3508 memcpy(ptr, buf, l);
3509 invalidate_and_set_dirty(addr1, l);
3510 qemu_put_ram_ptr(ptr);
3512 len -= l;
3513 buf += l;
3514 addr += l;
3518 typedef struct {
3519 void *buffer;
3520 hwaddr addr;
3521 hwaddr len;
3522 } BounceBuffer;
3524 static BounceBuffer bounce;
3526 typedef struct MapClient {
3527 void *opaque;
3528 void (*callback)(void *opaque);
3529 QLIST_ENTRY(MapClient) link;
3530 } MapClient;
3532 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3533 = QLIST_HEAD_INITIALIZER(map_client_list);
3535 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3537 MapClient *client = g_malloc(sizeof(*client));
3539 client->opaque = opaque;
3540 client->callback = callback;
3541 QLIST_INSERT_HEAD(&map_client_list, client, link);
3542 return client;
3545 static void cpu_unregister_map_client(void *_client)
3547 MapClient *client = (MapClient *)_client;
3549 QLIST_REMOVE(client, link);
3550 g_free(client);
3553 static void cpu_notify_map_clients(void)
3555 MapClient *client;
3557 while (!QLIST_EMPTY(&map_client_list)) {
3558 client = QLIST_FIRST(&map_client_list);
3559 client->callback(client->opaque);
3560 cpu_unregister_map_client(client);
3564 /* Map a physical memory region into a host virtual address.
3565 * May map a subset of the requested range, given by and returned in *plen.
3566 * May return NULL if resources needed to perform the mapping are exhausted.
3567 * Use only for reads OR writes - not for read-modify-write operations.
3568 * Use cpu_register_map_client() to know when retrying the map operation is
3569 * likely to succeed.
3571 void *address_space_map(AddressSpace *as,
3572 hwaddr addr,
3573 hwaddr *plen,
3574 bool is_write)
3576 AddressSpaceDispatch *d = as->dispatch;
3577 hwaddr len = *plen;
3578 hwaddr todo = 0;
3579 int l;
3580 hwaddr page;
3581 MemoryRegionSection *section;
3582 ram_addr_t raddr = RAM_ADDR_MAX;
3583 ram_addr_t rlen;
3584 void *ret;
3586 while (len > 0) {
3587 page = addr & TARGET_PAGE_MASK;
3588 l = (page + TARGET_PAGE_SIZE) - addr;
3589 if (l > len)
3590 l = len;
3591 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3593 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3594 if (todo || bounce.buffer) {
3595 break;
3597 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3598 bounce.addr = addr;
3599 bounce.len = l;
3600 if (!is_write) {
3601 address_space_read(as, addr, bounce.buffer, l);
3604 *plen = l;
3605 return bounce.buffer;
3607 if (!todo) {
3608 raddr = memory_region_get_ram_addr(section->mr)
3609 + memory_region_section_addr(section, addr);
3612 len -= l;
3613 addr += l;
3614 todo += l;
3616 rlen = todo;
3617 ret = qemu_ram_ptr_length(raddr, &rlen);
3618 *plen = rlen;
3619 return ret;
3622 /* Unmaps a memory region previously mapped by address_space_map().
3623 * Will also mark the memory as dirty if is_write == 1. access_len gives
3624 * the amount of memory that was actually read or written by the caller.
3626 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3627 int is_write, hwaddr access_len)
3629 if (buffer != bounce.buffer) {
3630 if (is_write) {
3631 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3632 while (access_len) {
3633 unsigned l;
3634 l = TARGET_PAGE_SIZE;
3635 if (l > access_len)
3636 l = access_len;
3637 invalidate_and_set_dirty(addr1, l);
3638 addr1 += l;
3639 access_len -= l;
3642 if (xen_enabled()) {
3643 xen_invalidate_map_cache_entry(buffer);
3645 return;
3647 if (is_write) {
3648 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3650 qemu_vfree(bounce.buffer);
3651 bounce.buffer = NULL;
3652 cpu_notify_map_clients();
3655 void *cpu_physical_memory_map(hwaddr addr,
3656 hwaddr *plen,
3657 int is_write)
3659 return address_space_map(&address_space_memory, addr, plen, is_write);
3662 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3663 int is_write, hwaddr access_len)
3665 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3668 /* warning: addr must be aligned */
3669 static inline uint32_t ldl_phys_internal(hwaddr addr,
3670 enum device_endian endian)
3672 uint8_t *ptr;
3673 uint32_t val;
3674 MemoryRegionSection *section;
3676 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3678 if (!(memory_region_is_ram(section->mr) ||
3679 memory_region_is_romd(section->mr))) {
3680 /* I/O case */
3681 addr = memory_region_section_addr(section, addr);
3682 val = io_mem_read(section->mr, addr, 4);
3683 #if defined(TARGET_WORDS_BIGENDIAN)
3684 if (endian == DEVICE_LITTLE_ENDIAN) {
3685 val = bswap32(val);
3687 #else
3688 if (endian == DEVICE_BIG_ENDIAN) {
3689 val = bswap32(val);
3691 #endif
3692 } else {
3693 /* RAM case */
3694 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3695 & TARGET_PAGE_MASK)
3696 + memory_region_section_addr(section, addr));
3697 switch (endian) {
3698 case DEVICE_LITTLE_ENDIAN:
3699 val = ldl_le_p(ptr);
3700 break;
3701 case DEVICE_BIG_ENDIAN:
3702 val = ldl_be_p(ptr);
3703 break;
3704 default:
3705 val = ldl_p(ptr);
3706 break;
3709 return val;
3712 uint32_t ldl_phys(hwaddr addr)
3714 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3717 uint32_t ldl_le_phys(hwaddr addr)
3719 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3722 uint32_t ldl_be_phys(hwaddr addr)
3724 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3727 /* warning: addr must be aligned */
3728 static inline uint64_t ldq_phys_internal(hwaddr addr,
3729 enum device_endian endian)
3731 uint8_t *ptr;
3732 uint64_t val;
3733 MemoryRegionSection *section;
3735 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3737 if (!(memory_region_is_ram(section->mr) ||
3738 memory_region_is_romd(section->mr))) {
3739 /* I/O case */
3740 addr = memory_region_section_addr(section, addr);
3742 /* XXX This is broken when device endian != cpu endian.
3743 Fix and add "endian" variable check */
3744 #ifdef TARGET_WORDS_BIGENDIAN
3745 val = io_mem_read(section->mr, addr, 4) << 32;
3746 val |= io_mem_read(section->mr, addr + 4, 4);
3747 #else
3748 val = io_mem_read(section->mr, addr, 4);
3749 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3750 #endif
3751 } else {
3752 /* RAM case */
3753 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3754 & TARGET_PAGE_MASK)
3755 + memory_region_section_addr(section, addr));
3756 switch (endian) {
3757 case DEVICE_LITTLE_ENDIAN:
3758 val = ldq_le_p(ptr);
3759 break;
3760 case DEVICE_BIG_ENDIAN:
3761 val = ldq_be_p(ptr);
3762 break;
3763 default:
3764 val = ldq_p(ptr);
3765 break;
3768 return val;
3771 uint64_t ldq_phys(hwaddr addr)
3773 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3776 uint64_t ldq_le_phys(hwaddr addr)
3778 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3781 uint64_t ldq_be_phys(hwaddr addr)
3783 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3786 /* XXX: optimize */
3787 uint32_t ldub_phys(hwaddr addr)
3789 uint8_t val;
3790 cpu_physical_memory_read(addr, &val, 1);
3791 return val;
3794 /* warning: addr must be aligned */
3795 static inline uint32_t lduw_phys_internal(hwaddr addr,
3796 enum device_endian endian)
3798 uint8_t *ptr;
3799 uint64_t val;
3800 MemoryRegionSection *section;
3802 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3804 if (!(memory_region_is_ram(section->mr) ||
3805 memory_region_is_romd(section->mr))) {
3806 /* I/O case */
3807 addr = memory_region_section_addr(section, addr);
3808 val = io_mem_read(section->mr, addr, 2);
3809 #if defined(TARGET_WORDS_BIGENDIAN)
3810 if (endian == DEVICE_LITTLE_ENDIAN) {
3811 val = bswap16(val);
3813 #else
3814 if (endian == DEVICE_BIG_ENDIAN) {
3815 val = bswap16(val);
3817 #endif
3818 } else {
3819 /* RAM case */
3820 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3821 & TARGET_PAGE_MASK)
3822 + memory_region_section_addr(section, addr));
3823 switch (endian) {
3824 case DEVICE_LITTLE_ENDIAN:
3825 val = lduw_le_p(ptr);
3826 break;
3827 case DEVICE_BIG_ENDIAN:
3828 val = lduw_be_p(ptr);
3829 break;
3830 default:
3831 val = lduw_p(ptr);
3832 break;
3835 return val;
3838 uint32_t lduw_phys(hwaddr addr)
3840 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3843 uint32_t lduw_le_phys(hwaddr addr)
3845 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3848 uint32_t lduw_be_phys(hwaddr addr)
3850 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3853 /* warning: addr must be aligned. The ram page is not masked as dirty
3854 and the code inside is not invalidated. It is useful if the dirty
3855 bits are used to track modified PTEs */
3856 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3858 uint8_t *ptr;
3859 MemoryRegionSection *section;
3861 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3863 if (!memory_region_is_ram(section->mr) || section->readonly) {
3864 addr = memory_region_section_addr(section, addr);
3865 if (memory_region_is_ram(section->mr)) {
3866 section = &phys_sections[phys_section_rom];
3868 io_mem_write(section->mr, addr, val, 4);
3869 } else {
3870 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3871 & TARGET_PAGE_MASK)
3872 + memory_region_section_addr(section, addr);
3873 ptr = qemu_get_ram_ptr(addr1);
3874 stl_p(ptr, val);
3876 if (unlikely(in_migration)) {
3877 if (!cpu_physical_memory_is_dirty(addr1)) {
3878 /* invalidate code */
3879 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3880 /* set dirty bit */
3881 cpu_physical_memory_set_dirty_flags(
3882 addr1, (0xff & ~CODE_DIRTY_FLAG));
3888 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3890 uint8_t *ptr;
3891 MemoryRegionSection *section;
3893 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3895 if (!memory_region_is_ram(section->mr) || section->readonly) {
3896 addr = memory_region_section_addr(section, addr);
3897 if (memory_region_is_ram(section->mr)) {
3898 section = &phys_sections[phys_section_rom];
3900 #ifdef TARGET_WORDS_BIGENDIAN
3901 io_mem_write(section->mr, addr, val >> 32, 4);
3902 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3903 #else
3904 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3905 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3906 #endif
3907 } else {
3908 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3909 & TARGET_PAGE_MASK)
3910 + memory_region_section_addr(section, addr));
3911 stq_p(ptr, val);
3915 /* warning: addr must be aligned */
3916 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3917 enum device_endian endian)
3919 uint8_t *ptr;
3920 MemoryRegionSection *section;
3922 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3924 if (!memory_region_is_ram(section->mr) || section->readonly) {
3925 addr = memory_region_section_addr(section, addr);
3926 if (memory_region_is_ram(section->mr)) {
3927 section = &phys_sections[phys_section_rom];
3929 #if defined(TARGET_WORDS_BIGENDIAN)
3930 if (endian == DEVICE_LITTLE_ENDIAN) {
3931 val = bswap32(val);
3933 #else
3934 if (endian == DEVICE_BIG_ENDIAN) {
3935 val = bswap32(val);
3937 #endif
3938 io_mem_write(section->mr, addr, val, 4);
3939 } else {
3940 unsigned long addr1;
3941 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3942 + memory_region_section_addr(section, addr);
3943 /* RAM case */
3944 ptr = qemu_get_ram_ptr(addr1);
3945 switch (endian) {
3946 case DEVICE_LITTLE_ENDIAN:
3947 stl_le_p(ptr, val);
3948 break;
3949 case DEVICE_BIG_ENDIAN:
3950 stl_be_p(ptr, val);
3951 break;
3952 default:
3953 stl_p(ptr, val);
3954 break;
3956 invalidate_and_set_dirty(addr1, 4);
3960 void stl_phys(hwaddr addr, uint32_t val)
3962 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3965 void stl_le_phys(hwaddr addr, uint32_t val)
3967 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3970 void stl_be_phys(hwaddr addr, uint32_t val)
3972 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3975 /* XXX: optimize */
3976 void stb_phys(hwaddr addr, uint32_t val)
3978 uint8_t v = val;
3979 cpu_physical_memory_write(addr, &v, 1);
3982 /* warning: addr must be aligned */
3983 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3984 enum device_endian endian)
3986 uint8_t *ptr;
3987 MemoryRegionSection *section;
3989 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3991 if (!memory_region_is_ram(section->mr) || section->readonly) {
3992 addr = memory_region_section_addr(section, addr);
3993 if (memory_region_is_ram(section->mr)) {
3994 section = &phys_sections[phys_section_rom];
3996 #if defined(TARGET_WORDS_BIGENDIAN)
3997 if (endian == DEVICE_LITTLE_ENDIAN) {
3998 val = bswap16(val);
4000 #else
4001 if (endian == DEVICE_BIG_ENDIAN) {
4002 val = bswap16(val);
4004 #endif
4005 io_mem_write(section->mr, addr, val, 2);
4006 } else {
4007 unsigned long addr1;
4008 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4009 + memory_region_section_addr(section, addr);
4010 /* RAM case */
4011 ptr = qemu_get_ram_ptr(addr1);
4012 switch (endian) {
4013 case DEVICE_LITTLE_ENDIAN:
4014 stw_le_p(ptr, val);
4015 break;
4016 case DEVICE_BIG_ENDIAN:
4017 stw_be_p(ptr, val);
4018 break;
4019 default:
4020 stw_p(ptr, val);
4021 break;
4023 invalidate_and_set_dirty(addr1, 2);
4027 void stw_phys(hwaddr addr, uint32_t val)
4029 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4032 void stw_le_phys(hwaddr addr, uint32_t val)
4034 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4037 void stw_be_phys(hwaddr addr, uint32_t val)
4039 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4042 /* XXX: optimize */
4043 void stq_phys(hwaddr addr, uint64_t val)
4045 val = tswap64(val);
4046 cpu_physical_memory_write(addr, &val, 8);
4049 void stq_le_phys(hwaddr addr, uint64_t val)
4051 val = cpu_to_le64(val);
4052 cpu_physical_memory_write(addr, &val, 8);
4055 void stq_be_phys(hwaddr addr, uint64_t val)
4057 val = cpu_to_be64(val);
4058 cpu_physical_memory_write(addr, &val, 8);
4061 /* virtual memory access for debug (includes writing to ROM) */
4062 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4063 uint8_t *buf, int len, int is_write)
4065 int l;
4066 hwaddr phys_addr;
4067 target_ulong page;
4069 while (len > 0) {
4070 page = addr & TARGET_PAGE_MASK;
4071 phys_addr = cpu_get_phys_page_debug(env, page);
4072 /* if no physical page mapped, return an error */
4073 if (phys_addr == -1)
4074 return -1;
4075 l = (page + TARGET_PAGE_SIZE) - addr;
4076 if (l > len)
4077 l = len;
4078 phys_addr += (addr & ~TARGET_PAGE_MASK);
4079 if (is_write)
4080 cpu_physical_memory_write_rom(phys_addr, buf, l);
4081 else
4082 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4083 len -= l;
4084 buf += l;
4085 addr += l;
4087 return 0;
4089 #endif
4091 /* in deterministic execution mode, instructions doing device I/Os
4092 must be at the end of the TB */
4093 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4095 TranslationBlock *tb;
4096 uint32_t n, cflags;
4097 target_ulong pc, cs_base;
4098 uint64_t flags;
4100 tb = tb_find_pc(retaddr);
4101 if (!tb) {
4102 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4103 (void *)retaddr);
4105 n = env->icount_decr.u16.low + tb->icount;
4106 cpu_restore_state(tb, env, retaddr);
4107 /* Calculate how many instructions had been executed before the fault
4108 occurred. */
4109 n = n - env->icount_decr.u16.low;
4110 /* Generate a new TB ending on the I/O insn. */
4111 n++;
4112 /* On MIPS and SH, delay slot instructions can only be restarted if
4113 they were already the first instruction in the TB. If this is not
4114 the first instruction in a TB then re-execute the preceding
4115 branch. */
4116 #if defined(TARGET_MIPS)
4117 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4118 env->active_tc.PC -= 4;
4119 env->icount_decr.u16.low++;
4120 env->hflags &= ~MIPS_HFLAG_BMASK;
4122 #elif defined(TARGET_SH4)
4123 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4124 && n > 1) {
4125 env->pc -= 2;
4126 env->icount_decr.u16.low++;
4127 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4129 #endif
4130 /* This should never happen. */
4131 if (n > CF_COUNT_MASK)
4132 cpu_abort(env, "TB too big during recompile");
4134 cflags = n | CF_LAST_IO;
4135 pc = tb->pc;
4136 cs_base = tb->cs_base;
4137 flags = tb->flags;
4138 tb_phys_invalidate(tb, -1);
4139 /* FIXME: In theory this could raise an exception. In practice
4140 we have already translated the block once so it's probably ok. */
4141 tb_gen_code(env, pc, cs_base, flags, cflags);
4142 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4143 the first in the TB) then we end up generating a whole new TB and
4144 repeating the fault, which is horribly inefficient.
4145 Better would be to execute just this insn uncached, or generate a
4146 second new TB. */
4147 cpu_resume_from_signal(env, NULL);
4150 #if !defined(CONFIG_USER_ONLY)
4152 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4154 int i, target_code_size, max_target_code_size;
4155 int direct_jmp_count, direct_jmp2_count, cross_page;
4156 TranslationBlock *tb;
4158 target_code_size = 0;
4159 max_target_code_size = 0;
4160 cross_page = 0;
4161 direct_jmp_count = 0;
4162 direct_jmp2_count = 0;
4163 for(i = 0; i < nb_tbs; i++) {
4164 tb = &tbs[i];
4165 target_code_size += tb->size;
4166 if (tb->size > max_target_code_size)
4167 max_target_code_size = tb->size;
4168 if (tb->page_addr[1] != -1)
4169 cross_page++;
4170 if (tb->tb_next_offset[0] != 0xffff) {
4171 direct_jmp_count++;
4172 if (tb->tb_next_offset[1] != 0xffff) {
4173 direct_jmp2_count++;
4177 /* XXX: avoid using doubles ? */
4178 cpu_fprintf(f, "Translation buffer state:\n");
4179 cpu_fprintf(f, "gen code size %td/%zd\n",
4180 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4181 cpu_fprintf(f, "TB count %d/%d\n",
4182 nb_tbs, code_gen_max_blocks);
4183 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4184 nb_tbs ? target_code_size / nb_tbs : 0,
4185 max_target_code_size);
4186 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4187 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4188 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4189 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4190 cross_page,
4191 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4192 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4193 direct_jmp_count,
4194 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4195 direct_jmp2_count,
4196 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4197 cpu_fprintf(f, "\nStatistics:\n");
4198 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4199 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4200 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4201 tcg_dump_info(f, cpu_fprintf);
/*
 * Helper for the virtio device model: reports whether the target was built
 * big-endian (TARGET_WORDS_BIGENDIAN).  The original author calls the
 * virtio model "utterly broken" for needing this; do not use it in new code.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool target_is_big_endian = true;
#else
    const bool target_is_big_endian = false;
#endif

    return target_is_big_endian;
}
4218 #endif
4220 #ifndef CONFIG_USER_ONLY
4221 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4223 MemoryRegionSection *section;
4225 section = phys_page_find(address_space_memory.dispatch,
4226 phys_addr >> TARGET_PAGE_BITS);
4228 return !(memory_region_is_ram(section->mr) ||
4229 memory_region_is_romd(section->mr));
4231 #endif