[qemu/opensuse.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #include "cputlb.h"
62 #include "memory-internal.h"
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
110 #endif
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self modifying code, we count the number
125 of lookups we do to a given page to use a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
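/* Example geometry (derived from the macros above): with a 32-bit
   L1_MAP_ADDR_SPACE_BITS and 4 KiB pages (TARGET_PAGE_BITS == 12),
   V_L1_BITS_REM is 0, so V_L1_BITS becomes 10, V_L1_SIZE is 1024 and
   V_L1_SHIFT is 10: a 1024-entry L1 table over a single 1024-entry
   PageDesc leaf level.  With 48 guest address bits the remainder is 6,
   giving a 64-entry L1 table above three 1024-entry levels (two of
   pointers, one of PageDesc). */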
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
174 #if !defined(CONFIG_USER_ONLY)
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191 static void *qemu_safe_ram_ptr(ram_addr_t addr);
193 static MemoryRegion io_mem_watch;
194 #endif
195 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
196 tb_page_addr_t phys_page2);
198 /* statistics */
199 static int tb_flush_count;
200 static int tb_phys_invalidate_count;
202 #ifdef _WIN32
203 static inline void map_exec(void *addr, long size)
205 DWORD old_protect;
206 VirtualProtect(addr, size,
207 PAGE_EXECUTE_READWRITE, &old_protect);
210 #else
211 static inline void map_exec(void *addr, long size)
213 unsigned long start, end, page_size;
215 page_size = getpagesize();
216 start = (unsigned long)addr;
217 start &= ~(page_size - 1);
219 end = (unsigned long)addr + size;
220 end += page_size - 1;
221 end &= ~(page_size - 1);
223 mprotect((void *)start, end - start,
224 PROT_READ | PROT_WRITE | PROT_EXEC);
226 #endif
228 static void page_init(void)
230 /* NOTE: we can always suppose that qemu_host_page_size >=
231 TARGET_PAGE_SIZE */
232 #ifdef _WIN32
234 SYSTEM_INFO system_info;
236 GetSystemInfo(&system_info);
237 qemu_real_host_page_size = system_info.dwPageSize;
239 #else
240 qemu_real_host_page_size = getpagesize();
241 #endif
242 if (qemu_host_page_size == 0)
243 qemu_host_page_size = qemu_real_host_page_size;
244 if (qemu_host_page_size < TARGET_PAGE_SIZE)
245 qemu_host_page_size = TARGET_PAGE_SIZE;
246 qemu_host_page_mask = ~(qemu_host_page_size - 1);
248 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
250 #ifdef HAVE_KINFO_GETVMMAP
251 struct kinfo_vmentry *freep;
252 int i, cnt;
254 freep = kinfo_getvmmap(getpid(), &cnt);
255 if (freep) {
256 mmap_lock();
257 for (i = 0; i < cnt; i++) {
258 unsigned long startaddr, endaddr;
260 startaddr = freep[i].kve_start;
261 endaddr = freep[i].kve_end;
262 if (h2g_valid(startaddr)) {
263 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
265 if (h2g_valid(endaddr)) {
266 endaddr = h2g(endaddr);
267 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
268 } else {
269 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
270 endaddr = ~0ul;
271 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
272 #endif
276 free(freep);
277 mmap_unlock();
279 #else
280 FILE *f;
282 last_brk = (unsigned long)sbrk(0);
284 f = fopen("/compat/linux/proc/self/maps", "r");
285 if (f) {
286 mmap_lock();
288 do {
289 unsigned long startaddr, endaddr;
290 int n;
292 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
294 if (n == 2 && h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
299 } else {
300 endaddr = ~0ul;
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304 } while (!feof(f));
306 fclose(f);
307 mmap_unlock();
309 #endif
311 #endif
314 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
316 PageDesc *pd;
317 void **lp;
318 int i;
320 #if defined(CONFIG_USER_ONLY)
321 /* We can't use g_malloc because it may recurse into a locked mutex. */
322 # define ALLOC(P, SIZE) \
323 do { \
324 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
325 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
326 } while (0)
327 #else
328 # define ALLOC(P, SIZE) \
329 do { P = g_malloc0(SIZE); } while (0)
330 #endif
332 /* Level 1. Always allocated. */
333 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
335 /* Level 2..N-1. */
336 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
337 void **p = *lp;
339 if (p == NULL) {
340 if (!alloc) {
341 return NULL;
343 ALLOC(p, sizeof(void *) * L2_SIZE);
344 *lp = p;
347 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
350 pd = *lp;
351 if (pd == NULL) {
352 if (!alloc) {
353 return NULL;
355 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
356 *lp = pd;
359 #undef ALLOC
361 return pd + (index & (L2_SIZE - 1));
364 static inline PageDesc *page_find(tb_page_addr_t index)
366 return page_find_alloc(index, 0);
369 #if !defined(CONFIG_USER_ONLY)
371 static void phys_map_node_reserve(unsigned nodes)
373 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
374 typedef PhysPageEntry Node[L2_SIZE];
375 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
376 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
377 phys_map_nodes_nb + nodes);
378 phys_map_nodes = g_renew(Node, phys_map_nodes,
379 phys_map_nodes_nb_alloc);
383 static uint16_t phys_map_node_alloc(void)
385 unsigned i;
386 uint16_t ret;
388 ret = phys_map_nodes_nb++;
389 assert(ret != PHYS_MAP_NODE_NIL);
390 assert(ret != phys_map_nodes_nb_alloc);
391 for (i = 0; i < L2_SIZE; ++i) {
392 phys_map_nodes[ret][i].is_leaf = 0;
393 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
395 return ret;
398 static void phys_map_nodes_reset(void)
400 phys_map_nodes_nb = 0;
404 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
405 hwaddr *nb, uint16_t leaf,
406 int level)
408 PhysPageEntry *p;
409 int i;
410 hwaddr step = (hwaddr)1 << (level * L2_BITS);
412 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
413 lp->ptr = phys_map_node_alloc();
414 p = phys_map_nodes[lp->ptr];
415 if (level == 0) {
416 for (i = 0; i < L2_SIZE; i++) {
417 p[i].is_leaf = 1;
418 p[i].ptr = phys_section_unassigned;
421 } else {
422 p = phys_map_nodes[lp->ptr];
424 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
426 while (*nb && lp < &p[L2_SIZE]) {
427 if ((*index & (step - 1)) == 0 && *nb >= step) {
428 lp->is_leaf = true;
429 lp->ptr = leaf;
430 *index += step;
431 *nb -= step;
432 } else {
433 phys_page_set_level(lp, index, nb, leaf, level - 1);
435 ++lp;
439 static void phys_page_set(AddressSpaceDispatch *d,
440 hwaddr index, hwaddr nb,
441 uint16_t leaf)
443 /* Wildly overreserve - it doesn't matter much. */
444 phys_map_node_reserve(3 * P_L2_LEVELS);
446 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
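/* Look up the section covering a physical page index.  Each level consumes
   L2_BITS of the index, starting from the dispatch root; if the walk hits
   PHYS_MAP_NODE_NIL before reaching a leaf, it falls back to the unassigned
   section, so the caller always gets a valid MemoryRegionSection pointer. */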
449 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
451 PhysPageEntry lp = d->phys_map;
452 PhysPageEntry *p;
453 int i;
454 uint16_t s_index = phys_section_unassigned;
456 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
457 if (lp.ptr == PHYS_MAP_NODE_NIL) {
458 goto not_found;
460 p = phys_map_nodes[lp.ptr];
461 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
464 s_index = lp.ptr;
465 not_found:
466 return &phys_sections[s_index];
469 bool memory_region_is_unassigned(MemoryRegion *mr)
471 return mr != &io_mem_ram && mr != &io_mem_rom
472 && mr != &io_mem_notdirty && !mr->rom_device
473 && mr != &io_mem_watch;
476 #define mmap_lock() do { } while(0)
477 #define mmap_unlock() do { } while(0)
478 #endif
480 #if defined(CONFIG_USER_ONLY)
481 /* Currently it is not recommended to allocate big chunks of data in
482 user mode. It will change when a dedicated libc is used. */
483 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
484 region in which the guest needs to run. Revisit this. */
485 #define USE_STATIC_CODE_GEN_BUFFER
486 #endif
488 /* ??? Should configure for this, not list operating systems here. */
489 #if (defined(__linux__) \
490 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
491 || defined(__DragonFly__) || defined(__OpenBSD__) \
492 || defined(__NetBSD__))
493 # define USE_MMAP
494 #endif
496 /* Minimum size of the code gen buffer. This number is randomly chosen,
497 but not so small that we can't have a fair number of TB's live. */
498 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
500 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
501 indicated, this is constrained by the range of direct branches on the
502 host cpu, as used by the TCG implementation of goto_tb. */
503 #if defined(__x86_64__)
504 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
505 #elif defined(__sparc__)
506 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
507 #elif defined(__arm__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
509 #elif defined(__s390x__)
510 /* We have a +- 4GB range on the branches; leave some slop. */
511 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
512 #else
513 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
514 #endif
516 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
519 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
520 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
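/* For example, on an x86_64 host the 32 MB default fits well inside the
   2 GB direct-branch range and is used as-is; on 32-bit ARM it is clamped
   to the 16 MB MAX_CODE_GEN_BUFFER_SIZE above. */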
522 static inline size_t size_code_gen_buffer(size_t tb_size)
524 /* Size the buffer. */
525 if (tb_size == 0) {
526 #ifdef USE_STATIC_CODE_GEN_BUFFER
527 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
528 #else
529 /* ??? Needs adjustments. */
530 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
531 static buffer, we could size this on RESERVED_VA, on the text
532 segment size of the executable, or continue to use the default. */
533 tb_size = (unsigned long)(ram_size / 4);
534 #endif
536 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
539 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
540 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
542 code_gen_buffer_size = tb_size;
543 return tb_size;
546 #ifdef USE_STATIC_CODE_GEN_BUFFER
547 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
548 __attribute__((aligned(CODE_GEN_ALIGN)));
550 static inline void *alloc_code_gen_buffer(void)
552 map_exec(static_code_gen_buffer, code_gen_buffer_size);
553 return static_code_gen_buffer;
555 #elif defined(USE_MMAP)
556 static inline void *alloc_code_gen_buffer(void)
558 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
559 uintptr_t start = 0;
560 void *buf;
562 /* Constrain the position of the buffer based on the host cpu.
563 Note that these addresses are chosen in concert with the
564 addresses assigned in the relevant linker script file. */
565 # if defined(__PIE__) || defined(__PIC__)
566 /* Don't bother setting a preferred location if we're building
567 a position-independent executable. We're more likely to get
568 an address near the main executable if we let the kernel
569 choose the address. */
570 # elif defined(__x86_64__) && defined(MAP_32BIT)
571 /* Force the memory down into low memory with the executable.
572 Leave the choice of exact location with the kernel. */
573 flags |= MAP_32BIT;
574 /* Cannot expect to map more than 800MB in low memory. */
575 if (code_gen_buffer_size > 800u * 1024 * 1024) {
576 code_gen_buffer_size = 800u * 1024 * 1024;
578 # elif defined(__sparc__)
579 start = 0x40000000ul;
580 # elif defined(__s390x__)
581 start = 0x90000000ul;
582 # endif
584 buf = mmap((void *)start, code_gen_buffer_size,
585 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
586 return buf == MAP_FAILED ? NULL : buf;
588 #else
589 static inline void *alloc_code_gen_buffer(void)
591 void *buf = g_malloc(code_gen_buffer_size);
592 if (buf) {
593 map_exec(buf, code_gen_buffer_size);
595 return buf;
597 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
599 static inline void code_gen_alloc(size_t tb_size)
601 code_gen_buffer_size = size_code_gen_buffer(tb_size);
602 code_gen_buffer = alloc_code_gen_buffer();
603 if (code_gen_buffer == NULL) {
604 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
605 exit(1);
608 /* Steal room for the prologue at the end of the buffer. This ensures
609 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
610 from TB's to the prologue are going to be in range. It also means
611 that we don't need to mark (additional) portions of the data segment
612 as executable. */
613 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
614 code_gen_buffer_size -= 1024;
616 code_gen_buffer_max_size = code_gen_buffer_size -
617 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
618 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
619 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
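    /* Resulting layout: the prologue lives in the last 1024 bytes of the
       buffer, code_gen_buffer_max_size keeps TCG_MAX_OP_SIZE * OPC_BUF_SIZE
       bytes of headroom so a single translated block cannot overrun the
       buffer, and tbs[] is sized assuming CODE_GEN_AVG_BLOCK_SIZE bytes of
       generated code per TB on average. */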
622 /* Must be called before using the QEMU cpus. 'tb_size' is the size
623 (in bytes) allocated to the translation buffer. Zero means default
624 size. */
625 void tcg_exec_init(unsigned long tb_size)
627 cpu_gen_init();
628 code_gen_alloc(tb_size);
629 code_gen_ptr = code_gen_buffer;
630 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
631 page_init();
632 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
633 /* There's no guest base to take into account, so go ahead and
634 initialize the prologue now. */
635 tcg_prologue_init(&tcg_ctx);
636 #endif
639 bool tcg_enabled(void)
641 return code_gen_buffer != NULL;
644 void cpu_exec_init_all(void)
646 #if !defined(CONFIG_USER_ONLY)
647 memory_map_init();
648 io_mem_init();
649 #endif
652 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654 static int cpu_common_post_load(void *opaque, int version_id)
656 CPUArchState *env = opaque;
658 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
659 version_id is increased. */
660 env->interrupt_request &= ~0x01;
661 tlb_flush(env, 1);
663 return 0;
666 static const VMStateDescription vmstate_cpu_common = {
667 .name = "cpu_common",
668 .version_id = 1,
669 .minimum_version_id = 1,
670 .minimum_version_id_old = 1,
671 .post_load = cpu_common_post_load,
672 .fields = (VMStateField []) {
673 VMSTATE_UINT32(halted, CPUArchState),
674 VMSTATE_UINT32(interrupt_request, CPUArchState),
675 VMSTATE_END_OF_LIST()
678 #endif
680 CPUArchState *qemu_get_cpu(int cpu)
682 CPUArchState *env = first_cpu;
684 while (env) {
685 if (env->cpu_index == cpu)
686 break;
687 env = env->next_cpu;
690 return env;
693 void cpu_exec_init(CPUArchState *env)
695 #ifndef CONFIG_USER_ONLY
696 CPUState *cpu = ENV_GET_CPU(env);
697 #endif
698 CPUArchState **penv;
699 int cpu_index;
701 #if defined(CONFIG_USER_ONLY)
702 cpu_list_lock();
703 #endif
704 env->next_cpu = NULL;
705 penv = &first_cpu;
706 cpu_index = 0;
707 while (*penv != NULL) {
708 penv = &(*penv)->next_cpu;
709 cpu_index++;
711 env->cpu_index = cpu_index;
712 env->numa_node = 0;
713 QTAILQ_INIT(&env->breakpoints);
714 QTAILQ_INIT(&env->watchpoints);
715 #ifndef CONFIG_USER_ONLY
716 cpu->thread_id = qemu_get_thread_id();
717 #endif
718 *penv = env;
719 #if defined(CONFIG_USER_ONLY)
720 cpu_list_unlock();
721 #endif
722 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
723 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
724 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
725 cpu_save, cpu_load, env);
726 #endif
729 /* Allocate a new translation block. Flush the translation buffer if
730 too many translation blocks or too much generated code. */
731 static TranslationBlock *tb_alloc(target_ulong pc)
733 TranslationBlock *tb;
735 if (nb_tbs >= code_gen_max_blocks ||
736 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
737 return NULL;
738 tb = &tbs[nb_tbs++];
739 tb->pc = pc;
740 tb->cflags = 0;
741 return tb;
744 void tb_free(TranslationBlock *tb)
746 /* In practice this is mostly used for single-use temporary TBs.
747 Ignore the hard cases and just back up if this TB happens to
748 be the last one generated. */
749 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
750 code_gen_ptr = tb->tc_ptr;
751 nb_tbs--;
755 static inline void invalidate_page_bitmap(PageDesc *p)
757 if (p->code_bitmap) {
758 g_free(p->code_bitmap);
759 p->code_bitmap = NULL;
761 p->code_write_count = 0;
764 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
766 static void page_flush_tb_1 (int level, void **lp)
768 int i;
770 if (*lp == NULL) {
771 return;
773 if (level == 0) {
774 PageDesc *pd = *lp;
775 for (i = 0; i < L2_SIZE; ++i) {
776 pd[i].first_tb = NULL;
777 invalidate_page_bitmap(pd + i);
779 } else {
780 void **pp = *lp;
781 for (i = 0; i < L2_SIZE; ++i) {
782 page_flush_tb_1 (level - 1, pp + i);
787 static void page_flush_tb(void)
789 int i;
790 for (i = 0; i < V_L1_SIZE; i++) {
791 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
795 /* flush all the translation blocks */
796 /* XXX: tb_flush is currently not thread safe */
797 void tb_flush(CPUArchState *env1)
799 CPUArchState *env;
800 #if defined(DEBUG_FLUSH)
801 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
802 (unsigned long)(code_gen_ptr - code_gen_buffer),
803 nb_tbs, nb_tbs > 0 ?
804 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
805 #endif
806 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
807 cpu_abort(env1, "Internal error: code buffer overflow\n");
809 nb_tbs = 0;
811 for(env = first_cpu; env != NULL; env = env->next_cpu) {
812 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
815 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
816 page_flush_tb();
818 code_gen_ptr = code_gen_buffer;
819 /* XXX: flush processor icache at this point if cache flush is
820 expensive */
821 tb_flush_count++;
824 #ifdef DEBUG_TB_CHECK
826 static void tb_invalidate_check(target_ulong address)
828 TranslationBlock *tb;
829 int i;
830 address &= TARGET_PAGE_MASK;
831 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
832 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
833 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
834 address >= tb->pc + tb->size)) {
835 printf("ERROR invalidate: address=" TARGET_FMT_lx
836 " PC=%08lx size=%04x\n",
837 address, (long)tb->pc, tb->size);
843 /* verify that all the pages have correct rights for code */
844 static void tb_page_check(void)
846 TranslationBlock *tb;
847 int i, flags1, flags2;
849 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
850 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
851 flags1 = page_get_flags(tb->pc);
852 flags2 = page_get_flags(tb->pc + tb->size - 1);
853 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
854 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
855 (long)tb->pc, tb->size, flags1, flags2);
861 #endif
863 /* invalidate one TB */
864 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
865 int next_offset)
867 TranslationBlock *tb1;
868 for(;;) {
869 tb1 = *ptb;
870 if (tb1 == tb) {
871 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
872 break;
874 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
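/* The page_next[] and jmp_next[] lists tag the low two bits of each
   TranslationBlock pointer: 0 or 1 selects which of the TB's two page/jump
   slots the link came from, and 2 marks the head of the circular jump list
   (tb->jmp_first).  The helpers below mask the tag off with & ~3 before
   dereferencing. */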
878 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
880 TranslationBlock *tb1;
881 unsigned int n1;
883 for(;;) {
884 tb1 = *ptb;
885 n1 = (uintptr_t)tb1 & 3;
886 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
887 if (tb1 == tb) {
888 *ptb = tb1->page_next[n1];
889 break;
891 ptb = &tb1->page_next[n1];
895 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
897 TranslationBlock *tb1, **ptb;
898 unsigned int n1;
900 ptb = &tb->jmp_next[n];
901 tb1 = *ptb;
902 if (tb1) {
903 /* find tb(n) in circular list */
904 for(;;) {
905 tb1 = *ptb;
906 n1 = (uintptr_t)tb1 & 3;
907 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
908 if (n1 == n && tb1 == tb)
909 break;
910 if (n1 == 2) {
911 ptb = &tb1->jmp_first;
912 } else {
913 ptb = &tb1->jmp_next[n1];
916 /* now we can suppress tb(n) from the list */
917 *ptb = tb->jmp_next[n];
919 tb->jmp_next[n] = NULL;
923 /* reset the jump entry 'n' of a TB so that it is not chained to
924 another TB */
925 static inline void tb_reset_jump(TranslationBlock *tb, int n)
927 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
930 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
932 CPUArchState *env;
933 PageDesc *p;
934 unsigned int h, n1;
935 tb_page_addr_t phys_pc;
936 TranslationBlock *tb1, *tb2;
938 /* remove the TB from the hash list */
939 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
940 h = tb_phys_hash_func(phys_pc);
941 tb_remove(&tb_phys_hash[h], tb,
942 offsetof(TranslationBlock, phys_hash_next));
944 /* remove the TB from the page list */
945 if (tb->page_addr[0] != page_addr) {
946 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
947 tb_page_remove(&p->first_tb, tb);
948 invalidate_page_bitmap(p);
950 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
951 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
952 tb_page_remove(&p->first_tb, tb);
953 invalidate_page_bitmap(p);
956 tb_invalidated_flag = 1;
958 /* remove the TB from the hash list */
959 h = tb_jmp_cache_hash_func(tb->pc);
960 for(env = first_cpu; env != NULL; env = env->next_cpu) {
961 if (env->tb_jmp_cache[h] == tb)
962 env->tb_jmp_cache[h] = NULL;
965 /* suppress this TB from the two jump lists */
966 tb_jmp_remove(tb, 0);
967 tb_jmp_remove(tb, 1);
969 /* suppress any remaining jumps to this TB */
970 tb1 = tb->jmp_first;
971 for(;;) {
972 n1 = (uintptr_t)tb1 & 3;
973 if (n1 == 2)
974 break;
975 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
976 tb2 = tb1->jmp_next[n1];
977 tb_reset_jump(tb1, n1);
978 tb1->jmp_next[n1] = NULL;
979 tb1 = tb2;
981 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
983 tb_phys_invalidate_count++;
986 static inline void set_bits(uint8_t *tab, int start, int len)
988 int end, mask, end1;
990 end = start + len;
991 tab += start >> 3;
992 mask = 0xff << (start & 7);
993 if ((start & ~7) == (end & ~7)) {
994 if (start < end) {
995 mask &= ~(0xff << (end & 7));
996 *tab |= mask;
998 } else {
999 *tab++ |= mask;
1000 start = (start + 8) & ~7;
1001 end1 = end & ~7;
1002 while (start < end1) {
1003 *tab++ = 0xff;
1004 start += 8;
1006 if (start < end) {
1007 mask = ~(0xff << (end & 7));
1008 *tab |= mask;
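/* For example, set_bits(tab, 3, 7) marks bits 3..9: tab[0] is OR'ed with
   0xf8 (bits 3-7) and tab[1] with 0x03 (bits 8-9). */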
1013 static void build_page_bitmap(PageDesc *p)
1015 int n, tb_start, tb_end;
1016 TranslationBlock *tb;
1018 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1020 tb = p->first_tb;
1021 while (tb != NULL) {
1022 n = (uintptr_t)tb & 3;
1023 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1024 /* NOTE: this is subtle as a TB may span two physical pages */
1025 if (n == 0) {
1026 /* NOTE: tb_end may be after the end of the page, but
1027 it is not a problem */
1028 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1029 tb_end = tb_start + tb->size;
1030 if (tb_end > TARGET_PAGE_SIZE)
1031 tb_end = TARGET_PAGE_SIZE;
1032 } else {
1033 tb_start = 0;
1034 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1036 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1037 tb = tb->page_next[n];
1041 TranslationBlock *tb_gen_code(CPUArchState *env,
1042 target_ulong pc, target_ulong cs_base,
1043 int flags, int cflags)
1045 TranslationBlock *tb;
1046 uint8_t *tc_ptr;
1047 tb_page_addr_t phys_pc, phys_page2;
1048 target_ulong virt_page2;
1049 int code_gen_size;
1051 phys_pc = get_page_addr_code(env, pc);
1052 tb = tb_alloc(pc);
1053 if (!tb) {
1054 /* flush must be done */
1055 tb_flush(env);
1056 /* cannot fail at this point */
1057 tb = tb_alloc(pc);
1058 /* Don't forget to invalidate previous TB info. */
1059 tb_invalidated_flag = 1;
1061 tc_ptr = code_gen_ptr;
1062 tb->tc_ptr = tc_ptr;
1063 tb->cs_base = cs_base;
1064 tb->flags = flags;
1065 tb->cflags = cflags;
1066 cpu_gen_code(env, tb, &code_gen_size);
1067 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1068 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1070 /* check next page if needed */
1071 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1072 phys_page2 = -1;
1073 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1074 phys_page2 = get_page_addr_code(env, virt_page2);
1076 tb_link_page(tb, phys_pc, phys_page2);
1077 return tb;
1081 * Invalidate all TBs which intersect with the target physical address range
1082 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1083 * 'is_cpu_write_access' should be true if called from a real cpu write
1084 * access: the virtual CPU will exit the current TB if code is modified inside
1085 * this TB.
1087 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1088 int is_cpu_write_access)
1090 while (start < end) {
1091 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1092 start &= TARGET_PAGE_MASK;
1093 start += TARGET_PAGE_SIZE;
1098 * Invalidate all TBs which intersect with the target physical address range
1099 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1100 * 'is_cpu_write_access' should be true if called from a real cpu write
1101 * access: the virtual CPU will exit the current TB if code is modified inside
1102 * this TB.
1104 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1105 int is_cpu_write_access)
1107 TranslationBlock *tb, *tb_next, *saved_tb;
1108 CPUArchState *env = cpu_single_env;
1109 tb_page_addr_t tb_start, tb_end;
1110 PageDesc *p;
1111 int n;
1112 #ifdef TARGET_HAS_PRECISE_SMC
1113 int current_tb_not_found = is_cpu_write_access;
1114 TranslationBlock *current_tb = NULL;
1115 int current_tb_modified = 0;
1116 target_ulong current_pc = 0;
1117 target_ulong current_cs_base = 0;
1118 int current_flags = 0;
1119 #endif /* TARGET_HAS_PRECISE_SMC */
1121 p = page_find(start >> TARGET_PAGE_BITS);
1122 if (!p)
1123 return;
1124 if (!p->code_bitmap &&
1125 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1126 is_cpu_write_access) {
1127 /* build code bitmap */
1128 build_page_bitmap(p);
1131 /* we remove all the TBs in the range [start, end[ */
1132 /* XXX: see if in some cases it could be faster to invalidate all the code */
1133 tb = p->first_tb;
1134 while (tb != NULL) {
1135 n = (uintptr_t)tb & 3;
1136 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1137 tb_next = tb->page_next[n];
1138 /* NOTE: this is subtle as a TB may span two physical pages */
1139 if (n == 0) {
1140 /* NOTE: tb_end may be after the end of the page, but
1141 it is not a problem */
1142 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1143 tb_end = tb_start + tb->size;
1144 } else {
1145 tb_start = tb->page_addr[1];
1146 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1148 if (!(tb_end <= start || tb_start >= end)) {
1149 #ifdef TARGET_HAS_PRECISE_SMC
1150 if (current_tb_not_found) {
1151 current_tb_not_found = 0;
1152 current_tb = NULL;
1153 if (env->mem_io_pc) {
1154 /* now we have a real cpu fault */
1155 current_tb = tb_find_pc(env->mem_io_pc);
1158 if (current_tb == tb &&
1159 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1160 /* If we are modifying the current TB, we must stop
1161 its execution. We could be more precise by checking
1162 that the modification is after the current PC, but it
1163 would require a specialized function to partially
1164 restore the CPU state */
1166 current_tb_modified = 1;
1167 cpu_restore_state(current_tb, env, env->mem_io_pc);
1168 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1169 &current_flags);
1171 #endif /* TARGET_HAS_PRECISE_SMC */
1172 /* we need to do that to handle the case where a signal
1173 occurs while doing tb_phys_invalidate() */
1174 saved_tb = NULL;
1175 if (env) {
1176 saved_tb = env->current_tb;
1177 env->current_tb = NULL;
1179 tb_phys_invalidate(tb, -1);
1180 if (env) {
1181 env->current_tb = saved_tb;
1182 if (env->interrupt_request && env->current_tb)
1183 cpu_interrupt(env, env->interrupt_request);
1186 tb = tb_next;
1188 #if !defined(CONFIG_USER_ONLY)
1189 /* if no code remaining, no need to continue to use slow writes */
1190 if (!p->first_tb) {
1191 invalidate_page_bitmap(p);
1192 if (is_cpu_write_access) {
1193 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1196 #endif
1197 #ifdef TARGET_HAS_PRECISE_SMC
1198 if (current_tb_modified) {
1199 /* we generate a block containing just the instruction
1200 modifying the memory. It will ensure that it cannot modify
1201 itself */
1202 env->current_tb = NULL;
1203 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1204 cpu_resume_from_signal(env, NULL);
1206 #endif
1209 /* len must be <= 8 and start must be a multiple of len */
1210 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1212 PageDesc *p;
1213 int offset, b;
1214 #if 0
1215 if (1) {
1216 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1217 cpu_single_env->mem_io_vaddr, len,
1218 cpu_single_env->eip,
1219 cpu_single_env->eip +
1220 (intptr_t)cpu_single_env->segs[R_CS].base);
1222 #endif
1223 p = page_find(start >> TARGET_PAGE_BITS);
1224 if (!p)
1225 return;
1226 if (p->code_bitmap) {
1227 offset = start & ~TARGET_PAGE_MASK;
1228 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1229 if (b & ((1 << len) - 1))
1230 goto do_invalidate;
1231 } else {
1232 do_invalidate:
1233 tb_invalidate_phys_page_range(start, start + len, 1);
1237 #if !defined(CONFIG_SOFTMMU)
1238 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1239 uintptr_t pc, void *puc)
1241 TranslationBlock *tb;
1242 PageDesc *p;
1243 int n;
1244 #ifdef TARGET_HAS_PRECISE_SMC
1245 TranslationBlock *current_tb = NULL;
1246 CPUArchState *env = cpu_single_env;
1247 int current_tb_modified = 0;
1248 target_ulong current_pc = 0;
1249 target_ulong current_cs_base = 0;
1250 int current_flags = 0;
1251 #endif
1253 addr &= TARGET_PAGE_MASK;
1254 p = page_find(addr >> TARGET_PAGE_BITS);
1255 if (!p)
1256 return;
1257 tb = p->first_tb;
1258 #ifdef TARGET_HAS_PRECISE_SMC
1259 if (tb && pc != 0) {
1260 current_tb = tb_find_pc(pc);
1262 #endif
1263 while (tb != NULL) {
1264 n = (uintptr_t)tb & 3;
1265 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1266 #ifdef TARGET_HAS_PRECISE_SMC
1267 if (current_tb == tb &&
1268 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1269 /* If we are modifying the current TB, we must stop
1270 its execution. We could be more precise by checking
1271 that the modification is after the current PC, but it
1272 would require a specialized function to partially
1273 restore the CPU state */
1275 current_tb_modified = 1;
1276 cpu_restore_state(current_tb, env, pc);
1277 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1278 &current_flags);
1280 #endif /* TARGET_HAS_PRECISE_SMC */
1281 tb_phys_invalidate(tb, addr);
1282 tb = tb->page_next[n];
1284 p->first_tb = NULL;
1285 #ifdef TARGET_HAS_PRECISE_SMC
1286 if (current_tb_modified) {
1287 /* we generate a block containing just the instruction
1288 modifying the memory. It will ensure that it cannot modify
1289 itself */
1290 env->current_tb = NULL;
1291 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1292 cpu_resume_from_signal(env, puc);
1294 #endif
1296 #endif
1298 /* add the tb in the target page and protect it if necessary */
1299 static inline void tb_alloc_page(TranslationBlock *tb,
1300 unsigned int n, tb_page_addr_t page_addr)
1302 PageDesc *p;
1303 #ifndef CONFIG_USER_ONLY
1304 bool page_already_protected;
1305 #endif
1307 tb->page_addr[n] = page_addr;
1308 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1309 tb->page_next[n] = p->first_tb;
1310 #ifndef CONFIG_USER_ONLY
1311 page_already_protected = p->first_tb != NULL;
1312 #endif
1313 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1314 invalidate_page_bitmap(p);
1316 #if defined(TARGET_HAS_SMC) || 1
1318 #if defined(CONFIG_USER_ONLY)
1319 if (p->flags & PAGE_WRITE) {
1320 target_ulong addr;
1321 PageDesc *p2;
1322 int prot;
1324 /* force the host page as non writable (writes will have a
1325 page fault + mprotect overhead) */
1326 page_addr &= qemu_host_page_mask;
1327 prot = 0;
1328 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1329 addr += TARGET_PAGE_SIZE) {
1331 p2 = page_find (addr >> TARGET_PAGE_BITS);
1332 if (!p2)
1333 continue;
1334 prot |= p2->flags;
1335 p2->flags &= ~PAGE_WRITE;
1337 mprotect(g2h(page_addr), qemu_host_page_size,
1338 (prot & PAGE_BITS) & ~PAGE_WRITE);
1339 #ifdef DEBUG_TB_INVALIDATE
1340 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1341 page_addr);
1342 #endif
1344 #else
1345 /* if some code is already present, then the pages are already
1346 protected. So we handle the case where only the first TB is
1347 allocated in a physical page */
1348 if (!page_already_protected) {
1349 tlb_protect_code(page_addr);
1351 #endif
1353 #endif /* TARGET_HAS_SMC */
1356 /* add a new TB and link it to the physical page tables. phys_page2 is
1357 (-1) to indicate that only one page contains the TB. */
1358 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1359 tb_page_addr_t phys_page2)
1361 unsigned int h;
1362 TranslationBlock **ptb;
1364 /* Grab the mmap lock to stop another thread invalidating this TB
1365 before we are done. */
1366 mmap_lock();
1367 /* add in the physical hash table */
1368 h = tb_phys_hash_func(phys_pc);
1369 ptb = &tb_phys_hash[h];
1370 tb->phys_hash_next = *ptb;
1371 *ptb = tb;
1373 /* add in the page list */
1374 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1375 if (phys_page2 != -1)
1376 tb_alloc_page(tb, 1, phys_page2);
1377 else
1378 tb->page_addr[1] = -1;
1380 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1381 tb->jmp_next[0] = NULL;
1382 tb->jmp_next[1] = NULL;
1384 /* init original jump addresses */
1385 if (tb->tb_next_offset[0] != 0xffff)
1386 tb_reset_jump(tb, 0);
1387 if (tb->tb_next_offset[1] != 0xffff)
1388 tb_reset_jump(tb, 1);
1390 #ifdef DEBUG_TB_CHECK
1391 tb_page_check();
1392 #endif
1393 mmap_unlock();
1396 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
1397 /* check whether the given addr is in TCG generated code buffer or not */
1398 bool is_tcg_gen_code(uintptr_t tc_ptr)
1400 /* This can be called during code generation; code_gen_buffer_max_size
1401 is used instead of code_gen_ptr for upper boundary checking */
1402 return (tc_ptr >= (uintptr_t)code_gen_buffer &&
1403 tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
1405 #endif
1407 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1408 tb[1].tc_ptr. Return NULL if not found */
1409 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1411 int m_min, m_max, m;
1412 uintptr_t v;
1413 TranslationBlock *tb;
1415 if (nb_tbs <= 0)
1416 return NULL;
1417 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1418 tc_ptr >= (uintptr_t)code_gen_ptr) {
1419 return NULL;
1421 /* binary search (cf Knuth) */
1422 m_min = 0;
1423 m_max = nb_tbs - 1;
1424 while (m_min <= m_max) {
1425 m = (m_min + m_max) >> 1;
1426 tb = &tbs[m];
1427 v = (uintptr_t)tb->tc_ptr;
1428 if (v == tc_ptr)
1429 return tb;
1430 else if (tc_ptr < v) {
1431 m_max = m - 1;
1432 } else {
1433 m_min = m + 1;
1436 return &tbs[m_max];
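/* The binary search above relies on TBs being carved out of code_gen_buffer
   sequentially, so tbs[0..nb_tbs) is sorted by tc_ptr; when there is no exact
   match we return the last TB starting at or before tc_ptr, i.e. normally the
   one that contains it. */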
1439 static void tb_reset_jump_recursive(TranslationBlock *tb);
1441 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1443 TranslationBlock *tb1, *tb_next, **ptb;
1444 unsigned int n1;
1446 tb1 = tb->jmp_next[n];
1447 if (tb1 != NULL) {
1448 /* find head of list */
1449 for(;;) {
1450 n1 = (uintptr_t)tb1 & 3;
1451 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1452 if (n1 == 2)
1453 break;
1454 tb1 = tb1->jmp_next[n1];
1456 /* we are now sure that tb jumps to tb1 */
1457 tb_next = tb1;
1459 /* remove tb from the jmp_first list */
1460 ptb = &tb_next->jmp_first;
1461 for(;;) {
1462 tb1 = *ptb;
1463 n1 = (uintptr_t)tb1 & 3;
1464 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1465 if (n1 == n && tb1 == tb)
1466 break;
1467 ptb = &tb1->jmp_next[n1];
1469 *ptb = tb->jmp_next[n];
1470 tb->jmp_next[n] = NULL;
1472 /* suppress the jump to next tb in generated code */
1473 tb_reset_jump(tb, n);
1475 /* suppress jumps in the tb on which we could have jumped */
1476 tb_reset_jump_recursive(tb_next);
1480 static void tb_reset_jump_recursive(TranslationBlock *tb)
1482 tb_reset_jump_recursive2(tb, 0);
1483 tb_reset_jump_recursive2(tb, 1);
1486 #if defined(TARGET_HAS_ICE)
1487 #if defined(CONFIG_USER_ONLY)
1488 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1490 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1492 #else
1493 void tb_invalidate_phys_addr(hwaddr addr)
1495 ram_addr_t ram_addr;
1496 MemoryRegionSection *section;
1498 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1499 if (!(memory_region_is_ram(section->mr)
1500 || (section->mr->rom_device && section->mr->readable))) {
1501 return;
1503 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1504 + memory_region_section_addr(section, addr);
1505 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1508 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1510 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1511 (pc & ~TARGET_PAGE_MASK));
1513 #endif
1514 #endif /* TARGET_HAS_ICE */
1516 #if defined(CONFIG_USER_ONLY)
1517 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1522 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1523 int flags, CPUWatchpoint **watchpoint)
1525 return -ENOSYS;
1527 #else
1528 /* Add a watchpoint. */
1529 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1530 int flags, CPUWatchpoint **watchpoint)
1532 target_ulong len_mask = ~(len - 1);
1533 CPUWatchpoint *wp;
1535 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1536 if ((len & (len - 1)) || (addr & ~len_mask) ||
1537 len == 0 || len > TARGET_PAGE_SIZE) {
1538 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1539 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1540 return -EINVAL;
1542 wp = g_malloc(sizeof(*wp));
1544 wp->vaddr = addr;
1545 wp->len_mask = len_mask;
1546 wp->flags = flags;
1548 /* keep all GDB-injected watchpoints in front */
1549 if (flags & BP_GDB)
1550 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1551 else
1552 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1554 tlb_flush_page(env, addr);
1556 if (watchpoint)
1557 *watchpoint = wp;
1558 return 0;
1561 /* Remove a specific watchpoint. */
1562 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1563 int flags)
1565 target_ulong len_mask = ~(len - 1);
1566 CPUWatchpoint *wp;
1568 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1569 if (addr == wp->vaddr && len_mask == wp->len_mask
1570 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1571 cpu_watchpoint_remove_by_ref(env, wp);
1572 return 0;
1575 return -ENOENT;
1578 /* Remove a specific watchpoint by reference. */
1579 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1581 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1583 tlb_flush_page(env, watchpoint->vaddr);
1585 g_free(watchpoint);
1588 /* Remove all matching watchpoints. */
1589 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1591 CPUWatchpoint *wp, *next;
1593 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1594 if (wp->flags & mask)
1595 cpu_watchpoint_remove_by_ref(env, wp);
1598 #endif
1600 /* Add a breakpoint. */
1601 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1602 CPUBreakpoint **breakpoint)
1604 #if defined(TARGET_HAS_ICE)
1605 CPUBreakpoint *bp;
1607 bp = g_malloc(sizeof(*bp));
1609 bp->pc = pc;
1610 bp->flags = flags;
1612 /* keep all GDB-injected breakpoints in front */
1613 if (flags & BP_GDB)
1614 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1615 else
1616 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1618 breakpoint_invalidate(env, pc);
1620 if (breakpoint)
1621 *breakpoint = bp;
1622 return 0;
1623 #else
1624 return -ENOSYS;
1625 #endif
1628 /* Remove a specific breakpoint. */
1629 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1631 #if defined(TARGET_HAS_ICE)
1632 CPUBreakpoint *bp;
1634 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1635 if (bp->pc == pc && bp->flags == flags) {
1636 cpu_breakpoint_remove_by_ref(env, bp);
1637 return 0;
1640 return -ENOENT;
1641 #else
1642 return -ENOSYS;
1643 #endif
1646 /* Remove a specific breakpoint by reference. */
1647 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1649 #if defined(TARGET_HAS_ICE)
1650 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1652 breakpoint_invalidate(env, breakpoint->pc);
1654 g_free(breakpoint);
1655 #endif
1658 /* Remove all matching breakpoints. */
1659 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1661 #if defined(TARGET_HAS_ICE)
1662 CPUBreakpoint *bp, *next;
1664 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1665 if (bp->flags & mask)
1666 cpu_breakpoint_remove_by_ref(env, bp);
1668 #endif
1671 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1672 CPU loop after each instruction */
1673 void cpu_single_step(CPUArchState *env, int enabled)
1675 #if defined(TARGET_HAS_ICE)
1676 if (env->singlestep_enabled != enabled) {
1677 env->singlestep_enabled = enabled;
1678 if (kvm_enabled())
1679 kvm_update_guest_debug(env, 0);
1680 else {
1681 /* must flush all the translated code to avoid inconsistencies */
1682 /* XXX: only flush what is necessary */
1683 tb_flush(env);
1686 #endif
1689 static void cpu_unlink_tb(CPUArchState *env)
1691 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1692 problem and hope the cpu will stop of its own accord. For userspace
1693 emulation this often isn't actually as bad as it sounds. Often
1694 signals are used primarily to interrupt blocking syscalls. */
1695 TranslationBlock *tb;
1696 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1698 spin_lock(&interrupt_lock);
1699 tb = env->current_tb;
1700 /* if the cpu is currently executing code, we must unlink it and
1701 all the potentially executing TB */
1702 if (tb) {
1703 env->current_tb = NULL;
1704 tb_reset_jump_recursive(tb);
1706 spin_unlock(&interrupt_lock);
1709 #ifndef CONFIG_USER_ONLY
1710 /* mask must never be zero, except for A20 change call */
1711 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1713 CPUState *cpu = ENV_GET_CPU(env);
1714 int old_mask;
1716 old_mask = env->interrupt_request;
1717 env->interrupt_request |= mask;
1720 * If called from iothread context, wake the target cpu in
1721 * case it's halted.
1723 if (!qemu_cpu_is_self(cpu)) {
1724 qemu_cpu_kick(cpu);
1725 return;
1728 if (use_icount) {
1729 env->icount_decr.u16.high = 0xffff;
1730 if (!can_do_io(env)
1731 && (mask & ~old_mask) != 0) {
1732 cpu_abort(env, "Raised interrupt while not in I/O function");
1734 } else {
1735 cpu_unlink_tb(env);
1739 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1741 #else /* CONFIG_USER_ONLY */
1743 void cpu_interrupt(CPUArchState *env, int mask)
1745 env->interrupt_request |= mask;
1746 cpu_unlink_tb(env);
1748 #endif /* CONFIG_USER_ONLY */
1750 void cpu_reset_interrupt(CPUArchState *env, int mask)
1752 env->interrupt_request &= ~mask;
1755 void cpu_exit(CPUArchState *env)
1757 env->exit_request = 1;
1758 cpu_unlink_tb(env);
1761 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1763 va_list ap;
1764 va_list ap2;
1766 va_start(ap, fmt);
1767 va_copy(ap2, ap);
1768 fprintf(stderr, "qemu: fatal: ");
1769 vfprintf(stderr, fmt, ap);
1770 fprintf(stderr, "\n");
1771 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1772 if (qemu_log_enabled()) {
1773 qemu_log("qemu: fatal: ");
1774 qemu_log_vprintf(fmt, ap2);
1775 qemu_log("\n");
1776 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1777 qemu_log_flush();
1778 qemu_log_close();
1780 va_end(ap2);
1781 va_end(ap);
1782 #if defined(CONFIG_USER_ONLY)
1784 struct sigaction act;
1785 sigfillset(&act.sa_mask);
1786 act.sa_handler = SIG_DFL;
1787 sigaction(SIGABRT, &act, NULL);
1789 #endif
1790 abort();
1793 CPUArchState *cpu_copy(CPUArchState *env)
1795 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1796 CPUArchState *next_cpu = new_env->next_cpu;
1797 int cpu_index = new_env->cpu_index;
1798 #if defined(TARGET_HAS_ICE)
1799 CPUBreakpoint *bp;
1800 CPUWatchpoint *wp;
1801 #endif
1803 memcpy(new_env, env, sizeof(CPUArchState));
1805 /* Preserve chaining and index. */
1806 new_env->next_cpu = next_cpu;
1807 new_env->cpu_index = cpu_index;
1809 /* Clone all break/watchpoints.
1810 Note: Once we support ptrace with hw-debug register access, make sure
1811 BP_CPU break/watchpoints are handled correctly on clone. */
1812 QTAILQ_INIT(&env->breakpoints);
1813 QTAILQ_INIT(&env->watchpoints);
1814 #if defined(TARGET_HAS_ICE)
1815 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1816 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1818 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1819 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1820 wp->flags, NULL);
1822 #endif
1824 return new_env;
1827 #if !defined(CONFIG_USER_ONLY)
1828 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1830 unsigned int i;
1832 /* Discard jump cache entries for any tb which might potentially
1833 overlap the flushed page. */
1834 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1835 memset (&env->tb_jmp_cache[i], 0,
1836 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1838 i = tb_jmp_cache_hash_page(addr);
1839 memset (&env->tb_jmp_cache[i], 0,
1840 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1843 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1844 uintptr_t length)
1846 uintptr_t start1;
1848 /* we modify the TLB cache so that the dirty bit will be set again
1849 when accessing the range */
1850 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1851 /* Check that we don't span multiple blocks - this breaks the
1852 address comparisons below. */
1853 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1854 != (end - 1) - start) {
1855 abort();
1857 cpu_tlb_reset_dirty_all(start1, length);
1861 /* Note: start and end must be within the same ram block. */
1862 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1863 int dirty_flags)
1865 uintptr_t length;
1867 start &= TARGET_PAGE_MASK;
1868 end = TARGET_PAGE_ALIGN(end);
1870 length = end - start;
1871 if (length == 0)
1872 return;
1873 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1875 if (tcg_enabled()) {
1876 tlb_reset_dirty_range_all(start, end, length);
1880 static int cpu_physical_memory_set_dirty_tracking(int enable)
1882 int ret = 0;
1883 in_migration = enable;
1884 return ret;
1887 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1888 MemoryRegionSection *section,
1889 target_ulong vaddr,
1890 hwaddr paddr,
1891 int prot,
1892 target_ulong *address)
1894 hwaddr iotlb;
1895 CPUWatchpoint *wp;
1897 if (memory_region_is_ram(section->mr)) {
1898 /* Normal RAM. */
1899 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1900 + memory_region_section_addr(section, paddr);
1901 if (!section->readonly) {
1902 iotlb |= phys_section_notdirty;
1903 } else {
1904 iotlb |= phys_section_rom;
1906 } else {
1907 /* IO handlers are currently passed a physical address.
1908 It would be nice to pass an offset from the base address
1909 of that region. This would avoid having to special case RAM,
1910 and avoid full address decoding in every device.
1911 We can't use the high bits of pd for this because
1912 IO_MEM_ROMD uses these as a ram address. */
1913 iotlb = section - phys_sections;
1914 iotlb += memory_region_section_addr(section, paddr);
1917 /* Make accesses to pages with watchpoints go via the
1918 watchpoint trap routines. */
1919 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1920 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1921 /* Avoid trapping reads of pages with a write breakpoint. */
1922 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1923 iotlb = phys_section_watch + paddr;
1924 *address |= TLB_MMIO;
1925 break;
1930 return iotlb;
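    /* The returned value is overloaded: for RAM-backed sections it is the
       page's ram_addr OR'ed with the notdirty or rom section index, for MMIO
       it is the section's index into phys_sections plus the offset within the
       region, and pages covered by a watchpoint are redirected to the watch
       section with TLB_MMIO set so every access traps. */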
1933 #else
1935 * Walks guest process memory "regions" one by one
1936 * and calls callback function 'fn' for each region.
1939 struct walk_memory_regions_data
1941 walk_memory_regions_fn fn;
1942 void *priv;
1943 uintptr_t start;
1944 int prot;
1947 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1948 abi_ulong end, int new_prot)
1950 if (data->start != -1ul) {
1951 int rc = data->fn(data->priv, data->start, end, data->prot);
1952 if (rc != 0) {
1953 return rc;
1957 data->start = (new_prot ? end : -1ul);
1958 data->prot = new_prot;
1960 return 0;
1963 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1964 abi_ulong base, int level, void **lp)
1966 abi_ulong pa;
1967 int i, rc;
1969 if (*lp == NULL) {
1970 return walk_memory_regions_end(data, base, 0);
1973 if (level == 0) {
1974 PageDesc *pd = *lp;
1975 for (i = 0; i < L2_SIZE; ++i) {
1976 int prot = pd[i].flags;
1978 pa = base | (i << TARGET_PAGE_BITS);
1979 if (prot != data->prot) {
1980 rc = walk_memory_regions_end(data, pa, prot);
1981 if (rc != 0) {
1982 return rc;
1986 } else {
1987 void **pp = *lp;
1988 for (i = 0; i < L2_SIZE; ++i) {
1989 pa = base | ((abi_ulong)i <<
1990 (TARGET_PAGE_BITS + L2_BITS * level));
1991 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1992 if (rc != 0) {
1993 return rc;
1998 return 0;
2001 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2003 struct walk_memory_regions_data data;
2004 uintptr_t i;
2006 data.fn = fn;
2007 data.priv = priv;
2008 data.start = -1ul;
2009 data.prot = 0;
2011 for (i = 0; i < V_L1_SIZE; i++) {
2012 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2013 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2014 if (rc != 0) {
2015 return rc;
2019 return walk_memory_regions_end(&data, 0, 0);
2022 static int dump_region(void *priv, abi_ulong start,
2023 abi_ulong end, unsigned long prot)
2025 FILE *f = (FILE *)priv;
2027 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2028 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2029 start, end, end - start,
2030 ((prot & PAGE_READ) ? 'r' : '-'),
2031 ((prot & PAGE_WRITE) ? 'w' : '-'),
2032 ((prot & PAGE_EXEC) ? 'x' : '-'));
2034 return (0);
2037 /* dump memory mappings */
2038 void page_dump(FILE *f)
2040 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2041 "start", "end", "size", "prot");
2042 walk_memory_regions(f, dump_region);
2045 int page_get_flags(target_ulong address)
2047 PageDesc *p;
2049 p = page_find(address >> TARGET_PAGE_BITS);
2050 if (!p)
2051 return 0;
2052 return p->flags;
2055 /* Modify the flags of a page and invalidate the code if necessary.
2056 The flag PAGE_WRITE_ORG is positioned automatically depending
2057 on PAGE_WRITE. The mmap_lock should already be held. */
2058 void page_set_flags(target_ulong start, target_ulong end, int flags)
2060 target_ulong addr, len;
2062 /* This function should never be called with addresses outside the
2063 guest address space. If this assert fires, it probably indicates
2064 a missing call to h2g_valid. */
2065 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2066 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2067 #endif
2068 assert(start < end);
2070 start = start & TARGET_PAGE_MASK;
2071 end = TARGET_PAGE_ALIGN(end);
2073 if (flags & PAGE_WRITE) {
2074 flags |= PAGE_WRITE_ORG;
2077 for (addr = start, len = end - start;
2078 len != 0;
2079 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2080 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2082 /* If the write protection bit is set, then we invalidate
2083 the code inside. */
2084 if (!(p->flags & PAGE_WRITE) &&
2085 (flags & PAGE_WRITE) &&
2086 p->first_tb) {
2087 tb_invalidate_phys_page(addr, 0, NULL);
2089 p->flags = flags;
2093 int page_check_range(target_ulong start, target_ulong len, int flags)
2095 PageDesc *p;
2096 target_ulong end;
2097 target_ulong addr;
2099 /* This function should never be called with addresses outside the
2100 guest address space. If this assert fires, it probably indicates
2101 a missing call to h2g_valid. */
2102 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2103 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2104 #endif
2106 if (len == 0) {
2107 return 0;
2109 if (start + len - 1 < start) {
2110 /* We've wrapped around. */
2111 return -1;
2114     end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2115 start = start & TARGET_PAGE_MASK;
2117 for (addr = start, len = end - start;
2118 len != 0;
2119 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2120 p = page_find(addr >> TARGET_PAGE_BITS);
2121         if (!p)
2122             return -1;
2123         if (!(p->flags & PAGE_VALID))
2124             return -1;
2126 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2127 return -1;
2128 if (flags & PAGE_WRITE) {
2129 if (!(p->flags & PAGE_WRITE_ORG))
2130 return -1;
2131 /* unprotect the page if it was put read-only because it
2132 contains translated code */
2133 if (!(p->flags & PAGE_WRITE)) {
2134 if (!page_unprotect(addr, 0, NULL))
2135 return -1;
2137 return 0;
2140 return 0;
2143 /* called from signal handler: invalidate the code and unprotect the
2144 page. Return TRUE if the fault was successfully handled. */
2145 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2147 unsigned int prot;
2148 PageDesc *p;
2149 target_ulong host_start, host_end, addr;
2151 /* Technically this isn't safe inside a signal handler. However we
2152 know this only ever happens in a synchronous SEGV handler, so in
2153 practice it seems to be ok. */
2154 mmap_lock();
2156 p = page_find(address >> TARGET_PAGE_BITS);
2157 if (!p) {
2158 mmap_unlock();
2159 return 0;
2162 /* if the page was really writable, then we change its
2163 protection back to writable */
2164 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2165 host_start = address & qemu_host_page_mask;
2166 host_end = host_start + qemu_host_page_size;
2168 prot = 0;
2169 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2170 p = page_find(addr >> TARGET_PAGE_BITS);
2171 p->flags |= PAGE_WRITE;
2172 prot |= p->flags;
2174 /* and since the content will be modified, we must invalidate
2175 the corresponding translated code. */
2176 tb_invalidate_phys_page(addr, pc, puc);
2177 #ifdef DEBUG_TB_CHECK
2178 tb_invalidate_check(addr);
2179 #endif
2181 mprotect((void *)g2h(host_start), qemu_host_page_size,
2182 prot & PAGE_BITS);
2184 mmap_unlock();
2185 return 1;
2187 mmap_unlock();
2188 return 0;
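/* Illustrative sketch (not part of the original file): the host SIGSEGV path
 * (handle_cpu_signal() in user-exec.c) uses page_unprotect() roughly as below;
 * everything except page_unprotect() and h2g() is a simplified assumption. */
#if 0
static int handle_write_fault(void *host_fault_addr, uintptr_t pc, void *puc)
{
    target_ulong guest_addr = h2g(host_fault_addr);

    /* Returns 1 if the page contained translated code, was made writable
     * again and its TBs were invalidated, i.e. the faulting insn can retry. */
    return page_unprotect(guest_addr, pc, puc);
}
#endif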
2190 #endif /* defined(CONFIG_USER_ONLY) */
2192 #if !defined(CONFIG_USER_ONLY)
2194 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2195 typedef struct subpage_t {
2196 MemoryRegion iomem;
2197 hwaddr base;
2198 uint16_t sub_section[TARGET_PAGE_SIZE];
2199 } subpage_t;
2201 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2202 uint16_t section);
2203 static subpage_t *subpage_init(hwaddr base);
2204 static void destroy_page_desc(uint16_t section_index)
2206 MemoryRegionSection *section = &phys_sections[section_index];
2207 MemoryRegion *mr = section->mr;
2209 if (mr->subpage) {
2210 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2211 memory_region_destroy(&subpage->iomem);
2212 g_free(subpage);
2216 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2218 unsigned i;
2219 PhysPageEntry *p;
2221 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2222 return;
2225 p = phys_map_nodes[lp->ptr];
2226 for (i = 0; i < L2_SIZE; ++i) {
2227 if (!p[i].is_leaf) {
2228 destroy_l2_mapping(&p[i], level - 1);
2229 } else {
2230 destroy_page_desc(p[i].ptr);
2233 lp->is_leaf = 0;
2234 lp->ptr = PHYS_MAP_NODE_NIL;
2237 static void destroy_all_mappings(AddressSpaceDispatch *d)
2239 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2240 phys_map_nodes_reset();
2243 static uint16_t phys_section_add(MemoryRegionSection *section)
2245 if (phys_sections_nb == phys_sections_nb_alloc) {
2246 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2247 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2248 phys_sections_nb_alloc);
2250 phys_sections[phys_sections_nb] = *section;
2251 return phys_sections_nb++;
2254 static void phys_sections_clear(void)
2256 phys_sections_nb = 0;
2259 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2261 subpage_t *subpage;
2262 hwaddr base = section->offset_within_address_space
2263 & TARGET_PAGE_MASK;
2264 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2265 MemoryRegionSection subsection = {
2266 .offset_within_address_space = base,
2267 .size = TARGET_PAGE_SIZE,
2269 hwaddr start, end;
2271 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2273 if (!(existing->mr->subpage)) {
2274 subpage = subpage_init(base);
2275 subsection.mr = &subpage->iomem;
2276 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2277 phys_section_add(&subsection));
2278 } else {
2279 subpage = container_of(existing->mr, subpage_t, iomem);
2281 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2282 end = start + section->size - 1;
2283 subpage_register(subpage, start, end, phys_section_add(section));
2287 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2289 hwaddr start_addr = section->offset_within_address_space;
2290 ram_addr_t size = section->size;
2291 hwaddr addr;
2292 uint16_t section_index = phys_section_add(section);
2294 assert(size);
2296 addr = start_addr;
2297 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2298 section_index);
2301 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2303 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2304 MemoryRegionSection now = *section, remain = *section;
2306 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2307 || (now.size < TARGET_PAGE_SIZE)) {
2308 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2309 - now.offset_within_address_space,
2310 now.size);
2311 register_subpage(d, &now);
2312 remain.size -= now.size;
2313 remain.offset_within_address_space += now.size;
2314 remain.offset_within_region += now.size;
2316 while (remain.size >= TARGET_PAGE_SIZE) {
2317 now = remain;
2318 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2319 now.size = TARGET_PAGE_SIZE;
2320 register_subpage(d, &now);
2321 } else {
2322 now.size &= TARGET_PAGE_MASK;
2323 register_multipage(d, &now);
2325 remain.size -= now.size;
2326 remain.offset_within_address_space += now.size;
2327 remain.offset_within_region += now.size;
2329 now = remain;
2330 if (now.size) {
2331 register_subpage(d, &now);
2335 void qemu_flush_coalesced_mmio_buffer(void)
2337 if (kvm_enabled())
2338 kvm_flush_coalesced_mmio_buffer();
2341 #if defined(__linux__) && !defined(TARGET_S390X)
2343 #include <sys/vfs.h>
2345 #define HUGETLBFS_MAGIC 0x958458f6
2347 static long gethugepagesize(const char *path)
2349 struct statfs fs;
2350 int ret;
2352 do {
2353 ret = statfs(path, &fs);
2354 } while (ret != 0 && errno == EINTR);
2356 if (ret != 0) {
2357 perror(path);
2358 return 0;
2361 if (fs.f_type != HUGETLBFS_MAGIC)
2362 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2364 return fs.f_bsize;
2367 static void *file_ram_alloc(RAMBlock *block,
2368 ram_addr_t memory,
2369 const char *path)
2371 char *filename;
2372 void *area;
2373 int fd;
2374 #ifdef MAP_POPULATE
2375 int flags;
2376 #endif
2377 unsigned long hpagesize;
2379 hpagesize = gethugepagesize(path);
2380 if (!hpagesize) {
2381 return NULL;
2384 if (memory < hpagesize) {
2385 return NULL;
2388 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2389 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2390 return NULL;
2393 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2394 return NULL;
2397 fd = mkstemp(filename);
2398 if (fd < 0) {
2399 perror("unable to create backing store for hugepages");
2400 free(filename);
2401 return NULL;
2403 unlink(filename);
2404 free(filename);
2406 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2409 * ftruncate is not supported by hugetlbfs in older
2410 * hosts, so don't bother bailing out on errors.
2411 * If anything goes wrong with it under other filesystems,
2412 * mmap will fail.
2414 if (ftruncate(fd, memory))
2415 perror("ftruncate");
2417 #ifdef MAP_POPULATE
2418     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages in the case
2419 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2420 * to sidestep this quirk.
2422 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2423 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2424 #else
2425 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2426 #endif
2427 if (area == MAP_FAILED) {
2428 perror("file_ram_alloc: can't mmap RAM pages");
2429 close(fd);
2430 return (NULL);
2432 block->fd = fd;
2433 return area;
2435 #endif
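/* Illustrative note (not part of the original file): the hugetlbfs path above
 * backs guest RAM when QEMU is started with something like
 *
 *     qemu-system-x86_64 -m 4096 -mem-path /dev/hugepages
 *
 * where /dev/hugepages is assumed to be a mounted hugetlbfs instance; if the
 * path is not on hugetlbfs, gethugepagesize() only warns and that filesystem's
 * block size is used instead. */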
2437 static ram_addr_t find_ram_offset(ram_addr_t size)
2439 RAMBlock *block, *next_block;
2440 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2442 if (QLIST_EMPTY(&ram_list.blocks))
2443 return 0;
2445 QLIST_FOREACH(block, &ram_list.blocks, next) {
2446 ram_addr_t end, next = RAM_ADDR_MAX;
2448 end = block->offset + block->length;
2450 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2451 if (next_block->offset >= end) {
2452 next = MIN(next, next_block->offset);
2455 if (next - end >= size && next - end < mingap) {
2456 offset = end;
2457 mingap = next - end;
2461 if (offset == RAM_ADDR_MAX) {
2462 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2463 (uint64_t)size);
2464 abort();
2467 return offset;
2470 ram_addr_t last_ram_offset(void)
2472 RAMBlock *block;
2473 ram_addr_t last = 0;
2475 QLIST_FOREACH(block, &ram_list.blocks, next)
2476 last = MAX(last, block->offset + block->length);
2478 return last;
2481 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2483 int ret;
2484 QemuOpts *machine_opts;
2486     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2487 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2488 if (machine_opts &&
2489 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2490 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2491 if (ret) {
2492 perror("qemu_madvise");
2493 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2494 "but dump_guest_core=off specified\n");
2499 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2501 RAMBlock *new_block, *block;
2503 new_block = NULL;
2504 QLIST_FOREACH(block, &ram_list.blocks, next) {
2505 if (block->offset == addr) {
2506 new_block = block;
2507 break;
2510 assert(new_block);
2511 assert(!new_block->idstr[0]);
2513 if (dev) {
2514 char *id = qdev_get_dev_path(dev);
2515 if (id) {
2516 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2517 g_free(id);
2520 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2522 QLIST_FOREACH(block, &ram_list.blocks, next) {
2523 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2524 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2525 new_block->idstr);
2526 abort();
2531 static int memory_try_enable_merging(void *addr, size_t len)
2533 QemuOpts *opts;
2535 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2536 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2537 /* disabled by the user */
2538 return 0;
2541 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
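/* Illustrative note (not part of the original file): both helpers above honour
 * -machine properties, e.g.
 *
 *     qemu-system-x86_64 -machine pc,dump-guest-core=off,mem-merge=off ...
 *
 * which respectively request QEMU_MADV_DONTDUMP on guest RAM and skip the
 * QEMU_MADV_MERGEABLE hint. The machine type "pc" is just an example. */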
2544 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2545 MemoryRegion *mr)
2547 RAMBlock *new_block;
2549 size = TARGET_PAGE_ALIGN(size);
2550 new_block = g_malloc0(sizeof(*new_block));
2552 new_block->mr = mr;
2553 new_block->offset = find_ram_offset(size);
2554 if (host) {
2555 new_block->host = host;
2556 new_block->flags |= RAM_PREALLOC_MASK;
2557 } else {
2558 if (mem_path) {
2559 #if defined (__linux__) && !defined(TARGET_S390X)
2560 new_block->host = file_ram_alloc(new_block, size, mem_path);
2561 if (!new_block->host) {
2562 new_block->host = qemu_vmalloc(size);
2563 memory_try_enable_merging(new_block->host, size);
2565 #else
2566 fprintf(stderr, "-mem-path option unsupported\n");
2567 exit(1);
2568 #endif
2569 } else {
2570 if (xen_enabled()) {
2571 xen_ram_alloc(new_block->offset, size, mr);
2572 } else if (kvm_enabled()) {
2573 /* some s390/kvm configurations have special constraints */
2574 new_block->host = kvm_vmalloc(size);
2575 } else {
2576 new_block->host = qemu_vmalloc(size);
2578 memory_try_enable_merging(new_block->host, size);
2581 new_block->length = size;
2583 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2585 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2586 last_ram_offset() >> TARGET_PAGE_BITS);
2587 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2588 0, size >> TARGET_PAGE_BITS);
2589 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2591 qemu_ram_setup_dump(new_block->host, size);
2592 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2594 if (kvm_enabled())
2595 kvm_setup_guest_memory(new_block->host, size);
2597 return new_block->offset;
2600 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2602 return qemu_ram_alloc_from_ptr(size, NULL, mr);
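/* Illustrative sketch (not part of the original file): devices normally do not
 * call qemu_ram_alloc() directly; they go through the memory API, which ends
 * up here. The device and region names below are hypothetical. */
#if 0
static void mydev_init_ram(MemoryRegion *sysmem)
{
    static MemoryRegion vram;

    /* memory_region_init_ram() allocates the backing RAM block for us */
    memory_region_init_ram(&vram, "mydev.vram", 16 * 1024 * 1024);
    memory_region_add_subregion(sysmem, 0xe0000000, &vram);
}
#endif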
2605 void qemu_ram_free_from_ptr(ram_addr_t addr)
2607 RAMBlock *block;
2609 QLIST_FOREACH(block, &ram_list.blocks, next) {
2610 if (addr == block->offset) {
2611 QLIST_REMOVE(block, next);
2612 g_free(block);
2613 return;
2618 void qemu_ram_free(ram_addr_t addr)
2620 RAMBlock *block;
2622 QLIST_FOREACH(block, &ram_list.blocks, next) {
2623 if (addr == block->offset) {
2624 QLIST_REMOVE(block, next);
2625 if (block->flags & RAM_PREALLOC_MASK) {
2627 } else if (mem_path) {
2628 #if defined (__linux__) && !defined(TARGET_S390X)
2629 if (block->fd) {
2630 munmap(block->host, block->length);
2631 close(block->fd);
2632 } else {
2633 qemu_vfree(block->host);
2635 #else
2636 abort();
2637 #endif
2638 } else {
2639 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2640 munmap(block->host, block->length);
2641 #else
2642 if (xen_enabled()) {
2643 xen_invalidate_map_cache_entry(block->host);
2644 } else {
2645 qemu_vfree(block->host);
2647 #endif
2649 g_free(block);
2650 return;
2656 #ifndef _WIN32
2657 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2659 RAMBlock *block;
2660 ram_addr_t offset;
2661 int flags;
2662 void *area, *vaddr;
2664 QLIST_FOREACH(block, &ram_list.blocks, next) {
2665 offset = addr - block->offset;
2666 if (offset < block->length) {
2667 vaddr = block->host + offset;
2668 if (block->flags & RAM_PREALLOC_MASK) {
2670 } else {
2671 flags = MAP_FIXED;
2672 munmap(vaddr, length);
2673 if (mem_path) {
2674 #if defined(__linux__) && !defined(TARGET_S390X)
2675 if (block->fd) {
2676 #ifdef MAP_POPULATE
2677 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2678 MAP_PRIVATE;
2679 #else
2680 flags |= MAP_PRIVATE;
2681 #endif
2682 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2683 flags, block->fd, offset);
2684 } else {
2685 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2686 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2687 flags, -1, 0);
2689 #else
2690 abort();
2691 #endif
2692 } else {
2693 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2694 flags |= MAP_SHARED | MAP_ANONYMOUS;
2695 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2696 flags, -1, 0);
2697 #else
2698 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2699 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2700 flags, -1, 0);
2701 #endif
2703 if (area != vaddr) {
2704 fprintf(stderr, "Could not remap addr: "
2705 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2706 length, addr);
2707 exit(1);
2709 memory_try_enable_merging(vaddr, length);
2710 qemu_ram_setup_dump(vaddr, length);
2712 return;
2716 #endif /* !_WIN32 */
2718 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2719 With the exception of the softmmu code in this file, this should
2720 only be used for local memory (e.g. video ram) that the device owns,
2721 and knows it isn't going to access beyond the end of the block.
2723 It should not be used for general purpose DMA.
2724 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2726 void *qemu_get_ram_ptr(ram_addr_t addr)
2728 RAMBlock *block;
2730 QLIST_FOREACH(block, &ram_list.blocks, next) {
2731 if (addr - block->offset < block->length) {
2732         /* Move this entry to the start of the list.  */
2733 if (block != QLIST_FIRST(&ram_list.blocks)) {
2734 QLIST_REMOVE(block, next);
2735 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2737 if (xen_enabled()) {
2738 /* We need to check if the requested address is in the RAM
2739 * because we don't want to map the entire memory in QEMU.
2740 * In that case just map until the end of the page.
2742 if (block->offset == 0) {
2743 return xen_map_cache(addr, 0, 0);
2744 } else if (block->host == NULL) {
2745 block->host =
2746 xen_map_cache(block->offset, block->length, 1);
2749 return block->host + (addr - block->offset);
2753 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2754 abort();
2756 return NULL;
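/* Illustrative sketch (not part of the original file): per the comment above,
 * a device may resolve a host pointer into RAM it owns, e.g. to scan its own
 * video memory; the helper name and offsets are assumed. */
#if 0
static void mydev_scan_vram(MemoryRegion *vram, ram_addr_t offset, size_t len)
{
    uint8_t *p = qemu_get_ram_ptr(memory_region_get_ram_addr(vram) + offset);

    /* ... read at most 'len' bytes, staying inside the owned block ... */
    qemu_put_ram_ptr(p);
}
#endif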
2759 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2760 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2762 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2764 RAMBlock *block;
2766 QLIST_FOREACH(block, &ram_list.blocks, next) {
2767 if (addr - block->offset < block->length) {
2768 if (xen_enabled()) {
2769 /* We need to check if the requested address is in the RAM
2770 * because we don't want to map the entire memory in QEMU.
2771 * In that case just map until the end of the page.
2773 if (block->offset == 0) {
2774 return xen_map_cache(addr, 0, 0);
2775 } else if (block->host == NULL) {
2776 block->host =
2777 xen_map_cache(block->offset, block->length, 1);
2780 return block->host + (addr - block->offset);
2784 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2785 abort();
2787 return NULL;
2790 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2791 * but takes a size argument */
2792 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2794 if (*size == 0) {
2795 return NULL;
2797 if (xen_enabled()) {
2798 return xen_map_cache(addr, *size, 1);
2799 } else {
2800 RAMBlock *block;
2802 QLIST_FOREACH(block, &ram_list.blocks, next) {
2803 if (addr - block->offset < block->length) {
2804 if (addr - block->offset + *size > block->length)
2805 *size = block->length - addr + block->offset;
2806 return block->host + (addr - block->offset);
2810 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2811 abort();
2815 void qemu_put_ram_ptr(void *addr)
2817 trace_qemu_put_ram_ptr(addr);
2820 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2822 RAMBlock *block;
2823 uint8_t *host = ptr;
2825 if (xen_enabled()) {
2826 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2827 return 0;
2830 QLIST_FOREACH(block, &ram_list.blocks, next) {
2831         /* This case appears when the block is not mapped. */
2832 if (block->host == NULL) {
2833 continue;
2835 if (host - block->host < block->length) {
2836 *ram_addr = block->offset + (host - block->host);
2837 return 0;
2841 return -1;
2844 /* Some of the softmmu routines need to translate from a host pointer
2845 (typically a TLB entry) back to a ram offset. */
2846 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2848 ram_addr_t ram_addr;
2850 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2851 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2852 abort();
2854 return ram_addr;
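/* Illustrative sketch (not part of the original file): for an address inside a
 * mapped RAMBlock the two translations above are inverses of each other. */
#if 0
static void check_ram_round_trip(ram_addr_t addr)
{
    void *host = qemu_get_ram_ptr(addr);
    ram_addr_t back = qemu_ram_addr_from_host_nofail(host);

    assert(back == addr);
    qemu_put_ram_ptr(host);
}
#endif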
2857 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2858 unsigned size)
2860 #ifdef DEBUG_UNASSIGNED
2861 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2862 #endif
2863 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2864 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2865 #endif
2866 return 0;
2869 static void unassigned_mem_write(void *opaque, hwaddr addr,
2870 uint64_t val, unsigned size)
2872 #ifdef DEBUG_UNASSIGNED
2873 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2874 #endif
2875 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2876 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2877 #endif
2880 static const MemoryRegionOps unassigned_mem_ops = {
2881 .read = unassigned_mem_read,
2882 .write = unassigned_mem_write,
2883 .endianness = DEVICE_NATIVE_ENDIAN,
2886 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2887 unsigned size)
2889 abort();
2892 static void error_mem_write(void *opaque, hwaddr addr,
2893 uint64_t value, unsigned size)
2895 abort();
2898 static const MemoryRegionOps error_mem_ops = {
2899 .read = error_mem_read,
2900 .write = error_mem_write,
2901 .endianness = DEVICE_NATIVE_ENDIAN,
2904 static const MemoryRegionOps rom_mem_ops = {
2905 .read = error_mem_read,
2906 .write = unassigned_mem_write,
2907 .endianness = DEVICE_NATIVE_ENDIAN,
2910 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2911 uint64_t val, unsigned size)
2913 int dirty_flags;
2914 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2915 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2916 #if !defined(CONFIG_USER_ONLY)
2917 tb_invalidate_phys_page_fast(ram_addr, size);
2918 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2919 #endif
2921 switch (size) {
2922 case 1:
2923 stb_p(qemu_get_ram_ptr(ram_addr), val);
2924 break;
2925 case 2:
2926 stw_p(qemu_get_ram_ptr(ram_addr), val);
2927 break;
2928 case 4:
2929 stl_p(qemu_get_ram_ptr(ram_addr), val);
2930 break;
2931 default:
2932 abort();
2934 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2935 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2936 /* we remove the notdirty callback only if the code has been
2937 flushed */
2938 if (dirty_flags == 0xff)
2939 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2942 static const MemoryRegionOps notdirty_mem_ops = {
2943 .read = error_mem_read,
2944 .write = notdirty_mem_write,
2945 .endianness = DEVICE_NATIVE_ENDIAN,
2948 /* Generate a debug exception if a watchpoint has been hit. */
2949 static void check_watchpoint(int offset, int len_mask, int flags)
2951 CPUArchState *env = cpu_single_env;
2952 target_ulong pc, cs_base;
2953 TranslationBlock *tb;
2954 target_ulong vaddr;
2955 CPUWatchpoint *wp;
2956 int cpu_flags;
2958 if (env->watchpoint_hit) {
2959 /* We re-entered the check after replacing the TB. Now raise
2960          * the debug interrupt so that it will trigger after the
2961 * current instruction. */
2962 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2963 return;
2965 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2966 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2967 if ((vaddr == (wp->vaddr & len_mask) ||
2968 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2969 wp->flags |= BP_WATCHPOINT_HIT;
2970 if (!env->watchpoint_hit) {
2971 env->watchpoint_hit = wp;
2972 tb = tb_find_pc(env->mem_io_pc);
2973 if (!tb) {
2974 cpu_abort(env, "check_watchpoint: could not find TB for "
2975 "pc=%p", (void *)env->mem_io_pc);
2977 cpu_restore_state(tb, env, env->mem_io_pc);
2978 tb_phys_invalidate(tb, -1);
2979 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2980 env->exception_index = EXCP_DEBUG;
2981 cpu_loop_exit(env);
2982 } else {
2983 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2984 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2985 cpu_resume_from_signal(env, NULL);
2988 } else {
2989 wp->flags &= ~BP_WATCHPOINT_HIT;
2994 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2995 so these check for a hit then pass through to the normal out-of-line
2996 phys routines. */
2997 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2998 unsigned size)
3000 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3001 switch (size) {
3002 case 1: return ldub_phys(addr);
3003 case 2: return lduw_phys(addr);
3004 case 4: return ldl_phys(addr);
3005 default: abort();
3009 static void watch_mem_write(void *opaque, hwaddr addr,
3010 uint64_t val, unsigned size)
3012 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3013 switch (size) {
3014 case 1:
3015 stb_phys(addr, val);
3016 break;
3017 case 2:
3018 stw_phys(addr, val);
3019 break;
3020 case 4:
3021 stl_phys(addr, val);
3022 break;
3023 default: abort();
3027 static const MemoryRegionOps watch_mem_ops = {
3028 .read = watch_mem_read,
3029 .write = watch_mem_write,
3030 .endianness = DEVICE_NATIVE_ENDIAN,
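/* Illustrative sketch (not part of the original file): the watchpoints that
 * feed check_watchpoint()/watch_mem_* above are registered with
 * cpu_watchpoint_insert(); the 4-byte write watch below is an assumed example. */
#if 0
static void watch_guest_word(CPUArchState *env, target_ulong vaddr)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(env, vaddr, 4, BP_MEM_WRITE | BP_GDB, &wp) < 0) {
        fprintf(stderr, "could not set watchpoint at " TARGET_FMT_lx "\n", vaddr);
    }
}
#endif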
3033 static uint64_t subpage_read(void *opaque, hwaddr addr,
3034 unsigned len)
3036 subpage_t *mmio = opaque;
3037 unsigned int idx = SUBPAGE_IDX(addr);
3038 MemoryRegionSection *section;
3039 #if defined(DEBUG_SUBPAGE)
3040 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3041 mmio, len, addr, idx);
3042 #endif
3044 section = &phys_sections[mmio->sub_section[idx]];
3045 addr += mmio->base;
3046 addr -= section->offset_within_address_space;
3047 addr += section->offset_within_region;
3048 return io_mem_read(section->mr, addr, len);
3051 static void subpage_write(void *opaque, hwaddr addr,
3052 uint64_t value, unsigned len)
3054 subpage_t *mmio = opaque;
3055 unsigned int idx = SUBPAGE_IDX(addr);
3056 MemoryRegionSection *section;
3057 #if defined(DEBUG_SUBPAGE)
3058 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3059 " idx %d value %"PRIx64"\n",
3060 __func__, mmio, len, addr, idx, value);
3061 #endif
3063 section = &phys_sections[mmio->sub_section[idx]];
3064 addr += mmio->base;
3065 addr -= section->offset_within_address_space;
3066 addr += section->offset_within_region;
3067 io_mem_write(section->mr, addr, value, len);
3070 static const MemoryRegionOps subpage_ops = {
3071 .read = subpage_read,
3072 .write = subpage_write,
3073 .endianness = DEVICE_NATIVE_ENDIAN,
3076 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3077 unsigned size)
3079 ram_addr_t raddr = addr;
3080 void *ptr = qemu_get_ram_ptr(raddr);
3081 switch (size) {
3082 case 1: return ldub_p(ptr);
3083 case 2: return lduw_p(ptr);
3084 case 4: return ldl_p(ptr);
3085 default: abort();
3089 static void subpage_ram_write(void *opaque, hwaddr addr,
3090 uint64_t value, unsigned size)
3092 ram_addr_t raddr = addr;
3093 void *ptr = qemu_get_ram_ptr(raddr);
3094 switch (size) {
3095 case 1: return stb_p(ptr, value);
3096 case 2: return stw_p(ptr, value);
3097 case 4: return stl_p(ptr, value);
3098 default: abort();
3102 static const MemoryRegionOps subpage_ram_ops = {
3103 .read = subpage_ram_read,
3104 .write = subpage_ram_write,
3105 .endianness = DEVICE_NATIVE_ENDIAN,
3108 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3109 uint16_t section)
3111 int idx, eidx;
3113 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3114 return -1;
3115 idx = SUBPAGE_IDX(start);
3116 eidx = SUBPAGE_IDX(end);
3117 #if defined(DEBUG_SUBPAGE)
3118 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3119 mmio, start, end, idx, eidx, memory);
3120 #endif
3121 if (memory_region_is_ram(phys_sections[section].mr)) {
3122 MemoryRegionSection new_section = phys_sections[section];
3123 new_section.mr = &io_mem_subpage_ram;
3124 section = phys_section_add(&new_section);
3126 for (; idx <= eidx; idx++) {
3127 mmio->sub_section[idx] = section;
3130 return 0;
3133 static subpage_t *subpage_init(hwaddr base)
3135 subpage_t *mmio;
3137 mmio = g_malloc0(sizeof(subpage_t));
3139 mmio->base = base;
3140 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3141 "subpage", TARGET_PAGE_SIZE);
3142 mmio->iomem.subpage = true;
3143 #if defined(DEBUG_SUBPAGE)
3144 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3145 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3146 #endif
3147 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3149 return mmio;
3152 static uint16_t dummy_section(MemoryRegion *mr)
3154 MemoryRegionSection section = {
3155 .mr = mr,
3156 .offset_within_address_space = 0,
3157 .offset_within_region = 0,
3158 .size = UINT64_MAX,
3161 return phys_section_add(&section);
3164 MemoryRegion *iotlb_to_region(hwaddr index)
3166 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3169 static void io_mem_init(void)
3171 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3172 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3173 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3174 "unassigned", UINT64_MAX);
3175 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3176 "notdirty", UINT64_MAX);
3177 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3178 "subpage-ram", UINT64_MAX);
3179 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3180 "watch", UINT64_MAX);
3183 static void mem_begin(MemoryListener *listener)
3185 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3187 destroy_all_mappings(d);
3188 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3191 static void core_begin(MemoryListener *listener)
3193 phys_sections_clear();
3194 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3195 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3196 phys_section_rom = dummy_section(&io_mem_rom);
3197 phys_section_watch = dummy_section(&io_mem_watch);
3200 static void tcg_commit(MemoryListener *listener)
3202 CPUArchState *env;
3204 /* since each CPU stores ram addresses in its TLB cache, we must
3205 reset the modified entries */
3206 /* XXX: slow ! */
3207     for (env = first_cpu; env != NULL; env = env->next_cpu) {
3208 tlb_flush(env, 1);
3212 static void core_log_global_start(MemoryListener *listener)
3214 cpu_physical_memory_set_dirty_tracking(1);
3217 static void core_log_global_stop(MemoryListener *listener)
3219 cpu_physical_memory_set_dirty_tracking(0);
3222 static void io_region_add(MemoryListener *listener,
3223 MemoryRegionSection *section)
3225 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3227 mrio->mr = section->mr;
3228 mrio->offset = section->offset_within_region;
3229 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3230 section->offset_within_address_space, section->size);
3231 ioport_register(&mrio->iorange);
3234 static void io_region_del(MemoryListener *listener,
3235 MemoryRegionSection *section)
3237 isa_unassign_ioport(section->offset_within_address_space, section->size);
3240 static MemoryListener core_memory_listener = {
3241 .begin = core_begin,
3242 .log_global_start = core_log_global_start,
3243 .log_global_stop = core_log_global_stop,
3244 .priority = 1,
3247 static MemoryListener io_memory_listener = {
3248 .region_add = io_region_add,
3249 .region_del = io_region_del,
3250 .priority = 0,
3253 static MemoryListener tcg_memory_listener = {
3254 .commit = tcg_commit,
3257 void address_space_init_dispatch(AddressSpace *as)
3259 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3261 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3262 d->listener = (MemoryListener) {
3263 .begin = mem_begin,
3264 .region_add = mem_add,
3265 .region_nop = mem_add,
3266 .priority = 0,
3268 as->dispatch = d;
3269 memory_listener_register(&d->listener, as);
3272 void address_space_destroy_dispatch(AddressSpace *as)
3274 AddressSpaceDispatch *d = as->dispatch;
3276 memory_listener_unregister(&d->listener);
3277 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3278 g_free(d);
3279 as->dispatch = NULL;
3282 static void memory_map_init(void)
3284 system_memory = g_malloc(sizeof(*system_memory));
3285 memory_region_init(system_memory, "system", INT64_MAX);
3286 address_space_init(&address_space_memory, system_memory);
3287 address_space_memory.name = "memory";
3289 system_io = g_malloc(sizeof(*system_io));
3290 memory_region_init(system_io, "io", 65536);
3291 address_space_init(&address_space_io, system_io);
3292 address_space_io.name = "I/O";
3294 memory_listener_register(&core_memory_listener, &address_space_memory);
3295 memory_listener_register(&io_memory_listener, &address_space_io);
3296 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3299 MemoryRegion *get_system_memory(void)
3301 return system_memory;
3304 MemoryRegion *get_system_io(void)
3306 return system_io;
3309 #endif /* !defined(CONFIG_USER_ONLY) */
3311 /* physical memory access (slow version, mainly for debug) */
3312 #if defined(CONFIG_USER_ONLY)
3313 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3314 uint8_t *buf, int len, int is_write)
3316 int l, flags;
3317 target_ulong page;
3318 void * p;
3320 while (len > 0) {
3321 page = addr & TARGET_PAGE_MASK;
3322 l = (page + TARGET_PAGE_SIZE) - addr;
3323 if (l > len)
3324 l = len;
3325 flags = page_get_flags(page);
3326 if (!(flags & PAGE_VALID))
3327 return -1;
3328 if (is_write) {
3329 if (!(flags & PAGE_WRITE))
3330 return -1;
3331 /* XXX: this code should not depend on lock_user */
3332 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3333 return -1;
3334 memcpy(p, buf, l);
3335 unlock_user(p, addr, l);
3336 } else {
3337 if (!(flags & PAGE_READ))
3338 return -1;
3339 /* XXX: this code should not depend on lock_user */
3340 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3341 return -1;
3342 memcpy(buf, p, l);
3343 unlock_user(p, addr, 0);
3345 len -= l;
3346 buf += l;
3347 addr += l;
3349 return 0;
3352 #else
3354 static void invalidate_and_set_dirty(hwaddr addr,
3355 hwaddr length)
3357 if (!cpu_physical_memory_is_dirty(addr)) {
3358 /* invalidate code */
3359 tb_invalidate_phys_page_range(addr, addr + length, 0);
3360 /* set dirty bit */
3361 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3363 xen_modified_memory(addr, length);
3366 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3367 int len, bool is_write)
3369 AddressSpaceDispatch *d = as->dispatch;
3370 int l;
3371 uint8_t *ptr;
3372 uint32_t val;
3373 hwaddr page;
3374 MemoryRegionSection *section;
3376 while (len > 0) {
3377 page = addr & TARGET_PAGE_MASK;
3378 l = (page + TARGET_PAGE_SIZE) - addr;
3379 if (l > len)
3380 l = len;
3381 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3383 if (is_write) {
3384 if (!memory_region_is_ram(section->mr)) {
3385 hwaddr addr1;
3386 addr1 = memory_region_section_addr(section, addr);
3387 /* XXX: could force cpu_single_env to NULL to avoid
3388 potential bugs */
3389 if (l >= 4 && ((addr1 & 3) == 0)) {
3390 /* 32 bit write access */
3391 val = ldl_p(buf);
3392 io_mem_write(section->mr, addr1, val, 4);
3393 l = 4;
3394 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3395 /* 16 bit write access */
3396 val = lduw_p(buf);
3397 io_mem_write(section->mr, addr1, val, 2);
3398 l = 2;
3399 } else {
3400 /* 8 bit write access */
3401 val = ldub_p(buf);
3402 io_mem_write(section->mr, addr1, val, 1);
3403 l = 1;
3405 } else if (!section->readonly) {
3406 ram_addr_t addr1;
3407 addr1 = memory_region_get_ram_addr(section->mr)
3408 + memory_region_section_addr(section, addr);
3409 /* RAM case */
3410 ptr = qemu_get_ram_ptr(addr1);
3411 memcpy(ptr, buf, l);
3412 invalidate_and_set_dirty(addr1, l);
3413 qemu_put_ram_ptr(ptr);
3415 } else {
3416 if (!(memory_region_is_ram(section->mr) ||
3417 memory_region_is_romd(section->mr))) {
3418 hwaddr addr1;
3419 /* I/O case */
3420 addr1 = memory_region_section_addr(section, addr);
3421 if (l >= 4 && ((addr1 & 3) == 0)) {
3422 /* 32 bit read access */
3423 val = io_mem_read(section->mr, addr1, 4);
3424 stl_p(buf, val);
3425 l = 4;
3426 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3427 /* 16 bit read access */
3428 val = io_mem_read(section->mr, addr1, 2);
3429 stw_p(buf, val);
3430 l = 2;
3431 } else {
3432 /* 8 bit read access */
3433 val = io_mem_read(section->mr, addr1, 1);
3434 stb_p(buf, val);
3435 l = 1;
3437 } else {
3438 /* RAM case */
3439 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3440 + memory_region_section_addr(section,
3441 addr));
3442 memcpy(buf, ptr, l);
3443 qemu_put_ram_ptr(ptr);
3446 len -= l;
3447 buf += l;
3448 addr += l;
3452 void address_space_write(AddressSpace *as, hwaddr addr,
3453 const uint8_t *buf, int len)
3455 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3459 * address_space_read: read from an address space.
3461 * @as: #AddressSpace to be accessed
3462 * @addr: address within that address space
3463 * @buf: buffer with the data transferred
3465 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3467 address_space_rw(as, addr, buf, len, false);
3471 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3472 int len, int is_write)
3474 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
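/* Illustrative sketch (not part of the original file): a device or loader that
 * needs to copy a buffer to a guest-physical address can use the wrappers
 * above directly; the addresses and helper names are arbitrary examples. */
#if 0
static void copy_blob_to_guest(hwaddr dst, const uint8_t *blob, int len)
{
    cpu_physical_memory_write(dst, blob, len);      /* DMA-style write */
}

static uint32_t peek_guest_word(hwaddr src)
{
    uint32_t v;

    cpu_physical_memory_read(src, &v, sizeof(v));   /* raw bytes, no byte swap */
    return v;
}
#endif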
3477 /* used for ROM loading : can write in RAM and ROM */
3478 void cpu_physical_memory_write_rom(hwaddr addr,
3479 const uint8_t *buf, int len)
3481 AddressSpaceDispatch *d = address_space_memory.dispatch;
3482 int l;
3483 uint8_t *ptr;
3484 hwaddr page;
3485 MemoryRegionSection *section;
3487 while (len > 0) {
3488 page = addr & TARGET_PAGE_MASK;
3489 l = (page + TARGET_PAGE_SIZE) - addr;
3490 if (l > len)
3491 l = len;
3492 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3494 if (!(memory_region_is_ram(section->mr) ||
3495 memory_region_is_romd(section->mr))) {
3496 /* do nothing */
3497 } else {
3498 unsigned long addr1;
3499 addr1 = memory_region_get_ram_addr(section->mr)
3500 + memory_region_section_addr(section, addr);
3501 /* ROM/RAM case */
3502 ptr = qemu_get_ram_ptr(addr1);
3503 memcpy(ptr, buf, l);
3504 invalidate_and_set_dirty(addr1, l);
3505 qemu_put_ram_ptr(ptr);
3507 len -= l;
3508 buf += l;
3509 addr += l;
3513 typedef struct {
3514 void *buffer;
3515 hwaddr addr;
3516 hwaddr len;
3517 } BounceBuffer;
3519 static BounceBuffer bounce;
3521 typedef struct MapClient {
3522 void *opaque;
3523 void (*callback)(void *opaque);
3524 QLIST_ENTRY(MapClient) link;
3525 } MapClient;
3527 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3528 = QLIST_HEAD_INITIALIZER(map_client_list);
3530 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3532 MapClient *client = g_malloc(sizeof(*client));
3534 client->opaque = opaque;
3535 client->callback = callback;
3536 QLIST_INSERT_HEAD(&map_client_list, client, link);
3537 return client;
3540 static void cpu_unregister_map_client(void *_client)
3542 MapClient *client = (MapClient *)_client;
3544 QLIST_REMOVE(client, link);
3545 g_free(client);
3548 static void cpu_notify_map_clients(void)
3550 MapClient *client;
3552 while (!QLIST_EMPTY(&map_client_list)) {
3553 client = QLIST_FIRST(&map_client_list);
3554 client->callback(client->opaque);
3555 cpu_unregister_map_client(client);
3559 /* Map a physical memory region into a host virtual address.
3560 * May map a subset of the requested range, given by and returned in *plen.
3561 * May return NULL if resources needed to perform the mapping are exhausted.
3562 * Use only for reads OR writes - not for read-modify-write operations.
3563 * Use cpu_register_map_client() to know when retrying the map operation is
3564 * likely to succeed.
3566 void *address_space_map(AddressSpace *as,
3567 hwaddr addr,
3568 hwaddr *plen,
3569 bool is_write)
3571 AddressSpaceDispatch *d = as->dispatch;
3572 hwaddr len = *plen;
3573 hwaddr todo = 0;
3574 int l;
3575 hwaddr page;
3576 MemoryRegionSection *section;
3577 ram_addr_t raddr = RAM_ADDR_MAX;
3578 ram_addr_t rlen;
3579 void *ret;
3581 while (len > 0) {
3582 page = addr & TARGET_PAGE_MASK;
3583 l = (page + TARGET_PAGE_SIZE) - addr;
3584 if (l > len)
3585 l = len;
3586 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3588 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3589 if (todo || bounce.buffer) {
3590 break;
3592 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3593 bounce.addr = addr;
3594 bounce.len = l;
3595 if (!is_write) {
3596 address_space_read(as, addr, bounce.buffer, l);
3599 *plen = l;
3600 return bounce.buffer;
3602 if (!todo) {
3603 raddr = memory_region_get_ram_addr(section->mr)
3604 + memory_region_section_addr(section, addr);
3607 len -= l;
3608 addr += l;
3609 todo += l;
3611 rlen = todo;
3612 ret = qemu_ram_ptr_length(raddr, &rlen);
3613 *plen = rlen;
3614 return ret;
3617 /* Unmaps a memory region previously mapped by address_space_map().
3618 * Will also mark the memory as dirty if is_write == 1. access_len gives
3619 * the amount of memory that was actually read or written by the caller.
3621 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3622 int is_write, hwaddr access_len)
3624 if (buffer != bounce.buffer) {
3625 if (is_write) {
3626 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3627 while (access_len) {
3628 unsigned l;
3629 l = TARGET_PAGE_SIZE;
3630 if (l > access_len)
3631 l = access_len;
3632 invalidate_and_set_dirty(addr1, l);
3633 addr1 += l;
3634 access_len -= l;
3637 if (xen_enabled()) {
3638 xen_invalidate_map_cache_entry(buffer);
3640 return;
3642 if (is_write) {
3643 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3645 qemu_vfree(bounce.buffer);
3646 bounce.buffer = NULL;
3647 cpu_notify_map_clients();
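/* Illustrative sketch (not part of the original file): the canonical pattern
 * for the map/unmap pair above, as used by DMA-capable devices; handling of
 * the bounce-buffer-busy case is reduced to a bare failure return. */
#if 0
static int dma_write_to_guest(AddressSpace *as, hwaddr addr,
                              const uint8_t *data, hwaddr len)
{
    hwaddr plen = len;
    void *host = address_space_map(as, addr, &plen, true /* is_write */);

    if (!host || plen < len) {
        if (host) {
            address_space_unmap(as, host, plen, true, 0);
        }
        return -1;  /* caller should retry via cpu_register_map_client() */
    }
    memcpy(host, data, len);
    address_space_unmap(as, host, plen, true, len);
    return 0;
}
#endif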
3650 void *cpu_physical_memory_map(hwaddr addr,
3651 hwaddr *plen,
3652 int is_write)
3654 return address_space_map(&address_space_memory, addr, plen, is_write);
3657 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3658 int is_write, hwaddr access_len)
3660 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3663 /* warning: addr must be aligned */
3664 static inline uint32_t ldl_phys_internal(hwaddr addr,
3665 enum device_endian endian)
3667 uint8_t *ptr;
3668 uint32_t val;
3669 MemoryRegionSection *section;
3671 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3673 if (!(memory_region_is_ram(section->mr) ||
3674 memory_region_is_romd(section->mr))) {
3675 /* I/O case */
3676 addr = memory_region_section_addr(section, addr);
3677 val = io_mem_read(section->mr, addr, 4);
3678 #if defined(TARGET_WORDS_BIGENDIAN)
3679 if (endian == DEVICE_LITTLE_ENDIAN) {
3680 val = bswap32(val);
3682 #else
3683 if (endian == DEVICE_BIG_ENDIAN) {
3684 val = bswap32(val);
3686 #endif
3687 } else {
3688 /* RAM case */
3689 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3690 & TARGET_PAGE_MASK)
3691 + memory_region_section_addr(section, addr));
3692 switch (endian) {
3693 case DEVICE_LITTLE_ENDIAN:
3694 val = ldl_le_p(ptr);
3695 break;
3696 case DEVICE_BIG_ENDIAN:
3697 val = ldl_be_p(ptr);
3698 break;
3699 default:
3700 val = ldl_p(ptr);
3701 break;
3704 return val;
3707 uint32_t ldl_phys(hwaddr addr)
3709 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3712 uint32_t ldl_le_phys(hwaddr addr)
3714 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3717 uint32_t ldl_be_phys(hwaddr addr)
3719 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3722 /* warning: addr must be aligned */
3723 static inline uint64_t ldq_phys_internal(hwaddr addr,
3724 enum device_endian endian)
3726 uint8_t *ptr;
3727 uint64_t val;
3728 MemoryRegionSection *section;
3730 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3732 if (!(memory_region_is_ram(section->mr) ||
3733 memory_region_is_romd(section->mr))) {
3734 /* I/O case */
3735 addr = memory_region_section_addr(section, addr);
3737 /* XXX This is broken when device endian != cpu endian.
3738 Fix and add "endian" variable check */
3739 #ifdef TARGET_WORDS_BIGENDIAN
3740 val = io_mem_read(section->mr, addr, 4) << 32;
3741 val |= io_mem_read(section->mr, addr + 4, 4);
3742 #else
3743 val = io_mem_read(section->mr, addr, 4);
3744 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3745 #endif
3746 } else {
3747 /* RAM case */
3748 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3749 & TARGET_PAGE_MASK)
3750 + memory_region_section_addr(section, addr));
3751 switch (endian) {
3752 case DEVICE_LITTLE_ENDIAN:
3753 val = ldq_le_p(ptr);
3754 break;
3755 case DEVICE_BIG_ENDIAN:
3756 val = ldq_be_p(ptr);
3757 break;
3758 default:
3759 val = ldq_p(ptr);
3760 break;
3763 return val;
3766 uint64_t ldq_phys(hwaddr addr)
3768 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3771 uint64_t ldq_le_phys(hwaddr addr)
3773 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3776 uint64_t ldq_be_phys(hwaddr addr)
3778 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3781 /* XXX: optimize */
3782 uint32_t ldub_phys(hwaddr addr)
3784 uint8_t val;
3785 cpu_physical_memory_read(addr, &val, 1);
3786 return val;
3789 /* warning: addr must be aligned */
3790 static inline uint32_t lduw_phys_internal(hwaddr addr,
3791 enum device_endian endian)
3793 uint8_t *ptr;
3794 uint64_t val;
3795 MemoryRegionSection *section;
3797 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3799 if (!(memory_region_is_ram(section->mr) ||
3800 memory_region_is_romd(section->mr))) {
3801 /* I/O case */
3802 addr = memory_region_section_addr(section, addr);
3803 val = io_mem_read(section->mr, addr, 2);
3804 #if defined(TARGET_WORDS_BIGENDIAN)
3805 if (endian == DEVICE_LITTLE_ENDIAN) {
3806 val = bswap16(val);
3808 #else
3809 if (endian == DEVICE_BIG_ENDIAN) {
3810 val = bswap16(val);
3812 #endif
3813 } else {
3814 /* RAM case */
3815 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3816 & TARGET_PAGE_MASK)
3817 + memory_region_section_addr(section, addr));
3818 switch (endian) {
3819 case DEVICE_LITTLE_ENDIAN:
3820 val = lduw_le_p(ptr);
3821 break;
3822 case DEVICE_BIG_ENDIAN:
3823 val = lduw_be_p(ptr);
3824 break;
3825 default:
3826 val = lduw_p(ptr);
3827 break;
3830 return val;
3833 uint32_t lduw_phys(hwaddr addr)
3835 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3838 uint32_t lduw_le_phys(hwaddr addr)
3840 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3843 uint32_t lduw_be_phys(hwaddr addr)
3845 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3848 /* warning: addr must be aligned. The ram page is not marked as dirty
3849 and the code inside is not invalidated. It is useful if the dirty
3850 bits are used to track modified PTEs */
3851 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3853 uint8_t *ptr;
3854 MemoryRegionSection *section;
3856 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3858 if (!memory_region_is_ram(section->mr) || section->readonly) {
3859 addr = memory_region_section_addr(section, addr);
3860 if (memory_region_is_ram(section->mr)) {
3861 section = &phys_sections[phys_section_rom];
3863 io_mem_write(section->mr, addr, val, 4);
3864 } else {
3865 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3866 & TARGET_PAGE_MASK)
3867 + memory_region_section_addr(section, addr);
3868 ptr = qemu_get_ram_ptr(addr1);
3869 stl_p(ptr, val);
3871 if (unlikely(in_migration)) {
3872 if (!cpu_physical_memory_is_dirty(addr1)) {
3873 /* invalidate code */
3874 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3875 /* set dirty bit */
3876 cpu_physical_memory_set_dirty_flags(
3877 addr1, (0xff & ~CODE_DIRTY_FLAG));
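/* Illustrative sketch (not part of the original file): target MMU helpers use
 * stl_phys_notdirty() when setting accessed/dirty bits in a guest page table
 * entry, so that the write itself does not disturb dirty tracking or flush
 * translated code; the PTE bit layout below is a made-up example. */
#if 0
static void set_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    pte |= 0x20;                        /* hypothetical "accessed" bit */
    stl_phys_notdirty(pte_addr, pte);   /* no dirty marking, no TB invalidation */
}
#endif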
3883 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3885 uint8_t *ptr;
3886 MemoryRegionSection *section;
3888 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3890 if (!memory_region_is_ram(section->mr) || section->readonly) {
3891 addr = memory_region_section_addr(section, addr);
3892 if (memory_region_is_ram(section->mr)) {
3893 section = &phys_sections[phys_section_rom];
3895 #ifdef TARGET_WORDS_BIGENDIAN
3896 io_mem_write(section->mr, addr, val >> 32, 4);
3897 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3898 #else
3899 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3900 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3901 #endif
3902 } else {
3903 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3904 & TARGET_PAGE_MASK)
3905 + memory_region_section_addr(section, addr));
3906 stq_p(ptr, val);
3910 /* warning: addr must be aligned */
3911 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3912 enum device_endian endian)
3914 uint8_t *ptr;
3915 MemoryRegionSection *section;
3917 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3919 if (!memory_region_is_ram(section->mr) || section->readonly) {
3920 addr = memory_region_section_addr(section, addr);
3921 if (memory_region_is_ram(section->mr)) {
3922 section = &phys_sections[phys_section_rom];
3924 #if defined(TARGET_WORDS_BIGENDIAN)
3925 if (endian == DEVICE_LITTLE_ENDIAN) {
3926 val = bswap32(val);
3928 #else
3929 if (endian == DEVICE_BIG_ENDIAN) {
3930 val = bswap32(val);
3932 #endif
3933 io_mem_write(section->mr, addr, val, 4);
3934 } else {
3935 unsigned long addr1;
3936 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3937 + memory_region_section_addr(section, addr);
3938 /* RAM case */
3939 ptr = qemu_get_ram_ptr(addr1);
3940 switch (endian) {
3941 case DEVICE_LITTLE_ENDIAN:
3942 stl_le_p(ptr, val);
3943 break;
3944 case DEVICE_BIG_ENDIAN:
3945 stl_be_p(ptr, val);
3946 break;
3947 default:
3948 stl_p(ptr, val);
3949 break;
3951 invalidate_and_set_dirty(addr1, 4);
3955 void stl_phys(hwaddr addr, uint32_t val)
3957 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3960 void stl_le_phys(hwaddr addr, uint32_t val)
3962 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3965 void stl_be_phys(hwaddr addr, uint32_t val)
3967 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3970 /* XXX: optimize */
3971 void stb_phys(hwaddr addr, uint32_t val)
3973 uint8_t v = val;
3974 cpu_physical_memory_write(addr, &v, 1);
3977 /* warning: addr must be aligned */
3978 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3979 enum device_endian endian)
3981 uint8_t *ptr;
3982 MemoryRegionSection *section;
3984 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3986 if (!memory_region_is_ram(section->mr) || section->readonly) {
3987 addr = memory_region_section_addr(section, addr);
3988 if (memory_region_is_ram(section->mr)) {
3989 section = &phys_sections[phys_section_rom];
3991 #if defined(TARGET_WORDS_BIGENDIAN)
3992 if (endian == DEVICE_LITTLE_ENDIAN) {
3993 val = bswap16(val);
3995 #else
3996 if (endian == DEVICE_BIG_ENDIAN) {
3997 val = bswap16(val);
3999 #endif
4000 io_mem_write(section->mr, addr, val, 2);
4001 } else {
4002 unsigned long addr1;
4003 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4004 + memory_region_section_addr(section, addr);
4005 /* RAM case */
4006 ptr = qemu_get_ram_ptr(addr1);
4007 switch (endian) {
4008 case DEVICE_LITTLE_ENDIAN:
4009 stw_le_p(ptr, val);
4010 break;
4011 case DEVICE_BIG_ENDIAN:
4012 stw_be_p(ptr, val);
4013 break;
4014 default:
4015 stw_p(ptr, val);
4016 break;
4018 invalidate_and_set_dirty(addr1, 2);
4022 void stw_phys(hwaddr addr, uint32_t val)
4024 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4027 void stw_le_phys(hwaddr addr, uint32_t val)
4029 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4032 void stw_be_phys(hwaddr addr, uint32_t val)
4034 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4037 /* XXX: optimize */
4038 void stq_phys(hwaddr addr, uint64_t val)
4040 val = tswap64(val);
4041 cpu_physical_memory_write(addr, &val, 8);
4044 void stq_le_phys(hwaddr addr, uint64_t val)
4046 val = cpu_to_le64(val);
4047 cpu_physical_memory_write(addr, &val, 8);
4050 void stq_be_phys(hwaddr addr, uint64_t val)
4052 val = cpu_to_be64(val);
4053 cpu_physical_memory_write(addr, &val, 8);
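/* Illustrative sketch (not part of the original file): device emulation that
 * must read or write guest-physical structures with a fixed endianness uses
 * the _le/_be accessors above; the descriptor offsets are arbitrary examples. */
#if 0
static void poke_le_descriptor(hwaddr desc, uint64_t buf_addr, uint32_t buf_len)
{
    stq_le_phys(desc + 0, buf_addr);     /* 64-bit field, little-endian */
    stl_le_phys(desc + 8, buf_len);      /* 32-bit field, little-endian */
}

static uint32_t read_be_status(hwaddr desc)
{
    return ldl_be_phys(desc + 12);       /* 32-bit big-endian field */
}
#endif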
4056 /* virtual memory access for debug (includes writing to ROM) */
4057 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4058 uint8_t *buf, int len, int is_write)
4060 int l;
4061 hwaddr phys_addr;
4062 target_ulong page;
4064 while (len > 0) {
4065 page = addr & TARGET_PAGE_MASK;
4066 phys_addr = cpu_get_phys_page_debug(env, page);
4067 /* if no physical page mapped, return an error */
4068 if (phys_addr == -1)
4069 return -1;
4070 l = (page + TARGET_PAGE_SIZE) - addr;
4071 if (l > len)
4072 l = len;
4073 phys_addr += (addr & ~TARGET_PAGE_MASK);
4074 if (is_write)
4075 cpu_physical_memory_write_rom(phys_addr, buf, l);
4076 else
4077 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4078 len -= l;
4079 buf += l;
4080 addr += l;
4082 return 0;
4084 #endif
4086 /* in deterministic execution mode, instructions doing device I/Os
4087 must be at the end of the TB */
4088 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4090 TranslationBlock *tb;
4091 uint32_t n, cflags;
4092 target_ulong pc, cs_base;
4093 uint64_t flags;
4095 tb = tb_find_pc(retaddr);
4096 if (!tb) {
4097 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4098 (void *)retaddr);
4100 n = env->icount_decr.u16.low + tb->icount;
4101 cpu_restore_state(tb, env, retaddr);
4102 /* Calculate how many instructions had been executed before the fault
4103 occurred. */
4104 n = n - env->icount_decr.u16.low;
4105 /* Generate a new TB ending on the I/O insn. */
4106 n++;
4107 /* On MIPS and SH, delay slot instructions can only be restarted if
4108 they were already the first instruction in the TB. If this is not
4109 the first instruction in a TB then re-execute the preceding
4110 branch. */
4111 #if defined(TARGET_MIPS)
4112 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4113 env->active_tc.PC -= 4;
4114 env->icount_decr.u16.low++;
4115 env->hflags &= ~MIPS_HFLAG_BMASK;
4117 #elif defined(TARGET_SH4)
4118 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4119 && n > 1) {
4120 env->pc -= 2;
4121 env->icount_decr.u16.low++;
4122 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4124 #endif
4125 /* This should never happen. */
4126 if (n > CF_COUNT_MASK)
4127 cpu_abort(env, "TB too big during recompile");
4129 cflags = n | CF_LAST_IO;
4130 pc = tb->pc;
4131 cs_base = tb->cs_base;
4132 flags = tb->flags;
4133 tb_phys_invalidate(tb, -1);
4134 /* FIXME: In theory this could raise an exception. In practice
4135 we have already translated the block once so it's probably ok. */
4136 tb_gen_code(env, pc, cs_base, flags, cflags);
4137 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4138 the first in the TB) then we end up generating a whole new TB and
4139 repeating the fault, which is horribly inefficient.
4140 Better would be to execute just this insn uncached, or generate a
4141 second new TB. */
4142 cpu_resume_from_signal(env, NULL);
4145 #if !defined(CONFIG_USER_ONLY)
4147 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4149 int i, target_code_size, max_target_code_size;
4150 int direct_jmp_count, direct_jmp2_count, cross_page;
4151 TranslationBlock *tb;
4153 target_code_size = 0;
4154 max_target_code_size = 0;
4155 cross_page = 0;
4156 direct_jmp_count = 0;
4157 direct_jmp2_count = 0;
4158     for (i = 0; i < nb_tbs; i++) {
4159 tb = &tbs[i];
4160 target_code_size += tb->size;
4161 if (tb->size > max_target_code_size)
4162 max_target_code_size = tb->size;
4163 if (tb->page_addr[1] != -1)
4164 cross_page++;
4165 if (tb->tb_next_offset[0] != 0xffff) {
4166 direct_jmp_count++;
4167 if (tb->tb_next_offset[1] != 0xffff) {
4168 direct_jmp2_count++;
4172 /* XXX: avoid using doubles ? */
4173 cpu_fprintf(f, "Translation buffer state:\n");
4174 cpu_fprintf(f, "gen code size %td/%zd\n",
4175 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4176 cpu_fprintf(f, "TB count %d/%d\n",
4177 nb_tbs, code_gen_max_blocks);
4178 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4179 nb_tbs ? target_code_size / nb_tbs : 0,
4180 max_target_code_size);
4181 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4182 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4183 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4184 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4185 cross_page,
4186 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4187 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4188 direct_jmp_count,
4189 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4190 direct_jmp2_count,
4191 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4192 cpu_fprintf(f, "\nStatistics:\n");
4193 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4194 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4195 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4196 tcg_dump_info(f, cpu_fprintf);
4200 * A helper function for the _utterly broken_ virtio device model to find out if
4201 * it's running on a big endian machine. Don't do this at home kids!
4203 bool virtio_is_big_endian(void);
4204 bool virtio_is_big_endian(void)
4206 #if defined(TARGET_WORDS_BIGENDIAN)
4207 return true;
4208 #else
4209 return false;
4210 #endif
4213 #endif
4215 #ifndef CONFIG_USER_ONLY
4216 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4218 MemoryRegionSection *section;
4220 section = phys_page_find(address_space_memory.dispatch,
4221 phys_addr >> TARGET_PAGE_BITS);
4223 return !(memory_region_is_ram(section->mr) ||
4224 memory_region_is_romd(section->mr));
4226 #endif