Merge tag 'qemu-macppc-20230206' of https://github.com/mcayland/qemu into staging
[qemu.git] / linux-user / mmap.c
blob28135c9e6aa94873efebf2c518c0d7ff83bc1470
1 /*
2 * mmap support for qemu
4 * Copyright (c) 2003 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23 #include "user-internals.h"
24 #include "user-mmap.h"
25 #include "target_mman.h"
/* Mutex acquired by mmap_lock() and mmap_fork_start(). */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock(); see have_mmap_lock(). */
static __thread int mmap_lock_count;

/*
 * Acquire the mmap lock for the calling thread.
 *
 * The lock nests per thread: only the outermost call actually takes
 * mmap_mutex, inner calls merely bump the thread-local depth counter.
 */
void mmap_lock(void)
{
    int depth_before = mmap_lock_count++;

    if (depth_before != 0) {
        /* Already held by this thread; nothing more to do. */
        return;
    }
    pthread_mutex_lock(&mmap_mutex);
}
37 void mmap_unlock(void)
39 if (--mmap_lock_count == 0) {
40 pthread_mutex_unlock(&mmap_mutex);
44 bool have_mmap_lock(void)
46 return mmap_lock_count > 0 ? true : false;
49 /* Grab lock to make sure things are in a consistent state after fork(). */
50 void mmap_fork_start(void)
52 if (mmap_lock_count)
53 abort();
54 pthread_mutex_lock(&mmap_mutex);
57 void mmap_fork_end(int child)
59 if (child)
60 pthread_mutex_init(&mmap_mutex, NULL);
61 else
62 pthread_mutex_unlock(&mmap_mutex);
66 * Validate target prot bitmask.
67 * Return the prot bitmask for the host in *HOST_PROT.
68 * Return 0 if the target prot bitmask is invalid, otherwise
69 * the internal qemu page_flags (which will include PAGE_VALID).
71 static int validate_prot_to_pageflags(int *host_prot, int prot)
73 int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
74 int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
77 * For the host, we need not pass anything except read/write/exec.
78 * While PROT_SEM is allowed by all hosts, it is also ignored, so
79 * don't bother transforming guest bit to host bit. Any other
80 * target-specific prot bits will not be understood by the host
81 * and will need to be encoded into page_flags for qemu emulation.
83 * Pages that are executable by the guest will never be executed
84 * by the host, but the host will need to be able to read them.
86 *host_prot = (prot & (PROT_READ | PROT_WRITE))
87 | (prot & PROT_EXEC ? PROT_READ : 0);
89 #ifdef TARGET_AARCH64
91 ARMCPU *cpu = ARM_CPU(thread_cpu);
94 * The PROT_BTI bit is only accepted if the cpu supports the feature.
95 * Since this is the unusual case, don't bother checking unless
96 * the bit has been requested. If set and valid, record the bit
97 * within QEMU's page_flags.
99 if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
100 valid |= TARGET_PROT_BTI;
101 page_flags |= PAGE_BTI;
103 /* Similarly for the PROT_MTE bit. */
104 if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
105 valid |= TARGET_PROT_MTE;
106 page_flags |= PAGE_MTE;
109 #elif defined(TARGET_HPPA)
110 valid |= PROT_GROWSDOWN | PROT_GROWSUP;
111 #endif
113 return prot & ~valid ? 0 : page_flags;
116 /* NOTE: all the constants are the HOST ones, but addresses are target. */
117 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
119 abi_ulong end, host_start, host_end, addr;
120 int prot1, ret, page_flags, host_prot;
122 trace_target_mprotect(start, len, target_prot);
124 if ((start & ~TARGET_PAGE_MASK) != 0) {
125 return -TARGET_EINVAL;
127 page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
128 if (!page_flags) {
129 return -TARGET_EINVAL;
131 len = TARGET_PAGE_ALIGN(len);
132 end = start + len;
133 if (!guest_range_valid_untagged(start, len)) {
134 return -TARGET_ENOMEM;
136 if (len == 0) {
137 return 0;
140 mmap_lock();
141 host_start = start & qemu_host_page_mask;
142 host_end = HOST_PAGE_ALIGN(end);
143 if (start > host_start) {
144 /* handle host page containing start */
145 prot1 = host_prot;
146 for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
147 prot1 |= page_get_flags(addr);
149 if (host_end == host_start + qemu_host_page_size) {
150 for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
151 prot1 |= page_get_flags(addr);
153 end = host_end;
155 ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
156 prot1 & PAGE_BITS);
157 if (ret != 0) {
158 goto error;
160 host_start += qemu_host_page_size;
162 if (end < host_end) {
163 prot1 = host_prot;
164 for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
165 prot1 |= page_get_flags(addr);
167 ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
168 qemu_host_page_size, prot1 & PAGE_BITS);
169 if (ret != 0) {
170 goto error;
172 host_end -= qemu_host_page_size;
175 /* handle the pages in the middle */
176 if (host_start < host_end) {
177 ret = mprotect(g2h_untagged(host_start),
178 host_end - host_start, host_prot);
179 if (ret != 0) {
180 goto error;
184 page_set_flags(start, start + len, page_flags);
185 ret = 0;
187 error:
188 mmap_unlock();
189 return ret;
192 /* map an incomplete host page */
193 static int mmap_frag(abi_ulong real_start,
194 abi_ulong start, abi_ulong end,
195 int prot, int flags, int fd, abi_ulong offset)
197 abi_ulong real_end, addr;
198 void *host_start;
199 int prot1, prot_new;
201 real_end = real_start + qemu_host_page_size;
202 host_start = g2h_untagged(real_start);
204 /* get the protection of the target pages outside the mapping */
205 prot1 = 0;
206 for(addr = real_start; addr < real_end; addr++) {
207 if (addr < start || addr >= end)
208 prot1 |= page_get_flags(addr);
211 if (prot1 == 0) {
212 /* no page was there, so we allocate one */
213 void *p = mmap(host_start, qemu_host_page_size, prot,
214 flags | MAP_ANONYMOUS, -1, 0);
215 if (p == MAP_FAILED)
216 return -1;
217 prot1 = prot;
219 prot1 &= PAGE_BITS;
221 prot_new = prot | prot1;
222 if (!(flags & MAP_ANONYMOUS)) {
223 /* msync() won't work here, so we return an error if write is
224 possible while it is a shared mapping */
225 if ((flags & MAP_TYPE) == MAP_SHARED &&
226 (prot & PROT_WRITE))
227 return -1;
229 /* adjust protection to be able to read */
230 if (!(prot1 & PROT_WRITE))
231 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
233 /* read the corresponding file data */
234 if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
235 return -1;
237 /* put final protection */
238 if (prot_new != (prot1 | PROT_WRITE))
239 mprotect(host_start, qemu_host_page_size, prot_new);
240 } else {
241 if (prot_new != prot1) {
242 mprotect(host_start, qemu_host_page_size, prot_new);
244 if (prot_new & PROT_WRITE) {
245 memset(g2h_untagged(start), 0, end - start);
248 return 0;
251 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
252 #ifdef TARGET_AARCH64
253 # define TASK_UNMAPPED_BASE 0x5500000000
254 #else
255 # define TASK_UNMAPPED_BASE (1ul << 38)
256 #endif
257 #else
258 #ifdef TARGET_HPPA
259 # define TASK_UNMAPPED_BASE 0xfa000000
260 #else
261 # define TASK_UNMAPPED_BASE 0x40000000
262 #endif
263 #endif
264 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
266 unsigned long last_brk;
268 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
269 of guest address space. */
270 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
271 abi_ulong align)
273 abi_ulong addr, end_addr, incr = qemu_host_page_size;
274 int prot;
275 bool looped = false;
277 if (size > reserved_va) {
278 return (abi_ulong)-1;
281 /* Note that start and size have already been aligned by mmap_find_vma. */
283 end_addr = start + size;
284 if (start > reserved_va - size) {
285 /* Start at the top of the address space. */
286 end_addr = ((reserved_va - size) & -align) + size;
287 looped = true;
290 /* Search downward from END_ADDR, checking to see if a page is in use. */
291 addr = end_addr;
292 while (1) {
293 addr -= incr;
294 if (addr > end_addr) {
295 if (looped) {
296 /* Failure. The entire address space has been searched. */
297 return (abi_ulong)-1;
299 /* Re-start at the top of the address space. */
300 addr = end_addr = ((reserved_va - size) & -align) + size;
301 looped = true;
302 } else {
303 prot = page_get_flags(addr);
304 if (prot) {
305 /* Page in use. Restart below this page. */
306 addr = end_addr = ((addr - size) & -align) + size;
307 } else if (addr && addr + size == end_addr) {
308 /* Success! All pages between ADDR and END_ADDR are free. */
309 if (start == mmap_next_start) {
310 mmap_next_start = addr;
312 return addr;
319 * Find and reserve a free memory area of size 'size'. The search
320 * starts at 'start'.
321 * It must be called with mmap_lock() held.
322 * Return -1 if error.
324 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
326 void *ptr, *prev;
327 abi_ulong addr;
328 int wrapped, repeat;
330 align = MAX(align, qemu_host_page_size);
332 /* If 'start' == 0, then a default start address is used. */
333 if (start == 0) {
334 start = mmap_next_start;
335 } else {
336 start &= qemu_host_page_mask;
338 start = ROUND_UP(start, align);
340 size = HOST_PAGE_ALIGN(size);
342 if (reserved_va) {
343 return mmap_find_vma_reserved(start, size, align);
346 addr = start;
347 wrapped = repeat = 0;
348 prev = 0;
350 for (;; prev = ptr) {
352 * Reserve needed memory area to avoid a race.
353 * It should be discarded using:
354 * - mmap() with MAP_FIXED flag
355 * - mremap() with MREMAP_FIXED flag
356 * - shmat() with SHM_REMAP flag
358 ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
359 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
361 /* ENOMEM, if host address space has no memory */
362 if (ptr == MAP_FAILED) {
363 return (abi_ulong)-1;
366 /* Count the number of sequential returns of the same address.
367 This is used to modify the search algorithm below. */
368 repeat = (ptr == prev ? repeat + 1 : 0);
370 if (h2g_valid(ptr + size - 1)) {
371 addr = h2g(ptr);
373 if ((addr & (align - 1)) == 0) {
374 /* Success. */
375 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
376 mmap_next_start = addr + size;
378 return addr;
381 /* The address is not properly aligned for the target. */
382 switch (repeat) {
383 case 0:
384 /* Assume the result that the kernel gave us is the
385 first with enough free space, so start again at the
386 next higher target page. */
387 addr = ROUND_UP(addr, align);
388 break;
389 case 1:
390 /* Sometimes the kernel decides to perform the allocation
391 at the top end of memory instead. */
392 addr &= -align;
393 break;
394 case 2:
395 /* Start over at low memory. */
396 addr = 0;
397 break;
398 default:
399 /* Fail. This unaligned block must the last. */
400 addr = -1;
401 break;
403 } else {
404 /* Since the result the kernel gave didn't fit, start
405 again at low memory. If any repetition, fail. */
406 addr = (repeat ? -1 : 0);
409 /* Unmap and try again. */
410 munmap(ptr, size);
412 /* ENOMEM if we checked the whole of the target address space. */
413 if (addr == (abi_ulong)-1) {
414 return (abi_ulong)-1;
415 } else if (addr == 0) {
416 if (wrapped) {
417 return (abi_ulong)-1;
419 wrapped = 1;
420 /* Don't actually use 0 when wrapping, instead indicate
421 that we'd truly like an allocation in low memory. */
422 addr = (mmap_min_addr > TARGET_PAGE_SIZE
423 ? TARGET_PAGE_ALIGN(mmap_min_addr)
424 : TARGET_PAGE_SIZE);
425 } else if (wrapped && addr >= start) {
426 return (abi_ulong)-1;
431 /* NOTE: all the constants are the HOST ones */
432 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
433 int flags, int fd, abi_ulong offset)
435 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
436 passthrough_start = -1, passthrough_end = -1;
437 int page_flags, host_prot;
439 mmap_lock();
440 trace_target_mmap(start, len, target_prot, flags, fd, offset);
442 if (!len) {
443 errno = EINVAL;
444 goto fail;
447 page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
448 if (!page_flags) {
449 errno = EINVAL;
450 goto fail;
453 /* Also check for overflows... */
454 len = TARGET_PAGE_ALIGN(len);
455 if (!len) {
456 errno = ENOMEM;
457 goto fail;
460 if (offset & ~TARGET_PAGE_MASK) {
461 errno = EINVAL;
462 goto fail;
466 * If we're mapping shared memory, ensure we generate code for parallel
467 * execution and flush old translations. This will work up to the level
468 * supported by the host -- anything that requires EXCP_ATOMIC will not
469 * be atomic with respect to an external process.
471 if (flags & MAP_SHARED) {
472 CPUState *cpu = thread_cpu;
473 if (!(cpu->tcg_cflags & CF_PARALLEL)) {
474 cpu->tcg_cflags |= CF_PARALLEL;
475 tb_flush(cpu);
479 real_start = start & qemu_host_page_mask;
480 host_offset = offset & qemu_host_page_mask;
482 /* If the user is asking for the kernel to find a location, do that
483 before we truncate the length for mapping files below. */
484 if (!(flags & MAP_FIXED)) {
485 host_len = len + offset - host_offset;
486 host_len = HOST_PAGE_ALIGN(host_len);
487 start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
488 if (start == (abi_ulong)-1) {
489 errno = ENOMEM;
490 goto fail;
494 /* When mapping files into a memory area larger than the file, accesses
495 to pages beyond the file size will cause a SIGBUS.
497 For example, if mmaping a file of 100 bytes on a host with 4K pages
498 emulating a target with 8K pages, the target expects to be able to
499 access the first 8K. But the host will trap us on any access beyond
500 4K.
502 When emulating a target with a larger page-size than the hosts, we
503 may need to truncate file maps at EOF and add extra anonymous pages
504 up to the targets page boundary. */
506 if ((qemu_real_host_page_size() < qemu_host_page_size) &&
507 !(flags & MAP_ANONYMOUS)) {
508 struct stat sb;
510 if (fstat (fd, &sb) == -1)
511 goto fail;
513 /* Are we trying to create a map beyond EOF?. */
514 if (offset + len > sb.st_size) {
515 /* If so, truncate the file map at eof aligned with
516 the hosts real pagesize. Additional anonymous maps
517 will be created beyond EOF. */
518 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
522 if (!(flags & MAP_FIXED)) {
523 unsigned long host_start;
524 void *p;
526 host_len = len + offset - host_offset;
527 host_len = HOST_PAGE_ALIGN(host_len);
529 /* Note: we prefer to control the mapping address. It is
530 especially important if qemu_host_page_size >
531 qemu_real_host_page_size */
532 p = mmap(g2h_untagged(start), host_len, host_prot,
533 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
534 if (p == MAP_FAILED) {
535 goto fail;
537 /* update start so that it points to the file position at 'offset' */
538 host_start = (unsigned long)p;
539 if (!(flags & MAP_ANONYMOUS)) {
540 p = mmap(g2h_untagged(start), len, host_prot,
541 flags | MAP_FIXED, fd, host_offset);
542 if (p == MAP_FAILED) {
543 munmap(g2h_untagged(start), host_len);
544 goto fail;
546 host_start += offset - host_offset;
548 start = h2g(host_start);
549 passthrough_start = start;
550 passthrough_end = start + len;
551 } else {
552 if (start & ~TARGET_PAGE_MASK) {
553 errno = EINVAL;
554 goto fail;
556 end = start + len;
557 real_end = HOST_PAGE_ALIGN(end);
560 * Test if requested memory area fits target address space
561 * It can fail only on 64-bit host with 32-bit target.
562 * On any other target/host host mmap() handles this error correctly.
564 if (end < start || !guest_range_valid_untagged(start, len)) {
565 errno = ENOMEM;
566 goto fail;
569 /* worst case: we cannot map the file because the offset is not
570 aligned, so we read it */
571 if (!(flags & MAP_ANONYMOUS) &&
572 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
573 /* msync() won't work here, so we return an error if write is
574 possible while it is a shared mapping */
575 if ((flags & MAP_TYPE) == MAP_SHARED &&
576 (host_prot & PROT_WRITE)) {
577 errno = EINVAL;
578 goto fail;
580 retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
581 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
582 -1, 0);
583 if (retaddr == -1)
584 goto fail;
585 if (pread(fd, g2h_untagged(start), len, offset) == -1)
586 goto fail;
587 if (!(host_prot & PROT_WRITE)) {
588 ret = target_mprotect(start, len, target_prot);
589 assert(ret == 0);
591 goto the_end;
594 /* handle the start of the mapping */
595 if (start > real_start) {
596 if (real_end == real_start + qemu_host_page_size) {
597 /* one single host page */
598 ret = mmap_frag(real_start, start, end,
599 host_prot, flags, fd, offset);
600 if (ret == -1)
601 goto fail;
602 goto the_end1;
604 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
605 host_prot, flags, fd, offset);
606 if (ret == -1)
607 goto fail;
608 real_start += qemu_host_page_size;
610 /* handle the end of the mapping */
611 if (end < real_end) {
612 ret = mmap_frag(real_end - qemu_host_page_size,
613 real_end - qemu_host_page_size, end,
614 host_prot, flags, fd,
615 offset + real_end - qemu_host_page_size - start);
616 if (ret == -1)
617 goto fail;
618 real_end -= qemu_host_page_size;
621 /* map the middle (easier) */
622 if (real_start < real_end) {
623 void *p;
624 unsigned long offset1;
625 if (flags & MAP_ANONYMOUS)
626 offset1 = 0;
627 else
628 offset1 = offset + real_start - start;
629 p = mmap(g2h_untagged(real_start), real_end - real_start,
630 host_prot, flags, fd, offset1);
631 if (p == MAP_FAILED)
632 goto fail;
633 passthrough_start = real_start;
634 passthrough_end = real_end;
637 the_end1:
638 if (flags & MAP_ANONYMOUS) {
639 page_flags |= PAGE_ANON;
641 page_flags |= PAGE_RESET;
642 if (passthrough_start == passthrough_end) {
643 page_set_flags(start, start + len, page_flags);
644 } else {
645 if (start < passthrough_start) {
646 page_set_flags(start, passthrough_start, page_flags);
648 page_set_flags(passthrough_start, passthrough_end,
649 page_flags | PAGE_PASSTHROUGH);
650 if (passthrough_end < start + len) {
651 page_set_flags(passthrough_end, start + len, page_flags);
654 the_end:
655 trace_target_mmap_complete(start);
656 if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
657 FILE *f = qemu_log_trylock();
658 if (f) {
659 fprintf(f, "page layout changed following mmap\n");
660 page_dump(f);
661 qemu_log_unlock(f);
664 mmap_unlock();
665 return start;
666 fail:
667 mmap_unlock();
668 return -1;
671 static void mmap_reserve(abi_ulong start, abi_ulong size)
673 abi_ulong real_start;
674 abi_ulong real_end;
675 abi_ulong addr;
676 abi_ulong end;
677 int prot;
679 real_start = start & qemu_host_page_mask;
680 real_end = HOST_PAGE_ALIGN(start + size);
681 end = start + size;
682 if (start > real_start) {
683 /* handle host page containing start */
684 prot = 0;
685 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
686 prot |= page_get_flags(addr);
688 if (real_end == real_start + qemu_host_page_size) {
689 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
690 prot |= page_get_flags(addr);
692 end = real_end;
694 if (prot != 0)
695 real_start += qemu_host_page_size;
697 if (end < real_end) {
698 prot = 0;
699 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
700 prot |= page_get_flags(addr);
702 if (prot != 0)
703 real_end -= qemu_host_page_size;
705 if (real_start != real_end) {
706 mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
707 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
708 -1, 0);
712 int target_munmap(abi_ulong start, abi_ulong len)
714 abi_ulong end, real_start, real_end, addr;
715 int prot, ret;
717 trace_target_munmap(start, len);
719 if (start & ~TARGET_PAGE_MASK)
720 return -TARGET_EINVAL;
721 len = TARGET_PAGE_ALIGN(len);
722 if (len == 0 || !guest_range_valid_untagged(start, len)) {
723 return -TARGET_EINVAL;
726 mmap_lock();
727 end = start + len;
728 real_start = start & qemu_host_page_mask;
729 real_end = HOST_PAGE_ALIGN(end);
731 if (start > real_start) {
732 /* handle host page containing start */
733 prot = 0;
734 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
735 prot |= page_get_flags(addr);
737 if (real_end == real_start + qemu_host_page_size) {
738 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
739 prot |= page_get_flags(addr);
741 end = real_end;
743 if (prot != 0)
744 real_start += qemu_host_page_size;
746 if (end < real_end) {
747 prot = 0;
748 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
749 prot |= page_get_flags(addr);
751 if (prot != 0)
752 real_end -= qemu_host_page_size;
755 ret = 0;
756 /* unmap what we can */
757 if (real_start < real_end) {
758 if (reserved_va) {
759 mmap_reserve(real_start, real_end - real_start);
760 } else {
761 ret = munmap(g2h_untagged(real_start), real_end - real_start);
765 if (ret == 0) {
766 page_set_flags(start, start + len, 0);
768 mmap_unlock();
769 return ret;
772 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
773 abi_ulong new_size, unsigned long flags,
774 abi_ulong new_addr)
776 int prot;
777 void *host_addr;
779 if (!guest_range_valid_untagged(old_addr, old_size) ||
780 ((flags & MREMAP_FIXED) &&
781 !guest_range_valid_untagged(new_addr, new_size)) ||
782 ((flags & MREMAP_MAYMOVE) == 0 &&
783 !guest_range_valid_untagged(old_addr, new_size))) {
784 errno = ENOMEM;
785 return -1;
788 mmap_lock();
790 if (flags & MREMAP_FIXED) {
791 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
792 flags, g2h_untagged(new_addr));
794 if (reserved_va && host_addr != MAP_FAILED) {
795 /* If new and old addresses overlap then the above mremap will
796 already have failed with EINVAL. */
797 mmap_reserve(old_addr, old_size);
799 } else if (flags & MREMAP_MAYMOVE) {
800 abi_ulong mmap_start;
802 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
804 if (mmap_start == -1) {
805 errno = ENOMEM;
806 host_addr = MAP_FAILED;
807 } else {
808 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
809 flags | MREMAP_FIXED,
810 g2h_untagged(mmap_start));
811 if (reserved_va) {
812 mmap_reserve(old_addr, old_size);
815 } else {
816 int prot = 0;
817 if (reserved_va && old_size < new_size) {
818 abi_ulong addr;
819 for (addr = old_addr + old_size;
820 addr < old_addr + new_size;
821 addr++) {
822 prot |= page_get_flags(addr);
825 if (prot == 0) {
826 host_addr = mremap(g2h_untagged(old_addr),
827 old_size, new_size, flags);
829 if (host_addr != MAP_FAILED) {
830 /* Check if address fits target address space */
831 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
832 /* Revert mremap() changes */
833 host_addr = mremap(g2h_untagged(old_addr),
834 new_size, old_size, flags);
835 errno = ENOMEM;
836 host_addr = MAP_FAILED;
837 } else if (reserved_va && old_size > new_size) {
838 mmap_reserve(old_addr + old_size, old_size - new_size);
841 } else {
842 errno = ENOMEM;
843 host_addr = MAP_FAILED;
847 if (host_addr == MAP_FAILED) {
848 new_addr = -1;
849 } else {
850 new_addr = h2g(host_addr);
851 prot = page_get_flags(old_addr);
852 page_set_flags(old_addr, old_addr + old_size, 0);
853 page_set_flags(new_addr, new_addr + new_size,
854 prot | PAGE_VALID | PAGE_RESET);
856 mmap_unlock();
857 return new_addr;
860 static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
862 ulong addr;
864 if ((start | end) & ~qemu_host_page_mask) {
865 return false;
868 for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
869 if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
870 return false;
874 return true;
877 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
879 abi_ulong len, end;
880 int ret = 0;
882 if (start & ~TARGET_PAGE_MASK) {
883 return -TARGET_EINVAL;
885 len = TARGET_PAGE_ALIGN(len_in);
887 if (len_in && !len) {
888 return -TARGET_EINVAL;
891 end = start + len;
892 if (end < start) {
893 return -TARGET_EINVAL;
896 if (end == start) {
897 return 0;
900 if (!guest_range_valid_untagged(start, len)) {
901 return -TARGET_EINVAL;
904 /* Translate for some architectures which have different MADV_xxx values */
905 switch (advice) {
906 case TARGET_MADV_DONTNEED: /* alpha */
907 advice = MADV_DONTNEED;
908 break;
909 case TARGET_MADV_WIPEONFORK: /* parisc */
910 advice = MADV_WIPEONFORK;
911 break;
912 case TARGET_MADV_KEEPONFORK: /* parisc */
913 advice = MADV_KEEPONFORK;
914 break;
915 /* we do not care about the other MADV_xxx values yet */
919 * Most advice values are hints, so ignoring and returning success is ok.
921 * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
922 * MADV_KEEPONFORK are not hints and need to be emulated.
924 * A straight passthrough for those may not be safe because qemu sometimes
925 * turns private file-backed mappings into anonymous mappings.
926 * can_passthrough_madvise() helps to check if a passthrough is possible by
927 * comparing mappings that are known to have the same semantics in the host
928 * and the guest. In this case passthrough is safe.
930 * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
931 * return failure if not.
933 * MADV_DONTNEED is passed through as well, if possible.
934 * If passthrough isn't possible, we nevertheless (wrongly!) return
935 * success, which is broken but some userspace programs fail to work
936 * otherwise. Completely implementing such emulation is quite complicated
937 * though.
939 mmap_lock();
940 switch (advice) {
941 case MADV_WIPEONFORK:
942 case MADV_KEEPONFORK:
943 ret = -EINVAL;
944 /* fall through */
945 case MADV_DONTNEED:
946 if (can_passthrough_madvise(start, end)) {
947 ret = get_errno(madvise(g2h_untagged(start), len, advice));
948 if ((advice == MADV_DONTNEED) && (ret == 0)) {
949 page_reset_target_data(start, start + len);
953 mmap_unlock();
955 return ret;