@@ -52,12 +52,49 @@
#include "target_arch_cpu.h"
int singlestep;
-unsigned long mmap_min_addr;
uintptr_t guest_base;
static const char *cpu_model;
static const char *cpu_type;
bool have_guest_base;
+/*
+ * When running 32-on-64 we should make sure we can fit all of the possible
+ * guest address space into a contiguous chunk of virtual host memory.
+ *
+ * This way we will never overlap with our own libraries or binaries or stack
+ * or anything else that QEMU maps.
+ *
+ * Many cpus reserve the high bit (or more than one for some 64-bit cpus)
+ * of the address for the kernel. Some cpus rely on this and user space
+ * uses the high bit(s) for pointer tagging and the like. For them, we
+ * must preserve the expected address space.
+ */
+#ifndef MAX_RESERVED_VA
+# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
+# if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \
+ (TARGET_LONG_BITS == 32 || defined(TARGET_ABI32))
+/*
+ * There are a number of places where we assign reserved_va to a variable
+ * of type abi_ulong and expect it to fit. Avoid the last page.
+ */
+# define MAX_RESERVED_VA (0xfffffffful & TARGET_PAGE_MASK)
+# else
+# define MAX_RESERVED_VA (1ul << TARGET_VIRT_ADDR_SPACE_BITS)
+# endif
+# else
+# define MAX_RESERVED_VA 0
+# endif
+#endif
+
+/*
+ * That said, reserving *too* much vm space via mmap can run into problems
+ * with rlimits, oom due to page table creation, etc. We will still try it,
+ * if directed by the command-line option, but not by default.
+ */
+#if HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32
+unsigned long reserved_va = MAX_RESERVED_VA;
+#else
unsigned long reserved_va;
+#endif
static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
const char *qemu_uname_release;
@@ -439,6 +476,10 @@ int main(int argc, char **argv)
target_environ = envlist_to_environ(envlist, NULL);
envlist_free(envlist);
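+    /*
+     * The reserved-region allocator (mmap_find_vma_reserved) searches
+     * downward, so start the search at the top of the reserved block.
+     */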
+ if (reserved_va) {
+ mmap_next_start = reserved_va;
+ }
+
{
Error *err = NULL;
if (seed_optarg != NULL) {
@@ -188,64 +188,207 @@ static int mmap_frag(abi_ulong real_start,
return 0;
}
-static abi_ulong mmap_next_start = 0x40000000;
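+/*
+ * First address tried by mmap_find_vma() when the guest does not request
+ * one; 64-bit guests get a much higher base than the traditional 32-bit
+ * 0x40000000.
+ */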
+#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
+# define TASK_UNMAPPED_BASE (1ul << 38)
+#else
+# define TASK_UNMAPPED_BASE 0x40000000
+#endif
+abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
unsigned long last_brk;
-/* find a free memory area of size 'size'. The search starts at
- 'start'. If 'start' == 0, then a default start address is used.
- Return -1 if error.
-*/
-/* page_init() marks pages used by the host as reserved to be sure not
- to use them. */
-static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
+/*
+ * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest
+ * address space.
+ */
+static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
+ abi_ulong alignment)
{
- abi_ulong addr, addr1, addr_start;
+ abi_ulong addr;
+ abi_ulong end_addr;
int prot;
- unsigned long new_brk;
+ int looped = 0;
+
+ if (size > reserved_va) {
+ return (abi_ulong)-1;
+ }
+
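+    /* Over-allocate by 'alignment' bytes so the result can be aligned up. */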
+ size = HOST_PAGE_ALIGN(size) + alignment;
+ end_addr = start + size;
+ if (end_addr > reserved_va) {
+ end_addr = reserved_va;
+ }
+ addr = end_addr - qemu_host_page_size;
- new_brk = (unsigned long)sbrk(0);
- if (last_brk && last_brk < new_brk && last_brk == (target_ulong)last_brk) {
- /* This is a hack to catch the host allocating memory with brk().
- If it uses mmap then we loose.
- FIXME: We really want to avoid the host allocating memory in
- the first place, and maybe leave some slack to avoid switching
- to mmap. */
- page_set_flags(last_brk & TARGET_PAGE_MASK,
- TARGET_PAGE_ALIGN(new_brk),
- PAGE_RESERVED);
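+    /*
+     * Scan downward for a hole of at least 'size' unmapped bytes,
+     * wrapping back to the top of the reserved area at most once.
+     */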
+ while (1) {
+ if (addr > end_addr) {
+ if (looped) {
+ return (abi_ulong)-1;
+ }
+ end_addr = reserved_va;
+ addr = end_addr - qemu_host_page_size;
+ looped = 1;
+ continue;
+ }
+ prot = page_get_flags(addr);
+ if (prot) {
+ end_addr = addr;
+ }
+ if (end_addr - addr >= size) {
+ break;
+ }
+ addr -= qemu_host_page_size;
+ }
+
+ if (start == mmap_next_start) {
+ mmap_next_start = addr;
+ }
+    /* addr is now low enough that aligning it up stays inside the hole */
+ if (alignment != 0) {
+ addr = (addr + alignment) & ~(alignment - 1);
+ }
+ return addr;
+}
+
+/*
+ * Find and reserve a free memory area of size 'size'. The search
+ * starts at 'start'.
+ * It must be called with mmap_lock() held.
+ * Return -1 on error.
+ */
+static abi_ulong mmap_find_vma_aligned(abi_ulong start, abi_ulong size,
+ abi_ulong alignment)
+{
+ void *ptr, *prev;
+ abi_ulong addr;
+ int flags;
+ int wrapped, repeat;
+
+ /* If 'start' == 0, then a default start address is used. */
+ if (start == 0) {
+ start = mmap_next_start;
+ } else {
+ start &= qemu_host_page_mask;
}
- last_brk = new_brk;
size = HOST_PAGE_ALIGN(size);
- start = start & qemu_host_page_mask;
+
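+    /* 'alignment' is log2 here; mmap_find_vma_reserved() takes bytes. */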
+ if (reserved_va) {
+ return mmap_find_vma_reserved(start, size,
+ (alignment != 0 ? 1 << alignment : 0));
+ }
+
addr = start;
- if (addr == 0)
- addr = mmap_next_start;
- addr_start = addr;
- for (;;) {
- prot = 0;
- for (addr1 = addr; addr1 < (addr + size); addr1 += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr1);
+ wrapped = repeat = 0;
+ prev = 0;
+ flags = MAP_ANONYMOUS | MAP_PRIVATE;
+#ifdef MAP_ALIGNED
+ if (alignment != 0) {
+ flags |= MAP_ALIGNED(alignment);
+ }
+#else
+ /* XXX TODO */
+#endif
+
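+    /*
+     * Probe the host with PROT_NONE mappings until it returns an address
+     * that also works as a guest address; unsuitable probes are unmapped
+     * and retried below.
+     */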
+ for (;; prev = ptr) {
+ /*
+ * Reserve needed memory area to avoid a race.
+ * It should be discarded using:
+ * - mmap() with MAP_FIXED flag
+ * - mremap() with MREMAP_FIXED flag
+ * - shmat() with SHM_REMAP flag
+ */
+ ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
+ flags, -1, 0);
+
+        /* ENOMEM if the host address space is exhausted */
+ if (ptr == MAP_FAILED) {
+ return (abi_ulong)-1;
}
- if (prot == 0)
- break;
- addr += qemu_host_page_size;
- /* we found nothing */
- if (addr == addr_start)
+
+ /*
+ * Count the number of sequential returns of the same address.
+ * This is used to modify the search algorithm below.
+ */
+ repeat = (ptr == prev ? repeat + 1 : 0);
+
+ if (h2g_valid(ptr + size - 1)) {
+ addr = h2g(ptr);
+
+ if ((addr & ~TARGET_PAGE_MASK) == 0) {
+ /* Success. */
+ if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
+ mmap_next_start = addr + size;
+ }
+ return addr;
+ }
+
+ /* The address is not properly aligned for the target. */
+ switch (repeat) {
+ case 0:
+ /*
+ * Assume the result that the kernel gave us is the
+ * first with enough free space, so start again at the
+ * next higher target page.
+ */
+ addr = TARGET_PAGE_ALIGN(addr);
+ break;
+ case 1:
+ /*
+ * Sometimes the kernel decides to perform the allocation
+ * at the top end of memory instead.
+ */
+ addr &= TARGET_PAGE_MASK;
+ break;
+ case 2:
+ /* Start over at low memory. */
+ addr = 0;
+ break;
+ default:
+            /* Fail. This unaligned block must be the last. */
+ addr = -1;
+ break;
+ }
+ } else {
+ /*
+ * Since the result the kernel gave didn't fit, start
+ * again at low memory. If any repetition, fail.
+ */
+ addr = (repeat ? -1 : 0);
+ }
+
+ /* Unmap and try again. */
+ munmap(ptr, size);
+
+ /* ENOMEM if we checked the whole of the target address space. */
+ if (addr == (abi_ulong)-1) {
+ return (abi_ulong)-1;
+ } else if (addr == 0) {
+ if (wrapped) {
+ return (abi_ulong)-1;
+ }
+ wrapped = 1;
+ /*
+ * Don't actually use 0 when wrapping, instead indicate
+ * that we'd truly like an allocation in low memory.
+ */
+ addr = TARGET_PAGE_SIZE;
+ } else if (wrapped && addr >= start) {
return (abi_ulong)-1;
+ }
}
- if (start == 0)
- mmap_next_start = addr + size;
- return addr;
+}
+
+abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
+{
+ return mmap_find_vma_aligned(start, size, 0);
}
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
- int flags, int fd, abi_ulong offset)
+ int flags, int fd, off_t offset)
{
abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
- unsigned long host_start;
mmap_lock();
#ifdef DEBUG_MMAP
@@ -294,43 +437,121 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
}
#endif
+ if ((flags & MAP_ANONYMOUS) && fd != -1) {
+ errno = EINVAL;
+ goto fail;
+ }
+#ifdef MAP_STACK
+ if (flags & MAP_STACK) {
+ if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) !=
+ (PROT_READ | PROT_WRITE))) {
+ errno = EINVAL;
+ goto fail;
+ }
+ }
+#endif /* MAP_STACK */
+#ifdef MAP_GUARD
+ if ((flags & MAP_GUARD) && (prot != PROT_NONE || fd != -1 ||
+ offset != 0 || (flags & (MAP_SHARED | MAP_PRIVATE |
+ /* MAP_PREFAULT | */ /* MAP_PREFAULT not in mman.h */
+ MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0)) {
+ errno = EINVAL;
+ goto fail;
+ }
+#endif
+
if (offset & ~TARGET_PAGE_MASK) {
errno = EINVAL;
goto fail;
}
len = TARGET_PAGE_ALIGN(len);
- if (len == 0)
- goto the_end;
+ if (len == 0) {
+ errno = EINVAL;
+ goto fail;
+ }
real_start = start & qemu_host_page_mask;
+ host_offset = offset & qemu_host_page_mask;
+ /*
+ * If the user is asking for the kernel to find a location, do that
+ * before we truncate the length for mapping files below.
+ */
if (!(flags & MAP_FIXED)) {
- abi_ulong mmap_start;
- void *p;
- host_offset = offset & qemu_host_page_mask;
host_len = len + offset - host_offset;
host_len = HOST_PAGE_ALIGN(host_len);
- mmap_start = mmap_find_vma(real_start, host_len);
- if (mmap_start == (abi_ulong)-1) {
+        if ((flags & MAP_ALIGNMENT_MASK) != 0) {
+            start = mmap_find_vma_aligned(real_start, host_len,
+                (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT);
+        } else {
+            start = mmap_find_vma(real_start, host_len);
+        }
+ if (start == (abi_ulong)-1) {
errno = ENOMEM;
goto fail;
}
- /* Note: we prefer to control the mapping address. It is
- especially important if qemu_host_page_size >
- qemu_real_host_page_size */
- p = mmap(g2h_untagged(mmap_start),
- host_len, prot, flags | MAP_FIXED, fd, host_offset);
+ }
+
+ /*
+ * When mapping files into a memory area larger than the file, accesses
+ * to pages beyond the file size will cause a SIGBUS.
+ *
+     * For example, if mmapping a file of 100 bytes on a host with 4K
+     * pages emulating a target with 8K pages, the target expects to be
+     * able to access the first 8K. But the host will trap us on any
+     * access beyond 4K.
+     *
+     * When emulating a target with a larger page size than the host's,
+     * we may need to truncate file maps at EOF and add extra anonymous
+     * pages up to the target's page boundary.
+ */
+
+ if ((qemu_real_host_page_size < qemu_host_page_size) && fd != -1) {
+ struct stat sb;
+
+ if (fstat(fd, &sb) == -1) {
+ goto fail;
+ }
+
+        /* Are we trying to create a map beyond EOF? */
+ if (offset + len > sb.st_size) {
+ /*
+             * If so, truncate the file map at EOF, aligned to the
+             * host's real page size. Additional anonymous maps
+ * will be created beyond EOF.
+ */
+ len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
+ }
+ }
+
+ if (!(flags & MAP_FIXED)) {
+ unsigned long host_start;
+ void *p;
+
+ host_len = len + offset - host_offset;
+ host_len = HOST_PAGE_ALIGN(host_len);
+
+ /*
+ * Note: we prefer to control the mapping address. It is
+ * especially important if qemu_host_page_size >
+ * qemu_real_host_page_size
+ */
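+        /*
+         * First reserve the whole host range anonymously; for file-backed
+         * maps, the file contents are mapped over that reservation just
+         * below.
+         */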
+ p = mmap(g2h_untagged(start), host_len, prot,
+ flags | MAP_FIXED | ((fd != -1) ? MAP_ANONYMOUS : 0), -1, 0);
if (p == MAP_FAILED)
goto fail;
/* update start so that it points to the file position at 'offset' */
host_start = (unsigned long)p;
- if (!(flags & MAP_ANON))
+ if (fd != -1) {
+ p = mmap(g2h_untagged(start), len, prot,
+ flags | MAP_FIXED, fd, host_offset);
+ if (p == MAP_FAILED) {
+ munmap(g2h_untagged(start), host_len);
+ goto fail;
+ }
host_start += offset - host_offset;
+ }
start = h2g(host_start);
} else {
- int flg;
- target_ulong addr;
-
if (start & ~TARGET_PAGE_MASK) {
errno = EINVAL;
goto fail;
@@ -338,20 +559,28 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
end = start + len;
real_end = HOST_PAGE_ALIGN(end);
- for (addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) {
- flg = page_get_flags(addr);
- if (flg & PAGE_RESERVED) {
- errno = ENXIO;
- goto fail;
- }
+ /*
+     * Test if the requested memory area fits the target address space.
+     * It can fail only on a 64-bit host with a 32-bit target; on any
+     * other target/host combination, host mmap() handles this error
+     * correctly.
+ */
+#if TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64
+ if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
+ errno = EINVAL;
+ goto fail;
}
+#endif
- /* worst case: we cannot map the file because the offset is not
- aligned, so we read it */
+ /*
+ * worst case: we cannot map the file because the offset is not
+ * aligned, so we read it
+ */
if (!(flags & MAP_ANON) &&
(offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
- /* msync() won't work here, so we return an error if write is
- possible while it is a shared mapping */
+ /*
+ * msync() won't work here, so we return an error if write is
+ * possible while it is a shared mapping
+ */
if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED &&
(prot & PROT_WRITE)) {
errno = EINVAL;
@@ -392,7 +621,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
/* handle the end of the mapping */
if (end < real_end) {
ret = mmap_frag(real_end - qemu_host_page_size,
- real_end - qemu_host_page_size, real_end,
+ real_end - qemu_host_page_size, end,
prot, flags, fd,
offset + real_end - qemu_host_page_size - start);
if (ret == -1)
@@ -422,6 +651,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
page_dump(stdout);
printf("\n");
#endif
+ tb_invalidate_phys_range(start, start + len);
mmap_unlock();
return start;
fail:
@@ -429,6 +659,49 @@ fail:
return -1;
}
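+/*
+ * Replace a range inside the reserved VA block with fresh PROT_NONE
+ * mappings rather than unmapping it, keeping the startup reservation
+ * contiguous. Host pages at either edge that still hold live guest
+ * pages are left untouched.
+ */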
+static void mmap_reserve(abi_ulong start, abi_ulong size)
+{
+ abi_ulong real_start;
+ abi_ulong real_end;
+ abi_ulong addr;
+ abi_ulong end;
+ int prot;
+
+ real_start = start & qemu_host_page_mask;
+ real_end = HOST_PAGE_ALIGN(start + size);
+ end = start + size;
+ if (start > real_start) {
+ /* handle host page containing start */
+ prot = 0;
+ for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(addr);
+ }
+ if (real_end == real_start + qemu_host_page_size) {
+ for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(addr);
+ }
+ end = real_end;
+ }
+ if (prot != 0) {
+ real_start += qemu_host_page_size;
+ }
+ }
+ if (end < real_end) {
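+        /* handle host page containing end */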
+ prot = 0;
+ for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(addr);
+ }
+ if (prot != 0) {
+ real_end -= qemu_host_page_size;
+ }
+ }
+ if (real_start != real_end) {
+ mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
+ MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ }
+}
+
int target_munmap(abi_ulong start, abi_ulong len)
{
abi_ulong end, real_start, real_end, addr;
@@ -476,11 +749,17 @@ int target_munmap(abi_ulong start, abi_ulong len)
ret = 0;
/* unmap what we can */
if (real_start < real_end) {
- ret = munmap(g2h_untagged(real_start), real_end - real_start);
+ if (reserved_va) {
+ mmap_reserve(real_start, real_end - real_start);
+ } else {
+ ret = munmap(g2h_untagged(real_start), real_end - real_start);
+ }
}
- if (ret == 0)
+ if (ret == 0) {
page_set_flags(start, start + len, 0);
+ tb_invalidate_phys_range(start, start + len);
+ }
mmap_unlock();
return ret;
}
@@ -105,7 +105,6 @@ typedef struct TaskState {
void init_task_state(TaskState *ts);
extern const char *qemu_uname_release;
-extern unsigned long mmap_min_addr;
/*
* TARGET_ARG_MAX defines the number of bytes allocated for arguments
@@ -215,13 +214,15 @@ abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp);
/* mmap.c */
int target_mprotect(abi_ulong start, abi_ulong len, int prot);
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
- int flags, int fd, abi_ulong offset);
+ int flags, int fd, off_t offset);
int target_munmap(abi_ulong start, abi_ulong len);
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
abi_ulong new_size, unsigned long flags,
abi_ulong new_addr);
int target_msync(abi_ulong start, abi_ulong len, int flags);
extern unsigned long last_brk;
+extern abi_ulong mmap_next_start;
+abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size);
void mmap_fork_start(void);
void mmap_fork_end(int child);