Message ID | 20230819094806.14965-22-kariem.taha2.7@gmail.com |
---|---|
State | New |
Headers | show |
Series | Implement the mmap system call for FreeBSD. | expand |
On Sat, Aug 19, 2023 at 3:49 AM Karim Taha <kariem.taha2.7@gmail.com> wrote: > From: Stacey Son <sson@FreeBSD.org> > > Signed-off-by: Stacey Son <sson@FreeBSD.org> > Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com> > --- > bsd-user/bsd-mem.h | 72 +++++++++++++++++++++++++++++++++++ > bsd-user/freebsd/os-syscall.c | 8 ++++ > 2 files changed, 80 insertions(+) > > diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h > index 221ad76d8c..f737b94885 100644 > --- a/bsd-user/bsd-mem.h > +++ b/bsd-user/bsd-mem.h > @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid, > abi_long cmd, > return ret; > } > > +/* shmat(2) */ > +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int > shmflg) > +{ > + abi_ulong raddr; > + abi_long ret; > + void *host_raddr; > + struct shmid_ds shm_info; > + int i; > + > + /* Find out the length of the shared memory segment. */ > + ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); > + if (is_error(ret)) { > + /* Can't get the length */ > + return ret; > + } > + > + mmap_lock(); > + > + if (shmaddr) { > + host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg); > + } else { > + abi_ulong mmap_start; > + > + mmap_start = mmap_find_vma(0, shm_info.shm_segsz); > + > + if (mmap_start == -1) { > + errno = ENOMEM; > + host_raddr = (void *)-1; > + } else { > + host_raddr = shmat(shmid, g2h_untagged(mmap_start), > + shmflg); /* | SHM_REMAP XXX WHY? */ > I was all set to hit reviewed by on this, but this has me curious. Kyle (or anybody else) do you know the back story here. git blame is less than helpful. Warner > + } > + } > + > + if (host_raddr == (void *)-1) { > + mmap_unlock(); > + return get_errno((long)host_raddr); > + } > + raddr = h2g((unsigned long)host_raddr); > + > + page_set_flags(raddr, raddr + shm_info.shm_segsz, > + PAGE_VALID | PAGE_READ | ((shmflg & SHM_RDONLY) ? 0 : > PAGE_WRITE)); > + > + for (i = 0; i < N_BSD_SHM_REGIONS; i++) { > + if (bsd_shm_regions[i].start == 0) { > + bsd_shm_regions[i].start = raddr; > + bsd_shm_regions[i].size = shm_info.shm_segsz; > + break; > + } > + } > + > + mmap_unlock(); > + return raddr; > +} > + > +/* shmdt(2) */ > +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr) > +{ > + int i; > + > + for (i = 0; i < N_BSD_SHM_REGIONS; ++i) { > + if (bsd_shm_regions[i].start == shmaddr) { > + bsd_shm_regions[i].start = 0; > + page_set_flags(shmaddr, > + shmaddr + bsd_shm_regions[i].size, 0); > + break; > + } > + } > + > + return get_errno(shmdt(g2h_untagged(shmaddr))); > +} > + > #endif /* BSD_USER_BSD_MEM_H */ > diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c > index 9681c65ce9..f76bc1eb38 100644 > --- a/bsd-user/freebsd/os-syscall.c > +++ b/bsd-user/freebsd/os-syscall.c > @@ -559,6 +559,14 @@ static abi_long freebsd_syscall(void *cpu_env, int > num, abi_long arg1, > ret = do_bsd_shmctl(arg1, arg2, arg3); > break; > > + case TARGET_FREEBSD_NR_shmat: /* shmat(2) */ > + ret = do_bsd_shmat(arg1, arg2, arg3); > + break; > + > + case TARGET_FREEBSD_NR_shmdt: /* shmdt(2) */ > + ret = do_bsd_shmdt(arg1); > + break; > + > /* > * Misc > */ > -- > 2.40.0 > >
On 8/19/23 02:48, Karim Taha wrote: > From: Stacey Son <sson@FreeBSD.org> > > Signed-off-by: Stacey Son <sson@FreeBSD.org> > Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com> > --- > bsd-user/bsd-mem.h | 72 +++++++++++++++++++++++++++++++++++ > bsd-user/freebsd/os-syscall.c | 8 ++++ > 2 files changed, 80 insertions(+) > > diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h > index 221ad76d8c..f737b94885 100644 > --- a/bsd-user/bsd-mem.h > +++ b/bsd-user/bsd-mem.h > @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid, abi_long cmd, > return ret; > } > > +/* shmat(2) */ > +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg) > +{ > + abi_ulong raddr; > + abi_long ret; > + void *host_raddr; > + struct shmid_ds shm_info; > + int i; > + > + /* Find out the length of the shared memory segment. */ > + ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); > + if (is_error(ret)) { > + /* Can't get the length */ > + return ret; > + } > + > + mmap_lock(); > + > + if (shmaddr) { > + host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg); Missing if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) { return -TARGET_EINVAL; } > + } else { > + abi_ulong mmap_start; > + > + mmap_start = mmap_find_vma(0, shm_info.shm_segsz); > + > + if (mmap_start == -1) { > + errno = ENOMEM; > + host_raddr = (void *)-1; > + } else { > + host_raddr = shmat(shmid, g2h_untagged(mmap_start), > + shmflg); /* | SHM_REMAP XXX WHY? */ With reserved_va, the entire guest address space is mapped PROT_NONE so that it is reserved, so that the kernel does not use it for something else. You need the SHM_REMAP to replace the reservation mapping. > +/* shmdt(2) */ > +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr) > +{ > + int i; > + > + for (i = 0; i < N_BSD_SHM_REGIONS; ++i) { > + if (bsd_shm_regions[i].start == shmaddr) { > + bsd_shm_regions[i].start = 0; > + page_set_flags(shmaddr, > + shmaddr + bsd_shm_regions[i].size, 0); > + break; > + } > + } > + > + return get_errno(shmdt(g2h_untagged(shmaddr))); > +} Hmm, bug with linux-user as well, because here we should re-establish the reserved_va reservation. Also, we should not be using a fixed sized array. Nothing good happens when the array fills up. r~
On Sun, Aug 20, 2023 at 9:30 AM Richard Henderson < richard.henderson@linaro.org> wrote: > On 8/19/23 02:48, Karim Taha wrote: > > From: Stacey Son <sson@FreeBSD.org> > > > > Signed-off-by: Stacey Son <sson@FreeBSD.org> > > Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com> > > --- > > bsd-user/bsd-mem.h | 72 +++++++++++++++++++++++++++++++++++ > > bsd-user/freebsd/os-syscall.c | 8 ++++ > > 2 files changed, 80 insertions(+) > > > > diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h > > index 221ad76d8c..f737b94885 100644 > > --- a/bsd-user/bsd-mem.h > > +++ b/bsd-user/bsd-mem.h > > @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long > shmid, abi_long cmd, > > return ret; > > } > > > > +/* shmat(2) */ > > +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int > shmflg) > > +{ > > + abi_ulong raddr; > > + abi_long ret; > > + void *host_raddr; > > + struct shmid_ds shm_info; > > + int i; > > + > > + /* Find out the length of the shared memory segment. */ > > + ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); > > + if (is_error(ret)) { > > + /* Can't get the length */ > > + return ret; > > + } > > + > > + mmap_lock(); > > + > > + if (shmaddr) { > > + host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), > shmflg); > > Missing > > if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) { > return -TARGET_EINVAL; > } > > > + } else { > > + abi_ulong mmap_start; > > + > > + mmap_start = mmap_find_vma(0, shm_info.shm_segsz); > > + > > + if (mmap_start == -1) { > > + errno = ENOMEM; > > + host_raddr = (void *)-1; > > + } else { > > + host_raddr = shmat(shmid, g2h_untagged(mmap_start), > > + shmflg); /* | SHM_REMAP XXX WHY? */ > > With reserved_va, the entire guest address space is mapped PROT_NONE so > that it is > reserved, so that the kernel does not use it for something else. You need > the SHM_REMAP > to replace the reservation mapping. > > > +/* shmdt(2) */ > > +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr) > > +{ > > + int i; > > + > > + for (i = 0; i < N_BSD_SHM_REGIONS; ++i) { > > + if (bsd_shm_regions[i].start == shmaddr) { > > + bsd_shm_regions[i].start = 0; > > + page_set_flags(shmaddr, > > + shmaddr + bsd_shm_regions[i].size, 0); > > + break; > > + } > > + } > > + > > + return get_errno(shmdt(g2h_untagged(shmaddr))); > > +} > > Hmm, bug with linux-user as well, because here we should re-establish the > reserved_va > reservation. > ... of the shared memory region we just detached? Right? > Also, we should not be using a fixed sized array. Nothing good happens > when the array > fills up. > File this as https://github.com/qemu-bsd-user/qemu-bsd-user/issues/47 so we don't forget. It's good enough for the moment since the programs we've seen have a very limited number of segments... but longer term, it should be dynamic. Warner
On 8/22/23 11:03, Warner Losh wrote: > Hmm, bug with linux-user as well, because here we should re-establish the reserved_va > reservation. > > > ... of the shared memory region we just detached? Right? Correct. On a related note, on FreeBSD is there any practical difference between PROT_NONE, MAP_ANON and PROT_NONE, MAP_GUARD for large memory regions? I ask since FreeBSD doesn't have MAP_NORESERVE, which Linux uses to avoid allocation of gigabytes. r~
On Tue, Aug 22, 2023 at 12:11 PM Richard Henderson < richard.henderson@linaro.org> wrote: > On 8/22/23 11:03, Warner Losh wrote: > > Hmm, bug with linux-user as well, because here we should > re-establish the reserved_va > > reservation. > > > > > > ... of the shared memory region we just detached? Right? > > Correct. > > On a related note, on FreeBSD is there any practical difference between > > PROT_NONE, MAP_ANON > and > PROT_NONE, MAP_GUARD > > for large memory regions? > They do different things. MAP_ANON maps the memory without a backing device. This means it allocates the VA space right away, but lazily allocates the backing pages as the pages are dirtied. MAP_GUARD creates the VA mapping, but never maps any pages to those pages (well, until it's remapped). Any read/write/exec access to MAP_GUARD pages results in a SIGSEGV. > I ask since FreeBSD doesn't have MAP_NORESERVE, which Linux uses to avoid > allocation of > gigabytes > Yea. It sounds like MAP_NORESERVE is what FreeBSD's default behavior is: We don't allocate backing store in the swap areas until there's memory pressure. You can safely allocate GB of space with MAP_ANON and get similar behavior to the MAP_NORESERVE. MAP_GUARD could be used if you wanted to reserve the VA space, but didn't want to assign anything to the VA space until later. As a practical matter, they both consume about the same resources until the MAP_ANON region starts to get populated with data... With PROT_NONE, I think they would have the same effect. If it is to be a backing store for something like malloc, then MAP_ANON would be best. If you are replacing it with a lot of things, like a mix of files, devices and/or anon memory, then MAP_GUARD and replace it with MAP_FIXED later. Most likely you'll want MAP_GUARD to reserve the area, and then MAP_FIXED to use it for mmap'd memory, shared memory, executable pages, etc. Does that tell you what you need to know? Warner > r~ >
On 8/22/23 12:54, Warner Losh wrote: > As a practical matter, they both consume about the same resources until the MAP_ANON > region starts to get populated with data... > > With PROT_NONE, I think they would have the same effect. If it is to be a backing store for > something like malloc, then MAP_ANON would be best. If you are replacing it with a lot of > things, like a mix of files, devices and/or anon memory, then MAP_GUARD and replace it > with MAP_FIXED later. Most likely you'll want MAP_GUARD to reserve the area, and then > MAP_FIXED to use it for mmap'd memory, shared memory, executable pages, etc. > > Does that tell you what you need to know? Yes. The reserved_va area is replaced with a mix of files, anon, etc, based on whatever the guest requires. So it might be reasonable to adjust bsd-user/mmap.c to use MAP_GUARD for managing the reserved_va area instead of MAP_ANON. No rush, of course. r~
diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h index 221ad76d8c..f737b94885 100644 --- a/bsd-user/bsd-mem.h +++ b/bsd-user/bsd-mem.h @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid, abi_long cmd, return ret; } +/* shmat(2) */ +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg) +{ + abi_ulong raddr; + abi_long ret; + void *host_raddr; + struct shmid_ds shm_info; + int i; + + /* Find out the length of the shared memory segment. */ + ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); + if (is_error(ret)) { + /* Can't get the length */ + return ret; + } + + mmap_lock(); + + if (shmaddr) { + host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg); + } else { + abi_ulong mmap_start; + + mmap_start = mmap_find_vma(0, shm_info.shm_segsz); + + if (mmap_start == -1) { + errno = ENOMEM; + host_raddr = (void *)-1; + } else { + host_raddr = shmat(shmid, g2h_untagged(mmap_start), + shmflg); /* | SHM_REMAP XXX WHY? */ + } + } + + if (host_raddr == (void *)-1) { + mmap_unlock(); + return get_errno((long)host_raddr); + } + raddr = h2g((unsigned long)host_raddr); + + page_set_flags(raddr, raddr + shm_info.shm_segsz, + PAGE_VALID | PAGE_READ | ((shmflg & SHM_RDONLY) ? 0 : PAGE_WRITE)); + + for (i = 0; i < N_BSD_SHM_REGIONS; i++) { + if (bsd_shm_regions[i].start == 0) { + bsd_shm_regions[i].start = raddr; + bsd_shm_regions[i].size = shm_info.shm_segsz; + break; + } + } + + mmap_unlock(); + return raddr; +} + +/* shmdt(2) */ +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr) +{ + int i; + + for (i = 0; i < N_BSD_SHM_REGIONS; ++i) { + if (bsd_shm_regions[i].start == shmaddr) { + bsd_shm_regions[i].start = 0; + page_set_flags(shmaddr, + shmaddr + bsd_shm_regions[i].size, 0); + break; + } + } + + return get_errno(shmdt(g2h_untagged(shmaddr))); +} + #endif /* BSD_USER_BSD_MEM_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index 9681c65ce9..f76bc1eb38 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -559,6 +559,14 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_shmctl(arg1, arg2, arg3); break; + case TARGET_FREEBSD_NR_shmat: /* shmat(2) */ + ret = do_bsd_shmat(arg1, arg2, arg3); + break; + + case TARGET_FREEBSD_NR_shmdt: /* shmdt(2) */ + ret = do_bsd_shmdt(arg1); + break; + /* * Misc */