diff mbox series

[21/22] Implement shmat(2) and shmdt(2)

Message ID 20230819094806.14965-22-kariem.taha2.7@gmail.com
State New
Headers show
Series Implement the mmap system call for FreeBSD. | expand

Commit Message

Karim Taha Aug. 19, 2023, 9:48 a.m. UTC
From: Stacey Son <sson@FreeBSD.org>

Signed-off-by: Stacey Son <sson@FreeBSD.org>
Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
---
 bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
 bsd-user/freebsd/os-syscall.c |  8 ++++
 2 files changed, 80 insertions(+)

Comments

Warner Losh Aug. 20, 2023, 4:44 a.m. UTC | #1
On Sat, Aug 19, 2023 at 3:49 AM Karim Taha <kariem.taha2.7@gmail.com> wrote:

> From: Stacey Son <sson@FreeBSD.org>
>
> Signed-off-by: Stacey Son <sson@FreeBSD.org>
> Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
> ---
>  bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
>  bsd-user/freebsd/os-syscall.c |  8 ++++
>  2 files changed, 80 insertions(+)
>
> diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
> index 221ad76d8c..f737b94885 100644
> --- a/bsd-user/bsd-mem.h
> +++ b/bsd-user/bsd-mem.h
> @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid,
> abi_long cmd,
>      return ret;
>  }
>
> +/* shmat(2) */
> +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int
> shmflg)
> +{
> +    abi_ulong raddr;
> +    abi_long ret;
> +    void *host_raddr;
> +    struct shmid_ds shm_info;
> +    int i;
> +
> +    /* Find out the length of the shared memory segment. */
> +    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
> +    if (is_error(ret)) {
> +        /* Can't get the length */
> +        return ret;
> +    }
> +
> +    mmap_lock();
> +
> +    if (shmaddr) {
> +        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
> +    } else {
> +        abi_ulong mmap_start;
> +
> +        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
> +
> +        if (mmap_start == -1) {
> +            errno = ENOMEM;
> +            host_raddr = (void *)-1;
> +        } else {
> +            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
> +                shmflg); /* | SHM_REMAP XXX WHY? */
>

I was all set to hit reviewed by on this, but this has me curious. Kyle (or
anybody else) do
you know the back story here. git blame is less than helpful.

Warner


> +        }
> +    }
> +
> +    if (host_raddr == (void *)-1) {
> +        mmap_unlock();
> +        return get_errno((long)host_raddr);
> +    }
> +    raddr = h2g((unsigned long)host_raddr);
> +
> +    page_set_flags(raddr, raddr + shm_info.shm_segsz,
> +        PAGE_VALID | PAGE_READ | ((shmflg & SHM_RDONLY) ? 0 :
> PAGE_WRITE));
> +
> +    for (i = 0; i < N_BSD_SHM_REGIONS; i++) {
> +        if (bsd_shm_regions[i].start == 0) {
> +            bsd_shm_regions[i].start = raddr;
> +            bsd_shm_regions[i].size = shm_info.shm_segsz;
> +            break;
> +        }
> +    }
> +
> +    mmap_unlock();
> +    return raddr;
> +}
> +
> +/* shmdt(2) */
> +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
> +{
> +    int i;
> +
> +    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
> +        if (bsd_shm_regions[i].start == shmaddr) {
> +            bsd_shm_regions[i].start = 0;
> +            page_set_flags(shmaddr,
> +                shmaddr + bsd_shm_regions[i].size, 0);
> +            break;
> +        }
> +    }
> +
> +    return get_errno(shmdt(g2h_untagged(shmaddr)));
> +}
> +
>  #endif /* BSD_USER_BSD_MEM_H */
> diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
> index 9681c65ce9..f76bc1eb38 100644
> --- a/bsd-user/freebsd/os-syscall.c
> +++ b/bsd-user/freebsd/os-syscall.c
> @@ -559,6 +559,14 @@ static abi_long freebsd_syscall(void *cpu_env, int
> num, abi_long arg1,
>          ret = do_bsd_shmctl(arg1, arg2, arg3);
>          break;
>
> +    case TARGET_FREEBSD_NR_shmat: /* shmat(2) */
> +        ret = do_bsd_shmat(arg1, arg2, arg3);
> +        break;
> +
> +    case TARGET_FREEBSD_NR_shmdt: /* shmdt(2) */
> +        ret = do_bsd_shmdt(arg1);
> +        break;
> +
>          /*
>           * Misc
>           */
> --
> 2.40.0
>
>
Richard Henderson Aug. 20, 2023, 3:30 p.m. UTC | #2
On 8/19/23 02:48, Karim Taha wrote:
> From: Stacey Son <sson@FreeBSD.org>
> 
> Signed-off-by: Stacey Son <sson@FreeBSD.org>
> Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
> ---
>   bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
>   bsd-user/freebsd/os-syscall.c |  8 ++++
>   2 files changed, 80 insertions(+)
> 
> diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
> index 221ad76d8c..f737b94885 100644
> --- a/bsd-user/bsd-mem.h
> +++ b/bsd-user/bsd-mem.h
> @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid, abi_long cmd,
>       return ret;
>   }
>   
> +/* shmat(2) */
> +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg)
> +{
> +    abi_ulong raddr;
> +    abi_long ret;
> +    void *host_raddr;
> +    struct shmid_ds shm_info;
> +    int i;
> +
> +    /* Find out the length of the shared memory segment. */
> +    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
> +    if (is_error(ret)) {
> +        /* Can't get the length */
> +        return ret;
> +    }
> +
> +    mmap_lock();
> +
> +    if (shmaddr) {
> +        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);

Missing

     if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
         return -TARGET_EINVAL;
     }

> +    } else {
> +        abi_ulong mmap_start;
> +
> +        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
> +
> +        if (mmap_start == -1) {
> +            errno = ENOMEM;
> +            host_raddr = (void *)-1;
> +        } else {
> +            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
> +                shmflg); /* | SHM_REMAP XXX WHY? */

With reserved_va, the entire guest address space is mapped PROT_NONE so that it is 
reserved, so that the kernel does not use it for something else.  You need the SHM_REMAP 
to replace the reservation mapping.

> +/* shmdt(2) */
> +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
> +{
> +    int i;
> +
> +    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
> +        if (bsd_shm_regions[i].start == shmaddr) {
> +            bsd_shm_regions[i].start = 0;
> +            page_set_flags(shmaddr,
> +                shmaddr + bsd_shm_regions[i].size, 0);
> +            break;
> +        }
> +    }
> +
> +    return get_errno(shmdt(g2h_untagged(shmaddr)));
> +}

Hmm, bug with linux-user as well, because here we should re-establish the reserved_va 
reservation.

Also, we should not be using a fixed sized array.  Nothing good happens when the array 
fills up.


r~
Warner Losh Aug. 22, 2023, 6:03 p.m. UTC | #3
On Sun, Aug 20, 2023 at 9:30 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 8/19/23 02:48, Karim Taha wrote:
> > From: Stacey Son <sson@FreeBSD.org>
> >
> > Signed-off-by: Stacey Son <sson@FreeBSD.org>
> > Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
> > ---
> >   bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
> >   bsd-user/freebsd/os-syscall.c |  8 ++++
> >   2 files changed, 80 insertions(+)
> >
> > diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
> > index 221ad76d8c..f737b94885 100644
> > --- a/bsd-user/bsd-mem.h
> > +++ b/bsd-user/bsd-mem.h
> > @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long
> shmid, abi_long cmd,
> >       return ret;
> >   }
> >
> > +/* shmat(2) */
> > +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int
> shmflg)
> > +{
> > +    abi_ulong raddr;
> > +    abi_long ret;
> > +    void *host_raddr;
> > +    struct shmid_ds shm_info;
> > +    int i;
> > +
> > +    /* Find out the length of the shared memory segment. */
> > +    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
> > +    if (is_error(ret)) {
> > +        /* Can't get the length */
> > +        return ret;
> > +    }
> > +
> > +    mmap_lock();
> > +
> > +    if (shmaddr) {
> > +        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr),
> shmflg);
>
> Missing
>
>      if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
>          return -TARGET_EINVAL;
>      }
>
> > +    } else {
> > +        abi_ulong mmap_start;
> > +
> > +        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
> > +
> > +        if (mmap_start == -1) {
> > +            errno = ENOMEM;
> > +            host_raddr = (void *)-1;
> > +        } else {
> > +            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
> > +                shmflg); /* | SHM_REMAP XXX WHY? */
>
> With reserved_va, the entire guest address space is mapped PROT_NONE so
> that it is
> reserved, so that the kernel does not use it for something else.  You need
> the SHM_REMAP
> to replace the reservation mapping.
>
> > +/* shmdt(2) */
> > +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
> > +        if (bsd_shm_regions[i].start == shmaddr) {
> > +            bsd_shm_regions[i].start = 0;
> > +            page_set_flags(shmaddr,
> > +                shmaddr + bsd_shm_regions[i].size, 0);
> > +            break;
> > +        }
> > +    }
> > +
> > +    return get_errno(shmdt(g2h_untagged(shmaddr)));
> > +}
>
> Hmm, bug with linux-user as well, because here we should re-establish the
> reserved_va
> reservation.
>

... of the shared memory region we just detached? Right?


> Also, we should not be using a fixed sized array.  Nothing good happens
> when the array
> fills up.
>

File this as https://github.com/qemu-bsd-user/qemu-bsd-user/issues/47 so we
don't forget.
It's good enough for the moment since the programs we've seen have a very
limited number
of segments... but longer term, it should be dynamic.

Warner
Richard Henderson Aug. 22, 2023, 6:11 p.m. UTC | #4
On 8/22/23 11:03, Warner Losh wrote:
>     Hmm, bug with linux-user as well, because here we should re-establish the reserved_va
>     reservation.
> 
> 
> ... of the shared memory region we just detached? Right?

Correct.

On a related note, on FreeBSD is there any practical difference between

     PROT_NONE, MAP_ANON
and
     PROT_NONE, MAP_GUARD

for large memory regions?

I ask since FreeBSD doesn't have MAP_NORESERVE, which Linux uses to avoid allocation of 
gigabytes.


r~
Warner Losh Aug. 22, 2023, 7:54 p.m. UTC | #5
On Tue, Aug 22, 2023 at 12:11 PM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 8/22/23 11:03, Warner Losh wrote:
> >     Hmm, bug with linux-user as well, because here we should
> re-establish the reserved_va
> >     reservation.
> >
> >
> > ... of the shared memory region we just detached? Right?
>
> Correct.
>
> On a related note, on FreeBSD is there any practical difference between
>
>      PROT_NONE, MAP_ANON
> and
>      PROT_NONE, MAP_GUARD
>
> for large memory regions?
>

They do different things. MAP_ANON maps the memory without a backing
device. This means it allocates the VA space right away, but lazily
allocates
the backing pages as the pages are dirtied.

MAP_GUARD creates the VA mapping, but never maps any pages to those
pages (well, until it's remapped). Any read/write/exec access to MAP_GUARD
pages results in a SIGSEGV.


> I ask since FreeBSD doesn't have MAP_NORESERVE, which Linux uses to avoid
> allocation of
> gigabytes
>

Yea. It sounds like MAP_NORESERVE is what FreeBSD's default behavior is: We
don't
allocate backing store in the swap areas until there's memory pressure. You
can safely
allocate GB of space with MAP_ANON and get similar behavior to the
MAP_NORESERVE.

MAP_GUARD could be used if you wanted to reserve the VA space, but didn't
want to assign
anything to the VA space until later.

As a practical matter, they both consume about the same resources until the
MAP_ANON
region starts to get populated with data...

With PROT_NONE, I think they would have the same effect. If it is to be a
backing store for
something like malloc, then MAP_ANON would be best. If you are replacing it
with a lot of
things, like a mix of files, devices and/or anon memory, then MAP_GUARD and
replace it
with MAP_FIXED later. Most likely you'll want MAP_GUARD to reserve the
area, and then
MAP_FIXED to use it for mmap'd memory, shared memory, executable pages, etc.

Does that tell you what you need to know?

Warner


> r~
>
Richard Henderson Aug. 22, 2023, 9 p.m. UTC | #6
On 8/22/23 12:54, Warner Losh wrote:
> As a practical matter, they both consume about the same resources until the MAP_ANON
> region starts to get populated with data...
> 
> With PROT_NONE, I think they would have the same effect. If it is to be a backing store for
> something like malloc, then MAP_ANON would be best. If you are replacing it with a lot of
> things, like a mix of files, devices and/or anon memory, then MAP_GUARD and replace it
> with MAP_FIXED later. Most likely you'll want MAP_GUARD to reserve the area, and then
> MAP_FIXED to use it for mmap'd memory, shared memory, executable pages, etc.
> 
> Does that tell you what you need to know?

Yes.

The reserved_va area is replaced with a mix of files, anon, etc, based on whatever the 
guest requires.  So it might be reasonable to adjust bsd-user/mmap.c to use MAP_GUARD for 
managing the reserved_va area instead of MAP_ANON.  No rush, of course.


r~
diff mbox series

Patch

diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
index 221ad76d8c..f737b94885 100644
--- a/bsd-user/bsd-mem.h
+++ b/bsd-user/bsd-mem.h
@@ -335,4 +335,76 @@  static inline abi_long do_bsd_shmctl(abi_long shmid, abi_long cmd,
     return ret;
 }
 
+/* shmat(2) */
+static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg)
+{
+    abi_ulong raddr;
+    abi_long ret;
+    void *host_raddr;
+    struct shmid_ds shm_info;
+    int i;
+
+    /* Find out the length of the shared memory segment. */
+    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
+    if (is_error(ret)) {
+        /* Can't get the length */
+        return ret;
+    }
+
+    mmap_lock();
+
+    if (shmaddr) {
+        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
+    } else {
+        abi_ulong mmap_start;
+
+        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
+
+        if (mmap_start == -1) {
+            errno = ENOMEM;
+            host_raddr = (void *)-1;
+        } else {
+            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
+                shmflg); /* | SHM_REMAP XXX WHY? */
+        }
+    }
+
+    if (host_raddr == (void *)-1) {
+        mmap_unlock();
+        return get_errno((long)host_raddr);
+    }
+    raddr = h2g((unsigned long)host_raddr);
+
+    page_set_flags(raddr, raddr + shm_info.shm_segsz,
+        PAGE_VALID | PAGE_READ | ((shmflg & SHM_RDONLY) ? 0 : PAGE_WRITE));
+
+    for (i = 0; i < N_BSD_SHM_REGIONS; i++) {
+        if (bsd_shm_regions[i].start == 0) {
+            bsd_shm_regions[i].start = raddr;
+            bsd_shm_regions[i].size = shm_info.shm_segsz;
+            break;
+        }
+    }
+
+    mmap_unlock();
+    return raddr;
+}
+
+/* shmdt(2) */
+static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
+{
+    int i;
+
+    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
+        if (bsd_shm_regions[i].start == shmaddr) {
+            bsd_shm_regions[i].start = 0;
+            page_set_flags(shmaddr,
+                shmaddr + bsd_shm_regions[i].size, 0);
+            break;
+        }
+    }
+
+    return get_errno(shmdt(g2h_untagged(shmaddr)));
+}
+
 #endif /* BSD_USER_BSD_MEM_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 9681c65ce9..f76bc1eb38 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -559,6 +559,14 @@  static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1,
         ret = do_bsd_shmctl(arg1, arg2, arg3);
         break;
 
+    case TARGET_FREEBSD_NR_shmat: /* shmat(2) */
+        ret = do_bsd_shmat(arg1, arg2, arg3);
+        break;
+
+    case TARGET_FREEBSD_NR_shmdt: /* shmdt(2) */
+        ret = do_bsd_shmdt(arg1);
+        break;
+
         /*
          * Misc
          */