diff mbox series

[v2,1/6] bsd-user/freebsd/os-syscall.c: lock_iovec

Message ID 20220608211711.56158-2-imp@bsdimp.com
State New
Headers show
Series bsd-user upstreaming: read, write and exit | expand

Commit Message

Warner Losh June 8, 2022, 9:17 p.m. UTC
lock_iovec will lock an I/O vec and the memory to which it refers and
create a iovec in the host space that refers to it, with full error
unwinding. Add helper_iovec_unlock to unlock the partially locked iovec
in case there's an error. The code will be used in iovec_unlock when
that is committed.

Note: memory handling likely could be rewritten to use q_autofree. That
will be explored in the future since what we have now works well enough.

Signed-off-by: Warner Losh <imp@bsdimp.com>
---
 bsd-user/freebsd/os-syscall.c | 102 ++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
diff mbox series

Patch

diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index d272478e7b0..67851937a8f 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -73,6 +73,108 @@  bool is_error(abi_long ret)
     return (abi_ulong)ret >= (abi_ulong)(-4096);
 }
 
+/*
+ * Unlocks a iovec. Unlike unlock_iovec, it assumes the tvec array itself is
+ * already locked from target_addr. It will be unlocked as well as all the iovec
+ * elements.
+ */
+static void helper_unlock_iovec(struct target_iovec *target_vec,
+                                abi_ulong target_addr, struct iovec *vec,
+                                int count, int copy)
+{
+    for (int i = 0; i < count; i++) {
+        abi_ulong base = tswapal(target_vec[i].iov_base);
+
+        if (vec[i].iov_base) {
+            unlock_user(vec[i].iov_base, base, copy ? vec[i].iov_len : 0);
+        }
+    }
+    unlock_user(target_vec, target_addr, 0);
+}
+
+struct iovec *lock_iovec(int type, abi_ulong target_addr,
+        int count, int copy)
+{
+    struct target_iovec *target_vec;
+    struct iovec *vec;
+    abi_ulong total_len, max_len;
+    int i;
+    int err = 0;
+
+    if (count == 0) {
+        errno = 0;
+        return NULL;
+    }
+    if (count < 0 || count > IOV_MAX) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    vec = g_try_new0(struct iovec, count);
+    if (vec == NULL) {
+        errno = ENOMEM;
+        return NULL;
+    }
+
+    target_vec = lock_user(VERIFY_READ, target_addr,
+                           count * sizeof(struct target_iovec), 1);
+    if (target_vec == NULL) {
+        err = EFAULT;
+        goto fail2;
+    }
+
+    max_len = 0x7fffffff & MIN(TARGET_PAGE_MASK, PAGE_MASK);
+    total_len = 0;
+
+    for (i = 0; i < count; i++) {
+        abi_ulong base = tswapal(target_vec[i].iov_base);
+        abi_long len = tswapal(target_vec[i].iov_len);
+
+        if (len < 0) {
+            err = EINVAL;
+            goto fail;
+        } else if (len == 0) {
+            /* Zero length pointer is ignored. */
+            vec[i].iov_base = 0;
+        } else {
+            vec[i].iov_base = lock_user(type, base, len, copy);
+            /*
+             * If the first buffer pointer is bad, this is a fault.  But
+             * subsequent bad buffers will result in a partial write; this is
+             * realized by filling the vector with null pointers and zero
+             * lengths.
+             */
+            if (!vec[i].iov_base) {
+                if (i == 0) {
+                    err = EFAULT;
+                    goto fail;
+                } else {
+                    /*
+                     * Fail all the subsequent addresses, they are already
+                     * zero'd.
+                     */
+                    goto out;
+                }
+            }
+            if (len > max_len - total_len) {
+                len = max_len - total_len;
+            }
+        }
+        vec[i].iov_len = len;
+        total_len += len;
+    }
+out:
+    unlock_user(target_vec, target_addr, 0);
+    return vec;
+
+fail:
+    helper_unlock_iovec(target_vec, target_addr, vec, i, copy);
+fail2:
+    g_free(vec);
+    errno = err;
+    return NULL;
+}
+
 /*
  * do_syscall() should always have a single exit point at the end so that
  * actions, such as logging of syscall results, can be performed.  All errnos