@@ -1,8 +1,12 @@
#ifndef _SYS_RANDOM_H
#include <stdlib/sys/random.h>
+#include_next <sys/random.h>
+
# ifndef _ISOMAC
+# include <stdbool.h>
+
extern ssize_t __getrandom (void *__buffer, size_t __length,
unsigned int __flags) __wur;
libc_hidden_proto (__getrandom)
@@ -3140,8 +3140,8 @@ static void
tcache_key_initialize (void)
{
/* We need to use the _nostatus version here, see BZ 29624. */
- if (__getrandom_nocancel_nostatus (&tcache_key, sizeof(tcache_key),
- GRND_NONBLOCK)
+ if (__getrandom_nocancel_nostatus_direct (&tcache_key, sizeof(tcache_key),
+ GRND_NONBLOCK)
!= sizeof (tcache_key))
{
tcache_key = random_bits ();
@@ -132,6 +132,8 @@ get_cached_stack (size_t *sizep, void **memp)
__libc_lock_init (result->exit_lock);
memset (&result->tls_state, 0, sizeof result->tls_state);
+ result->getrandom_buf = NULL;
+
/* Clear the DTV. */
dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
@@ -404,6 +404,9 @@ struct pthread
/* Used on strsignal. */
struct tls_internal_t tls_state;
+ /* getrandom vDSO per-thread opaque state. */
+ void *getrandom_buf;
+
/* rseq area registered with the kernel. Use a custom definition
here to isolate from kernel struct rseq changes. The
implementation of sched_getcpu needs acccess to the cpu_id field;
@@ -38,6 +38,7 @@
#include <version.h>
#include <clone_internal.h>
#include <futex-internal.h>
+#include <sys/random.h>
#include <shlib-compat.h>
@@ -549,6 +550,10 @@ start_thread (void *arg)
}
#endif
+ /* Release the vDSO getrandom per-thread buffer with all signals blocked,
+ to avoid creating a new free-state block during thread release. */
+ __getrandom_vdso_release (pd);
+
if (!pd->user_stack)
advise_stack_range (pd->stackblock, pd->stackblock_size, (uintptr_t) pd,
pd->guardsize);
@@ -51,7 +51,9 @@
__fcntl64 (fd, cmd, __VA_ARGS__)
#define __getrandom_nocancel(buf, size, flags) \
__getrandom (buf, size, flags)
-#define __getrandom_nocancel_nostatus(buf, size, flags) \
+#define __getrandom_nocancel_direct(buf, size, flags) \
+ __getrandom (buf, size, flags)
+#define __getrandom_nocancel_nostatus_direct(buf, size, flags) \
__getrandom (buf, size, flags)
#define __poll_infinity_nocancel(fds, nfds) \
__poll (fds, nfds, -1)
@@ -79,7 +79,7 @@ __typeof (__fcntl) __fcntl_nocancel;
/* Non cancellable getrandom syscall that does not also set errno in case of
failure. */
static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
{
int save_errno = errno;
ssize_t r = __getrandom (buf, buflen, flags);
@@ -90,6 +90,8 @@ __getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
#define __getrandom_nocancel(buf, size, flags) \
__getrandom (buf, size, flags)
+#define __getrandom_nocancel_direct(buf, size, flags) \
+ __getrandom (buf, size, flags)
#define __poll_infinity_nocancel(fds, nfds) \
__poll (fds, nfds, -1)
@@ -18,6 +18,7 @@
#include <arch-fork.h>
#include <pthreadP.h>
+#include <sys/random.h>
pid_t
_Fork (void)
@@ -43,6 +44,7 @@ _Fork (void)
self->robust_head.list = &self->robust_head;
INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
sizeof (struct robust_list_head));
+ call_function_static_weak (__getrandom_fork_subprocess);
}
return pid;
}
@@ -26,6 +26,7 @@
#include <mqueue.h>
#include <pthreadP.h>
#include <sysdep.h>
+#include <sys/random.h>
static inline void
fork_system_setup (void)
@@ -46,6 +47,7 @@ fork_system_setup_after_fork (void)
call_function_static_weak (__mq_notify_fork_subprocess);
call_function_static_weak (__timer_fork_subprocess);
+ call_function_static_weak (__getrandom_fork_subprocess);
}
/* In case of a fork() call the memory allocation in the child will be
@@ -128,9 +130,19 @@ reclaim_stacks (void)
curp->specific_used = true;
}
}
+
+ call_function_static_weak (__getrandom_reset_state, curp);
}
}
+ /* Also reset stale getrandom states for user stack threads. */
+ list_for_each (runp, &GL (dl_stack_user))
+ {
+ struct pthread *curp = list_entry (runp, struct pthread, list);
+ if (curp != self)
+ call_function_static_weak (__getrandom_reset_state, curp);
+ }
+
/* Add the stack of all running threads to the cache. */
list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
@@ -66,6 +66,18 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres) (clockid_t,
PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t,
struct __timespec64 *) RELRO;
# endif
+# ifdef HAVE_GETRANDOM_VSYSCALL
+PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len,
+ unsigned int flags, void *state,
+ size_t state_len) RELRO;
+/* These values are initialized at load time by calling _dl_vdso_getrandom
+ with a special argument. 'state_size' is the size of the per-thread
+ opaque state, which is allocated with mmap using the 'mmap_prot' and
+ 'mmap_flags' values. */
+PROCINFO_CLASS uint32_t _dl_vdso_getrandom_state_size RELRO;
+PROCINFO_CLASS uint32_t _dl_vdso_getrandom_mmap_prot RELRO;
+PROCINFO_CLASS uint32_t _dl_vdso_getrandom_mmap_flags RELRO;
+# endif
/* PowerPC specific ones. */
# ifdef HAVE_GET_TBFREQ
@@ -19,6 +19,10 @@
#ifndef _DL_VDSO_INIT_H
#define _DL_VDSO_INIT_H
+#ifdef HAVE_GETRANDOM_VSYSCALL
+# include <getrandom_vdso.h>
+#endif
+
/* Initialize the VDSO functions pointers. */
static inline void __attribute__ ((always_inline))
setup_vdso_pointers (void)
@@ -50,6 +54,19 @@ setup_vdso_pointers (void)
#ifdef HAVE_RISCV_HWPROBE
GLRO(dl_vdso_riscv_hwprobe) = dl_vdso_vsym (HAVE_RISCV_HWPROBE);
#endif
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL);
+ if (GLRO(dl_vdso_getrandom) != NULL)
+ {
+ struct vgetrandom_opaque_params params;
+ if (GLRO(dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL) == 0)
+ {
+ GLRO(dl_vdso_getrandom_state_size) = params.size_of_opaque_state;
+ GLRO(dl_vdso_getrandom_mmap_prot) = params.mmap_prot;
+ GLRO(dl_vdso_getrandom_mmap_flags) = params.mmap_flags;
+ }
+ }
+#endif
}
#endif
@@ -21,12 +21,247 @@
#include <unistd.h>
#include <sysdep-cancel.h>
+/* Issue the getrandom syscall for up to LENGTH bytes into BUFFER with
+   FLAGS.  When CANCEL is true the call is made through SYSCALL_CANCEL,
+   otherwise through INLINE_SYSCALL_CALL.  The result of the selected
+   macro is returned unchanged.  */
+static inline ssize_t
+getrandom_syscall (void *buffer, size_t length, unsigned int flags,
+                   bool cancel)
+{
+  if (!cancel)
+    return INLINE_SYSCALL_CALL (getrandom, buffer, length, flags);
+
+  return SYSCALL_CANCEL (getrandom, buffer, length, flags);
+}
+
+#ifdef HAVE_GETRANDOM_VSYSCALL
+# include <getrandom_vdso.h>
+# include <ldsodefs.h>
+# include <libc-lock.h>
+# include <list.h>
+# include <setvmaname.h>
+# include <sys/mman.h>
+# include <sys/sysinfo.h>
+# include <tls-internal.h>
+
+# define ALIGN_PAGE(p) PTR_ALIGN_UP (p, GLRO (dl_pagesize))
+# define READ_ONCE(p) (*((volatile typeof (p) *) (&(p))))
+# define WRITE_ONCE(p, v) (*((volatile typeof (p) *) (&(p))) = (v))
+# define RESERVE_PTR(p) ((void *) ((uintptr_t) (p) | 1UL))
+# define RELEASE_PTR(p) ((void *) ((uintptr_t) (p) & ~1UL))
+# define IS_RESERVED_PTR(p) (!!((uintptr_t) (p) & 1UL))
+
+/* Process-wide pool of free opaque states for the getrandom vDSO. The
+ lock is taken around the pop in vgetrandom_get_state and the push in
+ __getrandom_vdso_release. */
+static struct
+{
+ __libc_lock_define (, lock);
+
+ void **states; /* Queue of opaque states allocated with the kernel
+ provided flags and used on getrandom vDSO call. */
+ size_t len; /* Number of available free states in the queue. */
+ size_t total; /* Number of states allocated from the kernel. */
+ size_t cap; /* Total number of states that 'states' can hold before
+ it needs to be resized. */
+} grnd_alloc = {
+ .lock = LLL_LOCK_INITIALIZER
+};
+
+/* Map a fresh block of opaque states and refill the free-state queue with
+   them, replacing the queue with a larger one first if it could not hold
+   every state allocated so far plus the new ones.
+
+   Returns true on success.  Returns false if either mmap fails; in that
+   case the new block is unmapped again and grnd_alloc is left untouched.
+
+   The only caller, vgetrandom_get_state, holds grnd_alloc.lock and calls
+   this function only when grnd_alloc.len is 0.  That is relied upon here:
+   the queue is refilled starting at index 0 and entries of an old queue
+   are not copied into its replacement.  */
+static bool
+vgetrandom_get_state_alloc (void)
+{
+  size_t num = __get_nprocs (); /* Just a decent heuristic.  */
+
+  size_t block_size = ALIGN_PAGE (num * GLRO(dl_vdso_getrandom_state_size));
+  /* Recompute NUM as the number of whole states that fit in the block,
+     given that a state is never placed across a page boundary.  */
+  num = (GLRO (dl_pagesize) / GLRO(dl_vdso_getrandom_state_size)) *
+        (block_size / GLRO (dl_pagesize));
+  void *block = __mmap (NULL, block_size, GLRO(dl_vdso_getrandom_mmap_prot),
+                        GLRO(dl_vdso_getrandom_mmap_flags), -1, 0);
+  if (block == MAP_FAILED)
+    return false;
+  __set_vma_name (block, block_size, " glibc: getrandom");
+
+  if (grnd_alloc.total + num > grnd_alloc.cap)
+    {
+      /* Use a new mmap instead of trying to mremap.  It avoids a
+         potential multithread fork issue where fork is called just after
+         mremap returns but before assigning to the grnd_alloc.states,
+         thus making its value invalid in the child.  */
+      void *old_states = grnd_alloc.states;
+      /* Size of the mapping being replaced.  It must be derived from the
+         current capacity: unmapping with a size based on the new element
+         count could reach past the old mapping.  */
+      size_t old_states_size = ALIGN_PAGE (sizeof (*grnd_alloc.states)
+                                           * grnd_alloc.cap);
+      /* The new queue must be able to hold every state ever allocated,
+         since all of them can end up back on the free queue.  */
+      size_t states_size = ALIGN_PAGE (sizeof (*grnd_alloc.states)
+                                       * (grnd_alloc.total + num));
+
+      void **states = __mmap (NULL, states_size, PROT_READ | PROT_WRITE,
+                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+      if (states == MAP_FAILED)
+        {
+          __munmap (block, block_size);
+          return false;
+        }
+
+      /* Atomically replace the old state, so if a fork happens the child
+         process will see a consistent free state buffer.  The size might
+         not be updated, but it does not really matter since the buffer is
+         always increased.  */
+      atomic_store_relaxed (&grnd_alloc.states, states);
+      if (old_states != NULL)
+        __munmap (old_states, old_states_size);
+
+      __set_vma_name (states, states_size, " glibc: getrandom states");
+      grnd_alloc.cap = states_size / sizeof (*grnd_alloc.states);
+    }
+
+  for (size_t i = 0; i < num; ++i)
+    {
+      /* States should not straddle a page.  */
+      if (((uintptr_t) block & (GLRO (dl_pagesize) - 1)) +
+          GLRO(dl_vdso_getrandom_state_size) > GLRO (dl_pagesize))
+        block = ALIGN_PAGE (block);
+      grnd_alloc.states[i] = block;
+      block += GLRO(dl_vdso_getrandom_state_size);
+    }
+  grnd_alloc.len = num;
+  grnd_alloc.total += num;
+
+  return true;
+}
+
+/* Take an opaque state for vgetrandom from the grnd_alloc free queue. If
+ the queue is empty, vgetrandom_get_state_alloc is called first to mmap()
+ more states using the vgetrandom parameters. Returns NULL if no state
+ is available because that allocation failed. */
+static void *
+vgetrandom_get_state (void)
+{
+ void *state = NULL;
+
+ /* Blocking signals avoids the potential issue where _Fork() (which is
+ async-signal-safe) is called with the lock taken. This function is
+ expected to run only once during a thread's lifetime, so the overhead
+ should be minimal. */
+ internal_sigset_t set;
+ internal_signal_block_all (&set);
+ __libc_lock_lock (grnd_alloc.lock);
+
+ /* The allocator is only reached when the queue is empty; on success it
+ refills the queue, so there is a state to pop. */
+ if (grnd_alloc.len > 0 || vgetrandom_get_state_alloc ())
+ state = grnd_alloc.states[--grnd_alloc.len];
+
+ __libc_lock_unlock (grnd_alloc.lock);
+ internal_signal_restore_set (&set);
+
+ return state;
+}
+
+/* Write up to LENGTH random bytes to BUFFER, using the vDSO getrandom when
+ possible. The request is forwarded to getrandom_syscall (with CANCEL)
+ when no vDSO state size is known, on a reentrant call, when no opaque
+ state can be obtained, or when the vDSO call reports EAGAIN and the
+ caller did not pass GRND_NONBLOCK. Otherwise the vDSO result is
+ returned, or -1 with errno set if the vDSO call reported another
+ error. */
+static ssize_t
+getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel)
+{
+ if (GLRO (dl_vdso_getrandom_state_size) == 0)
+ return getrandom_syscall (buffer, length, flags, cancel);
+
+ struct pthread *self = THREAD_SELF;
+
+ /* If the LSB of getrandom_buf is set, then this function is already being
+ called, and we have a reentrant call from a signal handler. In this case
+ fallback to the syscall. */
+ void *state = READ_ONCE (self->getrandom_buf);
+ if (IS_RESERVED_PTR (state))
+ return getrandom_syscall (buffer, length, flags, cancel);
+ WRITE_ONCE (self->getrandom_buf, RESERVE_PTR (state));
+
+ /* R records whether RET holds the final result; while it is false the
+ exit path below issues the syscall instead. */
+ bool r = false;
+ if (state == NULL)
+ {
+ state = vgetrandom_get_state ();
+ if (state == NULL)
+ goto out;
+ }
+
+ /* Since the vDSO fallback does not issue the syscall with the cancellation
+ bridge (__syscall_cancel_arch), use GRND_NONBLOCK so there is no
+ potential unbounded blocking in the kernel. It should be a rare
+ situation, only at system startup when RNG is not initialized. */
+ ssize_t ret = GLRO (dl_vdso_getrandom) (buffer,
+ length,
+ flags | GRND_NONBLOCK,
+ state,
+ GLRO(dl_vdso_getrandom_state_size));
+ if (INTERNAL_SYSCALL_ERROR_P (ret))
+ {
+ /* Fallback to the syscall if the kernel would block. */
+ int err = INTERNAL_SYSCALL_ERRNO (ret);
+ if (err == EAGAIN && !(flags & GRND_NONBLOCK))
+ goto out;
+
+ __set_errno (err);
+ ret = -1;
+ }
+ r = true;
+
+out:
+ /* Store the state pointer back without the reservation bit (NULL if no
+ state was obtained), which also ends the reentrancy window. */
+ WRITE_ONCE (self->getrandom_buf, state);
+ return r ? ret : getrandom_syscall (buffer, length, flags, cancel);
+}
+#endif
+
+/* Put the opaque state owned by CURP back on the free queue, with the
+   reservation bit cleared, and clear CURP's pointer to it.  Nothing is
+   done when the queue has not been allocated or CURP holds no state.
+   grnd_alloc.lock is not taken here.  */
+void
+__getrandom_reset_state (struct pthread *curp)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  if (grnd_alloc.states != NULL && curp->getrandom_buf != NULL)
+    {
+      void *freed = RELEASE_PTR (curp->getrandom_buf);
+      grnd_alloc.states[grnd_alloc.len] = freed;
+      grnd_alloc.len++;
+      curp->getrandom_buf = NULL;
+    }
+#endif
+}
+
+/* Called when a thread terminates, and adds its random buffer back into the
+   allocator pool for use in a future thread.  Does nothing if CURP never
+   obtained a state.  On return CURP no longer references the state.  */
+void
+__getrandom_vdso_release (struct pthread *curp)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  if (curp->getrandom_buf == NULL)
+    return;
+
+  __libc_lock_lock (grnd_alloc.lock);
+  /* Store the plain pointer, as __getrandom_reset_state does, so the queue
+     never contains a value with the reservation bit set.  */
+  grnd_alloc.states[grnd_alloc.len++] = RELEASE_PTR (curp->getrandom_buf);
+  /* The state now belongs to the queue.  Drop the thread's reference so it
+     cannot be pushed a second time by __getrandom_reset_state, nor used by
+     this thread while another thread may have taken it.  */
+  curp->getrandom_buf = NULL;
+  __libc_lock_unlock (grnd_alloc.lock);
+#endif
+}
+
+/* Reset the internal lock in the child, in case another thread held it
+ while this thread called fork. Only the lock is reinitialized here; the
+ stale thread states are handled by reclaim_stacks, which calls
+ __getrandom_reset_state on each thread. */
+void
+__getrandom_fork_subprocess (void)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ grnd_alloc.lock = LLL_LOCK_INITIALIZER;
+#endif
+}
+
+/* Non-cancellable variant of __getrandom: same arguments and result, but
+   any syscall is issued without the cancellation wrapper.  */
+ssize_t
+__getrandom_nocancel (void *buffer, size_t length, unsigned int flags)
+{
+  const bool cancel = false;
+#ifdef HAVE_GETRANDOM_VSYSCALL
+  return getrandom_vdso (buffer, length, flags, cancel);
+#else
+  return getrandom_syscall (buffer, length, flags, cancel);
+#endif
+}
+
/* Write up to LENGTH bytes of randomness starting at BUFFER.
Return the number of bytes written, or -1 on error. */
ssize_t
__getrandom (void *buffer, size_t length, unsigned int flags)
{
- return SYSCALL_CANCEL (getrandom, buffer, length, flags);
+ /* Cancellable entry point (CANCEL is true). Go through the vDSO path
+ when it is compiled in, otherwise issue the syscall directly. */
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ return getrandom_vdso (buffer, length, flags, true);
+#else
+ return getrandom_syscall (buffer, length, flags, true);
+#endif
}
libc_hidden_def (__getrandom)
weak_alias (__getrandom, getrandom)
new file mode 100644
@@ -0,0 +1,36 @@
+/* Linux getrandom vDSO support.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _GETRANDOM_VDSO_H
+#define _GETRANDOM_VDSO_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+/* Used to query the vDSO for the required mmap flags and the opaque
+ per-thread state size. Defined by linux/random.h. */
+struct vgetrandom_opaque_params
+{
+ uint32_t size_of_opaque_state; /* Size of one opaque state. */
+ uint32_t mmap_prot; /* Protection to pass to mmap for the states. */
+ uint32_t mmap_flags; /* Flags to pass to mmap for the states. */
+ uint32_t reserved[13];
+};
+
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* Internal definitions for Linux getrandom implementation.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _LINUX_SYS_RANDOM_H
+#define _LINUX_SYS_RANDOM_H
+
+# ifndef _ISOMAC
+# include <pthreadP.h>
+
+extern void __getrandom_fork_subprocess (void) attribute_hidden;
+extern void __getrandom_vdso_release (struct pthread *curp) attribute_hidden;
+extern void __getrandom_reset_state (struct pthread *curp) attribute_hidden;
+# endif
+#endif
@@ -27,6 +27,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
#include <time.h>
+#include <sys/random.h>
/* Non cancellable open syscall. */
__typeof (open) __open_nocancel;
@@ -84,15 +85,17 @@ __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt)
}
static inline ssize_t
-__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_direct (void *buf, size_t buflen, unsigned int flags)
{
return INLINE_SYSCALL_CALL (getrandom, buf, buflen, flags);
}
+__typeof (getrandom) __getrandom_nocancel attribute_hidden;
+
/* Non cancellable getrandom syscall that does not also set errno in case of
failure. */
static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
{
return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags);
}
@@ -376,6 +376,7 @@
# define HAVE_TIME_VSYSCALL "__vdso_time"
# define HAVE_GETCPU_VSYSCALL "__vdso_getcpu"
# define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres"
+# define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom"
# define HAVE_CLONE3_WRAPPER 1