@@ -26,6 +26,8 @@
#include <array_length.h>
#include <pthreadP.h>
#include <dl-call_tls_init_tp.h>
+#include <dl-rseq.h>
+#include <elf/dl-tunables.h>
#ifdef SHARED
#error makefile bug, this file is for static only
@@ -62,6 +64,18 @@ size_t _dl_tls_static_surplus;
dynamic TLS access (e.g. with TLSDESC). */
size_t _dl_tls_static_optional;
+/* Size of the features present in the rseq area, as derived from the auxv. */
+size_t _dl_tls_rseq_feature_size;
+
+/* Alignment requirement of the rseq area, as derived from the auxv. */
+size_t _dl_tls_rseq_align;
+
+/* Size of the rseq area allocated in the static TLS block. */
+size_t _dl_tls_rseq_alloc_size;
+
+/* Signed offset of the rseq area from the thread pointer (may be negative). */
+ptrdiff_t _dl_tls_rseq_offset;
+
/* Generation counter for the dtv. */
size_t _dl_tls_generation;
@@ -110,6 +124,7 @@ __libc_setup_tls (void)
size_t filesz = 0;
void *initimage = NULL;
size_t align = 0;
+ size_t tls_blocks_size = 0;
size_t max_align = TCB_ALIGNMENT;
size_t tcb_offset;
const ElfW(Phdr) *phdr;
@@ -135,22 +150,79 @@ __libc_setup_tls (void)
/* Calculate the size of the static TLS surplus, with 0 auditors. */
_dl_tls_static_surplus_init (0);
+ /* Even when disabled by tunable, an rseq area will be allocated to allow
+ application code to test the registration status with 'rseq->cpu_id >= 0'.
+ Default to the rseq ABI minimum size and alignment; this ensures we
+ don't use more TLS than necessary. */
+ size_t rseq_alloc_size = TLS_DL_RSEQ_MIN_SIZE;
+ size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
+ bool do_rseq = true;
+ do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
+ if (do_rseq)
+ {
+ rseq_align = GLRO(dl_tls_rseq_align);
+ /* Make sure the rseq area size is at least the minimum ABI size and a
+ multiple of the requested alignment. */
+ rseq_alloc_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
+ TLS_DL_RSEQ_MIN_SIZE), rseq_align);
+ }
+
+ /* Increase the maximum alignment with the rseq alignment requirements if
+ necessary. */
+ max_align = MAX (max_align, rseq_align);
+
+ /* Record the rseq_area block size. */
+ GLRO (dl_tls_rseq_alloc_size) = rseq_alloc_size;
+
/* We have to set up the TCB block which also (possibly) contains
'errno'. Therefore we avoid 'malloc' which might touch 'errno'.
Instead we use 'sbrk' which would only uses 'errno' if it fails.
In this case we are right away out of memory and the user gets
what she/he deserves. */
#if TLS_TCB_AT_TP
+ /* Before the thread pointer, add the aligned tls block size and then
+ align the rseq area block on top. */
+ tls_blocks_size = roundup (roundup (memsz, align ?: 1) + rseq_alloc_size, rseq_align);
+
+ /* Record the rseq_area offset.
+
+ With TLS_TCB_AT_TP the TLS blocks are allocated before the thread pointer
+ in reverse order. Our block is added last which results in it being the
+ first in the static TLS block, thus record the most negative offset.
+
+ The alignment requirements of the pointer resulting from this offset and
+ the thread pointer are enforced by 'max_align' which is used to align the
+ tcb_offset. */
+ GLRO (dl_tls_rseq_offset) = -tls_blocks_size;
+
/* Align the TCB offset to the maximum alignment, as
_dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
and dl_tls_static_align. */
- tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
+ tcb_offset = roundup (tls_blocks_size + GLRO(dl_tls_static_surplus), max_align);
tlsblock = _dl_early_allocate (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
if (tlsblock == NULL)
_startup_fatal_tls_error ();
#elif TLS_DTV_AT_TP
+ /* Align memsz on top of the initial tcb. */
tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
- tlsblock = _dl_early_allocate (tcb_offset + memsz + max_align
+
+ /* After the thread pointer, add the initial tcb plus the tls block size and
+ then align the rseq area block on top. */
+ tls_blocks_size = roundup (tcb_offset + memsz + rseq_alloc_size, rseq_align);
+
+ /* Record the rseq_area offset.
+
+ With TLS_DTV_AT_TP the TLS blocks are allocated after the thread pointer in
+ order. Our block is added last which results in it being the last in the
+ static TLS block, thus record the offset as the size of the static TLS
+ block minus the size of our block. The resulting offset will be positive.
+
+ The alignment requirements of the pointer resulting from this offset and
+ the thread pointer are enforced by 'max_align' which is used to align the
+ tcb_offset. */
+ GLRO (dl_tls_rseq_offset) = tls_blocks_size - rseq_alloc_size;
+
+ tlsblock = _dl_early_allocate (tls_blocks_size + max_align
+ TLS_PRE_TCB_SIZE
+ GLRO(dl_tls_static_surplus));
if (tlsblock == NULL)
@@ -209,11 +281,5 @@ __libc_setup_tls (void)
/* static_slotinfo.slotinfo[1].gen = 0; -- Already zero. */
static_slotinfo.slotinfo[1].map = main_map;
- memsz = roundup (memsz, align ?: 1);
-
-#if TLS_DTV_AT_TP
- memsz += tcb_offset;
-#endif
-
- init_static_tls (memsz, MAX (TCB_ALIGNMENT, max_align));
+ init_static_tls (tls_blocks_size, MAX (TCB_ALIGNMENT, max_align));
}
@@ -27,6 +27,7 @@
#include <tls.h>
#include <dl-tls.h>
+#include <dl-rseq.h>
#include <ldsodefs.h>
#if PTHREAD_IN_LIBC
@@ -298,6 +299,44 @@ _dl_determine_tlsoffset (void)
slotinfo[cnt].map->l_tls_offset = off;
}
+ /* Insert the rseq area block after the last TLS block. */
+
+ /* Even when disabled by tunable, an rseq area will be allocated to allow
+ application code to test the registration status with 'rseq->cpu_id >= 0'.
+ Default to the rseq ABI minimum size and alignment; this ensures we
+ don't use more TLS than necessary. */
+ size_t rseq_alloc_size = TLS_DL_RSEQ_MIN_SIZE;
+ size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
+ bool do_rseq = true;
+ do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
+ if (do_rseq)
+ {
+ rseq_align = GLRO(dl_tls_rseq_align);
+ /* Make sure the rseq area size is at least the minimum ABI size and a
+ multiple of the requested alignment. */
+ rseq_alloc_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
+ TLS_DL_RSEQ_MIN_SIZE), rseq_align);
+ }
+
+ /* Add the rseq area block to the global offset. */
+ offset = roundup (offset, rseq_align) + rseq_alloc_size;
+
+ /* Increase the maximum alignment with the rseq alignment requirements if
+ necessary. */
+ max_align = MAX (max_align, rseq_align);
+
+ /* Record the rseq_area offset.
+
+ With TLS_TCB_AT_TP the TLS blocks are allocated before the thread pointer
+ in reverse order. Our block is added last which results in it being the
+ first in the static TLS block, thus record the most negative offset.
+
+ The alignment requirements of the pointer resulting from this offset and
+ the thread pointer are enforced by 'max_align' which is used to align the
+ tcb_offset. */
+ GLRO (dl_tls_rseq_offset) = -offset;
+ GLRO (dl_tls_rseq_alloc_size) = rseq_alloc_size;
+
GL(dl_tls_static_used) = offset;
GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
max_align)
@@ -343,6 +382,45 @@ _dl_determine_tlsoffset (void)
offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
}
+ /* Insert the rseq area block after the last TLS block. */
+
+ /* Default to the rseq ABI minimum sizes, this will reduce TLS usage to 32
+ bytes when rseq is disabled by tunables. */
+ size_t rseq_alloc_size = TLS_DL_RSEQ_MIN_SIZE;
+ size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
+ bool do_rseq = true;
+ do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
+ if (do_rseq)
+ {
+ rseq_align = GLRO(dl_tls_rseq_align);
+ /* Make sure the rseq area size is at least the minimum ABI size and a
+ multiple of the requested alignment. */
+ rseq_alloc_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
+ TLS_DL_RSEQ_MIN_SIZE), rseq_align);
+ }
+
+ /* Align the global offset to the beginning of the rseq area. */
+ offset = roundup (offset, rseq_align);
+
+ /* Record the rseq_area offset.
+
+ With TLS_DTV_AT_TP the TLS blocks are allocated after the thread pointer in
+ order. Our block is added last which results in it being the last in the
+ static TLS block, thus record the offset as the size of the static TLS
+ block minus the size of our block. The resulting offset will be positive.
+
+ The alignment requirements of the pointer resulting from this offset and
+ the thread pointer are enforced by 'max_align' which is used to align the
+ tcb_offset. */
+ GLRO (dl_tls_rseq_alloc_size) = rseq_alloc_size;
+ GLRO (dl_tls_rseq_offset) = offset;
+
+ /* Add the rseq area block to the global offset. */
+ offset += rseq_alloc_size;
+
+ /* Increase the max_align if necessary. */
+ max_align = MAX (max_align, rseq_align);
+
GL(dl_tls_static_used) = offset;
GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
TCB_ALIGNMENT);
@@ -78,6 +78,18 @@ __rtld_static_init (struct link_map *map)
extern __typeof (dl->_dl_tls_static_size) _dl_tls_static_size
attribute_hidden;
dl->_dl_tls_static_size = _dl_tls_static_size;
+ extern __typeof (dl->_dl_tls_rseq_feature_size) _dl_tls_rseq_feature_size
+ attribute_hidden;
+ dl->_dl_tls_rseq_feature_size = _dl_tls_rseq_feature_size;
+ extern __typeof (dl->_dl_tls_rseq_align) _dl_tls_rseq_align
+ attribute_hidden;
+ dl->_dl_tls_rseq_align = _dl_tls_rseq_align;
+ extern __typeof (dl->_dl_tls_rseq_alloc_size) _dl_tls_rseq_alloc_size
+ attribute_hidden;
+ dl->_dl_tls_rseq_alloc_size = _dl_tls_rseq_alloc_size;
+ extern __typeof (dl->_dl_tls_rseq_offset) _dl_tls_rseq_offset
+ attribute_hidden;
+ dl->_dl_tls_rseq_offset = _dl_tls_rseq_offset;
dl->_dl_find_object = _dl_find_object;
__rtld_static_init_arch (map, dl);
@@ -404,25 +404,11 @@ struct pthread
/* Used on strsignal. */
struct tls_internal_t tls_state;
- /* rseq area registered with the kernel. Use a custom definition
- here to isolate from kernel struct rseq changes. The
- implementation of sched_getcpu needs acccess to the cpu_id field;
- the other fields are unused and not included here. */
- union
- {
- struct
- {
- uint32_t cpu_id_start;
- uint32_t cpu_id;
- };
- char pad[32]; /* Original rseq area size. */
- } rseq_area __attribute__ ((aligned (32)));
-
/* Amount of end padding, if any, in this structure.
- This definition relies on rseq_area being last. */
+ This definition relies on tls_state being last. */
#define PTHREAD_STRUCT_END_PADDING \
- (sizeof (struct pthread) - offsetof (struct pthread, rseq_area) \
- + sizeof ((struct pthread) {}.rseq_area))
+ (sizeof (struct pthread) - offsetof (struct pthread, tls_state) \
+ + sizeof ((struct pthread) {}.tls_state))
} __attribute ((aligned (TCB_ALIGNMENT)));
static inline bool
@@ -691,7 +691,7 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
/* Inherit rseq registration state. Without seccomp filters, rseq
registration will either always fail or always succeed. */
- if ((int) THREAD_GETMEM_VOLATILE (self, rseq_area.cpu_id) >= 0)
+ if ((int) RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id) >= 0)
pd->flags |= ATTR_FLAG_DO_RSEQ;
/* Initialize the field for the ID of the thread which is waiting
new file mode 100644
@@ -0,0 +1,26 @@
+/* RSEQ defines for the dynamic linker. Generic version.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Minimum size of the rseq area; also its size when rseq is disabled. */
+#define TLS_DL_RSEQ_MIN_SIZE 32
+
+/* Lower bound applied to the AT_RSEQ_FEATURE_SIZE auxv value. */
+#define TLS_DL_RSEQ_MIN_FEATURE_SIZE 20
+
+/* Minimum alignment of the rseq area; lower bound for AT_RSEQ_ALIGN. */
+#define TLS_DL_RSEQ_MIN_ALIGN 32
@@ -610,6 +610,18 @@ struct rtld_global_ro
See comments in elf/dl-tls.c where it is initialized. */
EXTERN size_t _dl_tls_static_surplus;
+ /* Size of the features present in the rseq area. */
+ EXTERN size_t _dl_tls_rseq_feature_size;
+
+ /* Alignment requirement of the rseq area. */
+ EXTERN size_t _dl_tls_rseq_align;
+
+ /* Size of the rseq area allocated in the static TLS block. */
+ EXTERN size_t _dl_tls_rseq_alloc_size;
+
+ /* Offset of the rseq area from the thread pointer. */
+ EXTERN ptrdiff_t _dl_tls_rseq_offset;
+
/* Name of the shared object to be profiled (if any). */
EXTERN const char *_dl_profile;
/* Filename of the output file. */
@@ -123,3 +123,59 @@
"i" (offsetof (struct pthread, member)), \
"r" (idx)); \
}})
+
+
+/* Read MEMBER of the rseq area directly; DESCR only supplies its type. */
+#define RSEQ_GETMEM_VOLATILE(descr, member) \
+ ({ __typeof (descr->member) __value; \
+ ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
+ _Static_assert (sizeof (__value) == 1 \
+ || sizeof (__value) == 4 \
+ || sizeof (__value) == 8, \
+ "size of per-thread data"); \
+ if (sizeof (__value) == 1) \
+ asm volatile ("movb %%gs:%P2(%3),%b0" \
+ : "=q" (__value) \
+ : "0" (0), "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else if (sizeof (__value) == 4) \
+ asm volatile ("movl %%gs:%P1(%2),%0" \
+ : "=r" (__value) \
+ : "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else /* 8: read as two 4-byte loads into %eax and %edx */ \
+ { \
+ asm volatile ("movl %%gs:%P1(%2),%%eax\n\t" \
+ "movl %%gs:4+%P1(%2),%%edx" \
+ : "=&A" (__value) \
+ : "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ } \
+ __value; })
+
+/* Set MEMBER of the rseq area directly; DESCR only supplies its type. */
+#define RSEQ_SETMEM(descr, member, value) \
+ ({ \
+ ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
+ _Static_assert (sizeof (descr->member) == 1 \
+ || sizeof (descr->member) == 4 \
+ || sizeof (descr->member) == 8, \
+ "size of per-thread data"); \
+ if (sizeof (descr->member) == 1) \
+ asm volatile ("movb %b0,%%gs:%P1(%2)" : \
+ : "iq" (value), \
+ "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else if (sizeof (descr->member) == 4) \
+ asm volatile ("movl %0,%%gs:%P1(%2)" : \
+ : "ir" (value), \
+ "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else /* 8: written as two 4-byte stores from %eax and %edx */ \
+ { \
+ asm volatile ("movl %%eax,%%gs:%P1(%2)\n\t" \
+ "movl %%edx,%%gs:4+%P1(%2)" : \
+ : "A" ((uint64_t) cast_to_integer (value)), \
+ "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ }})
@@ -107,16 +107,22 @@ __tls_init_tp (void)
do_rseq = TUNABLE_GET (rseq, int, NULL);
if (rseq_register_current_thread (pd, do_rseq))
{
- _rseq_size = sizeof (pd->rseq_area);
+ _rseq_size = GLRO (dl_tls_rseq_alloc_size);
}
+ /* If the registration fails or is disabled by tunable, the public rseq
+ size will be '0' regardless of the size of the allocated rseq area. An
+ rseq area of at least 32 bytes is always allocated since application
+ code is allowed to test the status of the rseq registration with
+ 'rseq->cpu_id >= 0'. */
+
#ifdef RSEQ_SIG
/* This should be a compile-time constant, but the current
infrastructure makes it difficult to determine its value. Not
all targets support __thread_pointer, so set __rseq_offset only
if the rseq registration may have happened because RSEQ_SIG is
defined. */
- _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
+ _rseq_offset = GLRO (dl_tls_rseq_offset);
#endif
}
@@ -30,3 +30,8 @@
descr->member = (value)
#define THREAD_SETMEM_NC(descr, member, idx, value) \
descr->member[idx] = (value)
+
+#define RSEQ_GETMEM_VOLATILE(descr, member) \
+ THREAD_GETMEM_VOLATILE(descr, member)
+#define RSEQ_SETMEM(descr, member, value) \
+ THREAD_SETMEM(descr, member, value)
@@ -261,6 +261,11 @@ tests-internal += \
tst-rseq-disable \
# tests-internal
+tests-static += \
+ tst-rseq-disable-static \
+ tst-rseq-static \
+ # tests-static
+
tests-time64 += \
tst-adjtimex-time64 \
tst-clock_adjtime-time64 \
@@ -394,6 +399,7 @@ $(objpfx)tst-mount-compile.out: ../sysdeps/unix/sysv/linux/tst-mount-compile.py
$(objpfx)tst-mount-compile.out: $(sysdeps-linux-python-deps)
tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
+tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
endif # $(subdir) == misc
@@ -655,4 +661,8 @@ tests += \
tests-internal += \
tst-rseq-nptl \
# tests-internal
+
+tests-static += \
+ tst-rseq-nptl-static \
+ # tests-static
endif
@@ -21,6 +21,7 @@
#include <fpu_control.h>
#include <ldsodefs.h>
#include <link.h>
+#include <dl-rseq.h>
typedef ElfW(Addr) dl_parse_auxv_t[AT_MINSIGSTKSZ + 1];
@@ -59,5 +60,10 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
GLRO(dl_sysinfo) = auxv_values[AT_SYSINFO];
#endif
+ GLRO(dl_tls_rseq_feature_size) = MAX (auxv_values[AT_RSEQ_FEATURE_SIZE],
+ TLS_DL_RSEQ_MIN_FEATURE_SIZE);
+ GLRO(dl_tls_rseq_align) = MAX (auxv_values[AT_RSEQ_ALIGN],
+ TLS_DL_RSEQ_MIN_ALIGN);
+
DL_PLATFORM_AUXV
}
@@ -24,6 +24,24 @@
#include <stdbool.h>
#include <stdio.h>
#include <sys/rseq.h>
+#include <thread_pointer.h>
+#include <ldsodefs.h>
+
+/* rseq area registered with the kernel. Use a custom definition
+ here to isolate from kernel struct rseq changes. The
+ implementation of sched_getcpu needs access to the cpu_id field;
+ the other fields are unused and not included here. */
+struct rseq_area
+{
+ uint32_t cpu_id_start;
+ uint32_t cpu_id; /* Read as int: a negative value means not registered. */
+};
+
+static inline struct rseq_area *
+rseq_get_area (void)
+{
+ return (void *) ((char *) __thread_pointer () + GLRO (dl_tls_rseq_offset));
+}
#ifdef RSEQ_SIG
static inline bool
@@ -31,20 +49,23 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq)
{
if (do_rseq)
{
- int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
- sizeof (self->rseq_area),
+ /* The kernel expects 'rseq_area->rseq_cs == NULL' on registration, so
+ zero the whole allocated rseq area before issuing the syscall. */
+ memset(rseq_get_area(), 0, GLRO (dl_tls_rseq_alloc_size));
+ int ret = INTERNAL_SYSCALL_CALL (rseq, rseq_get_area(),
+ GLRO (dl_tls_rseq_alloc_size),
0, RSEQ_SIG);
if (!INTERNAL_SYSCALL_ERROR_P (ret))
return true;
}
- THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+ RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
return false;
}
#else /* RSEQ_SIG */
static inline bool
rseq_register_current_thread (struct pthread *self, bool do_rseq)
{
- THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+ RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
return false;
}
#endif /* RSEQ_SIG */
@@ -19,6 +19,7 @@
#include <sched.h>
#include <sysdep.h>
#include <sysdep-vdso.h>
+#include <rseq-internal.h>
static int
vsyscall_sched_getcpu (void)
@@ -36,6 +37,6 @@ vsyscall_sched_getcpu (void)
int
sched_getcpu (void)
{
- int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
+ int cpu_id = RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id);
return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu ();
}
new file mode 100644
@@ -0,0 +1 @@
+#include "tst-rseq-disable.c"
@@ -26,27 +26,28 @@
#include <unistd.h>
#ifdef RSEQ_SIG
+# include <sys/auxv.h>
+# include "tst-rseq.h"
+
+static __thread struct rseq local_rseq;
/* Check that rseq can be registered and has not been taken by glibc. */
static void
check_rseq_disabled (void)
{
- struct pthread *pd = THREAD_SELF;
+ struct rseq *rseq_area = (struct rseq *) ((char *) __thread_pointer () + __rseq_offset);
TEST_COMPARE (__rseq_flags, 0);
- TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
- == (char *) &pd->rseq_area);
TEST_COMPARE (__rseq_size, 0);
- TEST_COMPARE ((int) pd->rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+ TEST_COMPARE ((int) rseq_area->cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
- int ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
- 0, RSEQ_SIG);
+ int ret = syscall (__NR_rseq, &local_rseq, RSEQ_TEST_MIN_SIZE, 0, RSEQ_SIG);
if (ret == 0)
{
- ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
+ ret = syscall (__NR_rseq, &local_rseq, RSEQ_TEST_MIN_SIZE,
RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
TEST_COMPARE (ret, 0);
- pd->rseq_area.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
+ rseq_area->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
}
else
{
new file mode 100644
@@ -0,0 +1 @@
+#include "tst-rseq-nptl.c"
new file mode 100644
@@ -0,0 +1 @@
+#include "tst-rseq.c"
@@ -31,18 +31,32 @@
# include <syscall.h>
# include <thread_pointer.h>
# include <tls.h>
+# include <sys/auxv.h>
# include "tst-rseq.h"
static void
do_rseq_main_test (void)
{
- struct pthread *pd = THREAD_SELF;
+ size_t rseq_align = MAX (getauxval (AT_RSEQ_ALIGN), RSEQ_TEST_MIN_ALIGN);
+ size_t rseq_size = roundup (MAX (getauxval (AT_RSEQ_FEATURE_SIZE), RSEQ_TEST_MIN_SIZE), rseq_align);
+ struct rseq *rseq = (struct rseq *) ((char *) __thread_pointer () + __rseq_offset);
TEST_VERIFY_EXIT (rseq_thread_registered ());
TEST_COMPARE (__rseq_flags, 0);
- TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
- == (char *) &pd->rseq_area);
- TEST_COMPARE (__rseq_size, sizeof (pd->rseq_area));
+ TEST_COMPARE (__rseq_size, rseq_size);
+ /* The size of the rseq area must be a multiple of the alignment. */
+ TEST_VERIFY ((__rseq_size % rseq_align) == 0);
+ /* The rseq area address must be aligned. */
+ TEST_VERIFY (((unsigned long) rseq % rseq_align) == 0);
+#if TLS_TCB_AT_TP
+ /* The area lies before the thread pointer: offset is negative, >= 32 bytes. */
+ TEST_VERIFY (__rseq_offset <= -RSEQ_TEST_MIN_SIZE);
+#elif TLS_DTV_AT_TP
+ /* The rseq area block should come after the thread pointer. */
+ TEST_VERIFY (__rseq_offset >= 0);
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
}
static void
@@ -23,11 +23,18 @@
#include <syscall.h>
#include <sys/rseq.h>
#include <tls.h>
+#include <rseq-internal.h>
+
+#define RSEQ_TEST_MIN_SIZE 32
+#define RSEQ_TEST_MIN_FEATURE_SIZE 20
+#define RSEQ_TEST_MIN_ALIGN 32
static inline bool
rseq_thread_registered (void)
{
- return THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id) >= 0;
+ struct rseq_area *rseq = (struct rseq_area *) ((char *) __thread_pointer () + __rseq_offset);
+ /* cpu_id is unsigned: compare as int so negative error values are seen. */
+ return (int) __atomic_load_n (&rseq->cpu_id, __ATOMIC_RELAXED) >= 0;
}
static inline int
@@ -130,3 +130,59 @@
"i" (offsetof (struct pthread, member[0])), \
"r" (idx)); \
}})
+
+/* Read MEMBER of the rseq area directly; DESCR only supplies its type. */
+# define RSEQ_GETMEM_VOLATILE(descr, member) \
+ ({ __typeof (descr->member) __value; \
+ ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
+ _Static_assert (sizeof (__value) == 1 \
+ || sizeof (__value) == 4 \
+ || sizeof (__value) == 8, \
+ "size of per-thread data"); \
+ if (sizeof (__value) == 1) \
+ asm volatile ("movb %%fs:%P2(%q3),%b0" \
+ : "=q" (__value) \
+ : "0" (0), "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else if (sizeof (__value) == 4) \
+ asm volatile ("movl %%fs:%P1(%q2),%0" \
+ : "=r" (__value) \
+ : "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else /* 8: a single 8-byte load */ \
+ { \
+ asm volatile ("movq %%fs:%P1(%q2),%q0" \
+ : "=r" (__value) \
+ : "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ } \
+ __value; })
+
+/* Set MEMBER of the rseq area directly; DESCR only supplies its type. */
+# define RSEQ_SETMEM(descr, member, value) \
+ ({ \
+ ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
+ _Static_assert (sizeof (descr->member) == 1 \
+ || sizeof (descr->member) == 4 \
+ || sizeof (descr->member) == 8, \
+ "size of per-thread data"); \
+ if (sizeof (descr->member) == 1) \
+ asm volatile ("movb %b0,%%fs:%P1(%q2)" : \
+ : "iq" (value), \
+ "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else if (sizeof (descr->member) == 4) \
+ asm volatile ("movl %0,%%fs:%P1(%q2)" : \
+ : IMM_MODE (value), \
+ "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ else /* 8: a single 8-byte store */ \
+ { \
+ /* Since movq takes a signed 32-bit immediate or a register source \
+ operand, use "er" constraint for 32-bit signed integer constant \
+ or register. */ \
+ asm volatile ("movq %q0,%%fs:%P1(%q2)" : \
+ : "er" ((uint64_t) cast_to_integer (value)), \
+ "i" (offsetof (struct rseq_area, member)), \
+ "r" (_rseq_offset)); \
+ }})