Message ID: 20240206162801.882585-3-mjeanson@efficios.com
State: New
Series: Extend rseq support
One incorrect year
Could use more comments around the tcb math
Questions in sysdeps/unix/sysv/linux/tst-rseq-disable.c

Michael Jeanson <mjeanson@efficios.com> writes:
> This makes the size of the rseq area variable and thus requires to
> relocate it out of 'struct pthread'. We chose to move it after (in block
> allocation order) the last TLS block since it required a fairly small
> modification to the TLS block allocator and did not interfere with the
> main executable TLS block which must always be first.

The TLS doesn't get resized, does it?  Or am I thinking of the DTV?

> diff --git a/csu/libc-tls.c b/csu/libc-tls.c
> index b7682bdf43..9c70f53284 100644
> --- a/csu/libc-tls.c
> +++ b/csu/libc-tls.c
> @@ -26,6 +26,8 @@
>  #include <array_length.h>
>  #include <pthreadP.h>
>  #include <dl-call_tls_init_tp.h>
> +#include <dl-rseq.h>
> +#include <elf/dl-tunables.h>

Ok.

>  #ifdef SHARED
>  #error makefile bug, this file is for static only
> @@ -62,6 +64,18 @@ size_t _dl_tls_static_surplus;
>     dynamic TLS access (e.g. with TLSDESC).  */
>  size_t _dl_tls_static_optional;
>
> +/* Size of the features present in the rseq area.  */
> +size_t _dl_tls_rseq_feature_size;
> +
> +/* Alignment requirement of the rseq area.  */
> +size_t _dl_tls_rseq_align;
> +
> +/* Size of the rseq area in the static TLS block.  */
> +size_t _dl_tls_rseq_size;
> +
> +/* Offset of the rseq area from the thread pointer.  */
> +ptrdiff_t _dl_tls_rseq_offset;
> +

Ok.

>  /* Generation counter for the dtv.  */
>  size_t _dl_tls_generation;
>
> @@ -110,6 +124,7 @@ __libc_setup_tls (void)
>    size_t filesz = 0;
>    void *initimage = NULL;
>    size_t align = 0;
> +  size_t tls_blocks_size = 0;

This may be overridden later, ok.

> @@ -135,22 +150,61 @@
>    /* Calculate the size of the static TLS surplus, with 0 auditors.  */
>    _dl_tls_static_surplus_init (0);
>
> +  /* Default to the rseq ABI minimum sizes, this will reduce TLS usage to 32
> +     bytes when rseq is disabled by tunables.  */
> +  size_t rseq_size = TLS_DL_RSEQ_MIN_SIZE;
> +  size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
> +  bool do_rseq = true;
> +  do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
> +  if (do_rseq)
> +    {
> +      rseq_align = GLRO(dl_tls_rseq_align);
> +      /* Make sure the rseq area size is at least the minimum ABI size and a
> +         multiple of the requested aligment.  */
> +      rseq_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
> +                                TLS_DL_RSEQ_MIN_SIZE), rseq_align);
> +    }
> +
> +  /* Increase the max_align if necessary.  */
> +  max_align = MAX (max_align, rseq_align);
> +
> +  /* Record the rseq_area block size.  */
> +  GLRO (dl_tls_rseq_size) = rseq_size;

Ok.

> #if TLS_TCB_AT_TP
> +  /* Before the the thread pointer, add the aligned tls block size and then
> +     align the rseq area block on top.  */
> +  tls_blocks_size = roundup (roundup (memsz, align ?: 1) + rseq_size, rseq_align);
> +
> +  /* Record the rseq_area offset.  The offset is negative with TLS_TCB_AT_TP
> +     because the TLS blocks are located before the thread pointer.  */
> +  GLRO (dl_tls_rseq_offset) = -tls_blocks_size;
> +

So the allocated area will be at negative offsets to TP anyway; we add
at the beginning, so *our* location is at the beginning, and thus the
most negative offset.

What if the rseq area needs alignment greater than what
_dl_early_allocate() guarantees?  This code takes the time to round the
tls size up to rseq_align, but never aligns the actual address of the
rseq area after allocation.
Ah, the adjustments to max_align cause this to happen later, as long as
the offsets are suitably aligned also.

Perhaps a quick note in the comment that "max_align enforces the
alignment of the resulting pointer" or something like that?  Ok.

>    /* Align the TCB offset to the maximum alignment, as
>       _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
>       and dl_tls_static_align.  */
> -  tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
> +  tcb_offset = roundup (tls_blocks_size + GLRO(dl_tls_static_surplus), max_align);

Ok.

> #elif TLS_DTV_AT_TP
> +  /* Align memsz on top of the initial tcb.  */
>    tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
> -  tlsblock = _dl_early_allocate (tcb_offset + memsz + max_align
> +
> +  /* After the thread pointer, add the initial tcb plus the tls block size and
> +     then align the rseq area block on top.  */
> +  tls_blocks_size = roundup (tcb_offset + memsz + rseq_size, rseq_align);
> +
> +  /* Record the rseq_area offset.  The offset is positive with TLS_DTV_AT_TP
> +     because the TLS blocks are located after the thread pointer.  */
> +  GLRO (dl_tls_rseq_offset) = tls_blocks_size - rseq_size;
> +
> +  tlsblock = _dl_early_allocate (tls_blocks_size + max_align
>                                  + TLS_PRE_TCB_SIZE
>                                  + GLRO(dl_tls_static_surplus));

So here all the offsets are positive wrt TP, so we extend the block and
calculate the old "end" as our new offset.  Note that rseq_size was
previously rounded up to rseq_align so the resulting pointer should be
properly aligned.  Ok.

> @@ -209,11 +263,5 @@ __libc_setup_tls (void)
>    /* static_slotinfo.slotinfo[1].gen = 0; -- Already zero.  */
>    static_slotinfo.slotinfo[1].map = main_map;
>
> -  memsz = roundup (memsz, align ?: 1);
> -
> -#if TLS_DTV_AT_TP
> -  memsz += tcb_offset;
> -#endif
> -
> -  init_static_tls (memsz, MAX (TCB_ALIGNMENT, max_align));
> +  init_static_tls (tls_blocks_size, MAX (TCB_ALIGNMENT, max_align));
>  }

Ok.

> diff --git a/elf/dl-tls.c b/elf/dl-tls.c
> index 7b3dd9ab60..70fbe7095f 100644
> --- a/elf/dl-tls.c
> +++ b/elf/dl-tls.c
> @@ -27,6 +27,7 @@
>
>  #include <tls.h>
>  #include <dl-tls.h>
> +#include <dl-rseq.h>
>  #include <ldsodefs.h>

Ok.

> @@ -298,6 +299,35 @@ _dl_determine_tlsoffset (void)
>            slotinfo[cnt].map->l_tls_offset = off;
>          }
>
> +  /* Insert the rseq area block after the last TLS block.  */
> +
> +  /* Default to the rseq ABI minimum sizes, this will reduce TLS usage to 32
> +     bytes when rseq is disabled by tunables.  */
> +  size_t rseq_size = TLS_DL_RSEQ_MIN_SIZE;
> +  size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
> +  bool do_rseq = true;
> +  do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
> +  if (do_rseq)
> +    {
> +      rseq_align = GLRO(dl_tls_rseq_align);
> +      /* Make sure the rseq area size is at least the minimum ABI size and a
> +         multiple of the requested aligment.  */
> +      rseq_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
> +                                TLS_DL_RSEQ_MIN_SIZE), rseq_align);
> +    }
> +
> +  /* Add the rseq area block to the global offset.  */
> +  offset = roundup (offset, rseq_align) + rseq_size;
> +
> +  /* Increase the max_align if necessary.  */
> +  max_align = MAX (max_align, rseq_align);
> +
> +  /* Record the rseq_area block size and offset.  The offset is negative
> +     with TLS_TCB_AT_TP because the TLS blocks are located before the
> +     thread pointer.  */
> +  GLRO (dl_tls_rseq_offset) = -offset;
> +  GLRO (dl_tls_rseq_size) = rseq_size;
> +

Ok.  Same as above, this time for shared objects.
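To make the size/offset arithmetic reviewed above concrete, here is a
worked example with hypothetical inputs (illustrative numbers only, not
from the patch): suppose the kernel reports AT_RSEQ_FEATURE_SIZE = 28
and AT_RSEQ_ALIGN = 32, and the static TLS_TCB_AT_TP path in
csu/libc-tls.c sees memsz = 100 with align = 16.

    rseq_align = MAX (32, 32)               = 32
    rseq_size  = roundup (MAX (28, 32), 32) = 32

    tls_blocks_size    = roundup (roundup (100, 16) + 32, 32)
                       = roundup (112 + 32, 32) = 144
    dl_tls_rseq_offset = -144   /* area spans TP-144 .. TP-112 */

A future kernel advertising a 40-byte feature size would instead yield
rseq_size = roundup (40, 32) = 64, growing the reserved block with no
source change.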
> @@ -343,6 +373,38 @@ _dl_determine_tlsoffset (void)
>        offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
>      }
>
> +  /* Insert the rseq area block after the last TLS block.  */
> +
> +  /* Default to the rseq ABI minimum sizes, this will reduce TLS usage to 32
> +     bytes when rseq is disabled by tunables.  */
> +  size_t rseq_size = TLS_DL_RSEQ_MIN_SIZE;
> +  size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
> +  bool do_rseq = true;
> +  do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
> +  if (do_rseq)
> +    {
> +      rseq_align = GLRO(dl_tls_rseq_align);
> +      /* Make sure the rseq area size is at least the minimum ABI size and a
> +         multiple of the requested aligment.  */
> +      rseq_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
> +                                TLS_DL_RSEQ_MIN_SIZE), rseq_align);
> +    }
> +
> +  /* Align the global offset to the beginning of the rseq area.  */
> +  offset = roundup (offset, rseq_align);
> +
> +  /* Record the rseq_area block size and offset.  The offset is positive
> +     with TLS_DTV_AT_TP because the TLS blocks are located after the
> +     thread pointer.  */
> +  GLRO (dl_tls_rseq_size) = rseq_size;
> +  GLRO (dl_tls_rseq_offset) = offset;
> +
> +  /* Add the rseq area block to the global offset.  */
> +  offset += rseq_size;
> +
> +  /* Increase the max_align if necessary.  */
> +  max_align = MAX (max_align, rseq_align);
> +

Ok.

> diff --git a/elf/rtld_static_init.c b/elf/rtld_static_init.c
> index e918e4ebdf..0606317b8c 100644
> --- a/elf/rtld_static_init.c
> +++ b/elf/rtld_static_init.c
> @@ -78,6 +78,18 @@ __rtld_static_init (struct link_map *map)
>    extern __typeof (dl->_dl_tls_static_size) _dl_tls_static_size
>      attribute_hidden;
>    dl->_dl_tls_static_size = _dl_tls_static_size;
> +  extern __typeof (dl->_dl_tls_rseq_feature_size) _dl_tls_rseq_feature_size
> +    attribute_hidden;
> +  dl->_dl_tls_rseq_feature_size = _dl_tls_rseq_feature_size;
> +  extern __typeof (dl->_dl_tls_rseq_align) _dl_tls_rseq_align
> +    attribute_hidden;
> +  dl->_dl_tls_rseq_align = _dl_tls_rseq_align;
> +  extern __typeof (dl->_dl_tls_rseq_size) _dl_tls_rseq_size
> +    attribute_hidden;
> +  dl->_dl_tls_rseq_size = _dl_tls_rseq_size;
> +  extern __typeof (dl->_dl_tls_rseq_offset) _dl_tls_rseq_offset
> +    attribute_hidden;
> +  dl->_dl_tls_rseq_offset = _dl_tls_rseq_offset;

Ok.

> diff --git a/nptl/descr.h b/nptl/descr.h
> index 8cef95810c..cdc3c82d9a 100644
> --- a/nptl/descr.h
> +++ b/nptl/descr.h
> @@ -404,25 +404,11 @@ struct pthread
>    /* Used on strsignal.  */
>    struct tls_internal_t tls_state;
>
> -  /* rseq area registered with the kernel.  Use a custom definition
> -     here to isolate from kernel struct rseq changes.  The
> -     implementation of sched_getcpu needs acccess to the cpu_id field;
> -     the other fields are unused and not included here.  */
> -  union
> -  {
> -    struct
> -    {
> -      uint32_t cpu_id_start;
> -      uint32_t cpu_id;
> -    };
> -    char pad[32];  /* Original rseq area size.  */
> -  } rseq_area __attribute__ ((aligned (32)));
> -
>    /* Amount of end padding, if any, in this structure.
> -     This definition relies on rseq_area being last.  */
> +     This definition relies on tls_state being last.  */
>  #define PTHREAD_STRUCT_END_PADDING \
> -  (sizeof (struct pthread) - offsetof (struct pthread, rseq_area) \
> -   + sizeof ((struct pthread) {}.rseq_area))
> +  (sizeof (struct pthread) - offsetof (struct pthread, tls_state) \
> +   + sizeof ((struct pthread) {}.tls_state))

This memory is no longer statically sized; ok.
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index 1d3665d5ed..9b49ee7121 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -691,7 +691,7 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>
>    /* Inherit rseq registration state.  Without seccomp filters, rseq
>       registration will either always fail or always succeed.  */
> -  if ((int) THREAD_GETMEM_VOLATILE (self, rseq_area.cpu_id) >= 0)
> +  if ((int) RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id) >= 0)

Ok.

> diff --git a/sysdeps/generic/dl-rseq.h b/sysdeps/generic/dl-rseq.h
> new file mode 100644
> index 0000000000..0855981c89
> --- /dev/null
> +++ b/sysdeps/generic/dl-rseq.h
> @@ -0,0 +1,26 @@
> +/* RSEQ defines for the dynamic linker.  Generic version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.

2023 ?

> +/* Minimum size of the rseq area.  */
> +#define TLS_DL_RSEQ_MIN_SIZE 32
> +
> +/* Minimum feature size of the rseq area.  */
> +#define TLS_DL_RSEQ_MIN_FEATURE_SIZE 20
> +
> +/* Minimum size of the rseq area alignment.  */
> +#define TLS_DL_RSEQ_MIN_ALIGN 32

Ok.

> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index 117c901ccc..53a0a208d8 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -610,6 +610,18 @@ struct rtld_global_ro
>       See comments in elf/dl-tls.c where it is initialized.  */
>    EXTERN size_t _dl_tls_static_surplus;
>
> +  /* Size of the features present in the rseq area.  */
> +  EXTERN size_t _dl_tls_rseq_feature_size;
> +
> +  /* Alignment requirement of the rseq area.  */
> +  EXTERN size_t _dl_tls_rseq_align;
> +
> +  /* Size of the rseq area in the static TLS block.  */
> +  EXTERN size_t _dl_tls_rseq_size;
> +
> +  /* Offset of the rseq area from the thread pointer.  */
> +  EXTERN ptrdiff_t _dl_tls_rseq_offset;
> +

Ok.

> diff --git a/sysdeps/i386/nptl/tcb-access.h b/sysdeps/i386/nptl/tcb-access.h
> index 4b6221e103..e6988186d0 100644
> --- a/sysdeps/i386/nptl/tcb-access.h
> +++ b/sysdeps/i386/nptl/tcb-access.h
> @@ -123,3 +123,59 @@
>                       "i" (offsetof (struct pthread, member)), \
>                       "r" (idx)); \
>        }})
> +
> +
> +/* Read member of the RSEQ area directly.  */
> +#define RSEQ_GETMEM_VOLATILE(descr, member) \
> +  ({ __typeof (descr->member) __value; \
> +     ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
> +     _Static_assert (sizeof (__value) == 1 \
> +                     || sizeof (__value) == 4 \
> +                     || sizeof (__value) == 8, \
> +                     "size of per-thread data"); \
> +     if (sizeof (__value) == 1) \
> +       asm volatile ("movb %%gs:%P2(%3),%b0" \
> +                     : "=q" (__value) \
> +                     : "0" (0), "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else if (sizeof (__value) == 4) \
> +       asm volatile ("movl %%gs:%P1(%2),%0" \
> +                     : "=r" (__value) \
> +                     : "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else /* 8 */ \
> +       { \
> +         asm volatile ("movl %%gs:%P1(%2),%%eax\n\t" \
> +                       "movl %%gs:4+%P1(%2),%%edx" \
> +                       : "=&A" (__value) \
> +                       : "i" (offsetof (struct rseq_area, member)), \
> +                         "r" (_rseq_offset)); \
> +       } \
> +     __value; })

Ok.

> +/* Set member of the RSEQ area directly.  */
> +#define RSEQ_SETMEM(descr, member, value) \
> +  ({ \
> +     ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
> +     _Static_assert (sizeof (descr->member) == 1 \
> +                     || sizeof (descr->member) == 4 \
> +                     || sizeof (descr->member) == 8, \
> +                     "size of per-thread data"); \
> +     if (sizeof (descr->member) == 1) \
> +       asm volatile ("movb %b0,%%gs:%P1(%2)" : \
> +                     : "iq" (value), \
> +                       "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else if (sizeof (descr->member) == 4) \
> +       asm volatile ("movl %0,%%gs:%P1(%2)" : \
> +                     : "ir" (value), \
> +                       "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else /* 8 */ \
> +       { \
> +         asm volatile ("movl %%eax,%%gs:%P1(%2)\n\t" \
> +                       "movl %%edx,%%gs:4+%P1(%2)" : \
> +                       : "A" ((uint64_t) cast_to_integer (value)), \
> +                         "i" (offsetof (struct rseq_area, member)), \
> +                         "r" (_rseq_offset)); \
> +       }})

Ok.

> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
> index 80eb0107b5..7aa15558e6 100644
> --- a/sysdeps/nptl/dl-tls_init_tp.c
> +++ b/sysdeps/nptl/dl-tls_init_tp.c
> @@ -107,7 +107,7 @@ __tls_init_tp (void)
>        do_rseq = TUNABLE_GET (rseq, int, NULL);
>        if (rseq_register_current_thread (pd, do_rseq))
>          {
> -          _rseq_size = sizeof (pd->rseq_area);
> +          _rseq_size = GLRO (dl_tls_rseq_size);

Ok.

> @@ -116,7 +116,7 @@ __tls_init_tp (void)
>           all targets support __thread_pointer, so set __rseq_offset only
>           if the rseq registration may have happened because RSEQ_SIG is
>           defined.  */
> -      _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
> +      _rseq_offset = GLRO (dl_tls_rseq_offset);

Ok.

> diff --git a/sysdeps/nptl/tcb-access.h b/sysdeps/nptl/tcb-access.h
> index 600433766f..9532f30022 100644
> --- a/sysdeps/nptl/tcb-access.h
> +++ b/sysdeps/nptl/tcb-access.h
> @@ -30,3 +30,8 @@
>    descr->member = (value)
>  #define THREAD_SETMEM_NC(descr, member, idx, value) \
>    descr->member[idx] = (value)
> +
> +#define RSEQ_GETMEM_VOLATILE(descr, member) \
> +  THREAD_GETMEM_VOLATILE(descr, member)
> +#define RSEQ_SETMEM(descr, member, value) \
> +  THREAD_SETMEM(descr, member, value)

Ok.

> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
> +tests-static += \
> +  tst-rseq-disable-static \
> +  tst-rseq-static \
> +  # tests-static
> +

Ok.

> @@ -394,6 +399,7 @@ $(objpfx)tst-mount-compile.out: ../sysdeps/unix/sysv/linux/tst-mount-compile.py
>  $(objpfx)tst-mount-compile.out: $(sysdeps-linux-python-deps)
>
>  tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
> +tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0

Ok.

> @@ -655,4 +661,8 @@ tests += \
>  tests-internal += \
>    tst-rseq-nptl \
>    # tests-internal
> +
> +tests-static += \
> +  tst-rseq-nptl-static \
> +  # tests-static

Ok.

> diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
> index e3d758b163..fbc90fc786 100644
> --- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
> +++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
> @@ -21,6 +21,7 @@
>  #include <fpu_control.h>
>  #include <ldsodefs.h>
>  #include <link.h>
> +#include <dl-rseq.h>
>
>  typedef ElfW(Addr) dl_parse_auxv_t[AT_MINSIGSTKSZ + 1];
>
> @@ -57,5 +58,10 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
>    GLRO(dl_sysinfo) = auxv_values[AT_SYSINFO];
>  #endif
>
> +  GLRO(dl_tls_rseq_feature_size) = MAX (auxv_values[AT_RSEQ_FEATURE_SIZE],
> +                                        TLS_DL_RSEQ_MIN_FEATURE_SIZE);
> +  GLRO(dl_tls_rseq_align) = MAX (auxv_values[AT_RSEQ_ALIGN],
> +                                 TLS_DL_RSEQ_MIN_ALIGN);
> +

Ok.
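As a side note, the auxv values consumed above are also visible to user
code through getauxval; a minimal sketch (not part of the patch,
assuming glibc/kernel headers that define AT_RSEQ_FEATURE_SIZE and
AT_RSEQ_ALIGN):

#include <stdio.h>
#include <sys/auxv.h>

int
main (void)
{
  /* Both values read as 0 on older kernels that do not provide the
     rseq auxv entries, which is why the loader clamps them to the ABI
     minimums (20 and 32) above.  */
  unsigned long feature_size = getauxval (AT_RSEQ_FEATURE_SIZE);
  unsigned long align = getauxval (AT_RSEQ_ALIGN);
  printf ("rseq feature size: %lu, align: %lu\n", feature_size, align);
  return 0;
}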
> diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
> index 48eebc1e16..b6c9deea6b 100644
> --- a/sysdeps/unix/sysv/linux/rseq-internal.h
> +++ b/sysdeps/unix/sysv/linux/rseq-internal.h
> @@ -24,6 +24,24 @@
>  #include <stdbool.h>
>  #include <stdio.h>
>  #include <sys/rseq.h>
> +#include <thread_pointer.h>
> +#include <ldsodefs.h>
> +
> +/* rseq area registered with the kernel.  Use a custom definition
> +   here to isolate from kernel struct rseq changes.  The
> +   implementation of sched_getcpu needs acccess to the cpu_id field;
> +   the other fields are unused and not included here.  */
> +struct rseq_area
> +{
> +  uint32_t cpu_id_start;
> +  uint32_t cpu_id;
> +};
> +
> +static inline struct rseq_area *
> +rseq_get_area(void)
> +{
> +  return (struct rseq_area *) ((char *) __thread_pointer() + GLRO (dl_tls_rseq_offset));
> +}

Ok.

> @@ -31,20 +49,23 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq)
>  {
>    if (do_rseq)
>      {
> -      int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
> -                                       sizeof (self->rseq_area),
> +      /* The kernel expects 'rseq_area->rseq_cs == NULL' on registration, zero
> +         the whole rseq area.  */
> +      memset(rseq_get_area(), 0, GLRO (dl_tls_rseq_size));
> +      int ret = INTERNAL_SYSCALL_CALL (rseq, rseq_get_area(),
> +                                       GLRO (dl_tls_rseq_size),
>                                         0, RSEQ_SIG);

Ok.

> -  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
> +  RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);

Ok.

> -  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
> +  RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);

Ok.

> diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
> -  int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
> +  int cpu_id = RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id);

Ok.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c b/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c
> new file mode 100644
> index 0000000000..2687d13d3d
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c
> @@ -0,0 +1 @@
> +#include "tst-rseq-disable.c"

Ok.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
> index bbc655bec4..ae4143916b 100644
> --- a/sysdeps/unix/sysv/linux/tst-rseq-disable.c
> +++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
> @@ -26,27 +26,30 @@
>  #include <unistd.h>
>
>  #ifdef RSEQ_SIG
> +# include <sys/auxv.h>
> +# include "tst-rseq.h"
> +
> +static __thread struct rseq local_rseq;
>
>  /* Check that rseq can be registered and has not been taken by glibc.  */
>  static void
>  check_rseq_disabled (void)
>  {
> -  struct pthread *pd = THREAD_SELF;
> +  struct rseq *rseq_area = (struct rseq *) ((char *) __thread_pointer () + __rseq_offset);
>
>    TEST_COMPARE (__rseq_flags, 0);
> -  TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
> -               == (char *) &pd->rseq_area);
>    TEST_COMPARE (__rseq_size, 0);

Isn't __rseq_size always at least 32 ?

> -  TEST_COMPARE ((int) pd->rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
> +  TEST_COMPARE ((int) rseq_area->cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);

Ok.

> +  TEST_COMPARE (sizeof (local_rseq), RSEQ_TEST_MIN_SIZE);

This happens to be true at this point, but the comparison should really
be >= not ==, esp as the features patch changes the size of this
structure to be larger than MIN_SIZE.
> -  int ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
> -                     0, RSEQ_SIG);
> +  int ret = syscall (__NR_rseq, &local_rseq, RSEQ_TEST_MIN_SIZE, 0, RSEQ_SIG);
>    if (ret == 0)
>      {
> -      ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
> +      ret = syscall (__NR_rseq, &local_rseq, RSEQ_TEST_MIN_SIZE,
>                       RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
>        TEST_COMPARE (ret, 0);
> -      pd->rseq_area.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
> +      rseq_area->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
>      }

Ok.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c
> new file mode 100644
> index 0000000000..6e2c923bb9
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c
> @@ -0,0 +1 @@
> +#include "tst-rseq-nptl.c"

Ok.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-static.c b/sysdeps/unix/sysv/linux/tst-rseq-static.c
> new file mode 100644
> index 0000000000..1d97f3bd3d
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-rseq-static.c
> @@ -0,0 +1 @@
> +#include "tst-rseq.c"

Ok.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.c b/sysdeps/unix/sysv/linux/tst-rseq.c
> index 2c90409ba0..c8c0518a5d 100644
> --- a/sysdeps/unix/sysv/linux/tst-rseq.c
> +++ b/sysdeps/unix/sysv/linux/tst-rseq.c
> @@ -31,18 +31,32 @@
>  # include <syscall.h>
>  # include <thread_pointer.h>
>  # include <tls.h>
> +# include <sys/auxv.h>
>  # include "tst-rseq.h"

Ok.

>  static void
>  do_rseq_main_test (void)
>  {
> -  struct pthread *pd = THREAD_SELF;
> +  size_t rseq_align = MAX (getauxval (AT_RSEQ_ALIGN), RSEQ_TEST_MIN_ALIGN);
> +  size_t rseq_size = roundup (MAX (getauxval (AT_RSEQ_FEATURE_SIZE), RSEQ_TEST_MIN_SIZE), rseq_align);
> +  struct rseq *rseq = __thread_pointer () + __rseq_offset;
>
>    TEST_VERIFY_EXIT (rseq_thread_registered ());
>    TEST_COMPARE (__rseq_flags, 0);
> -  TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
> -               == (char *) &pd->rseq_area);
> -  TEST_COMPARE (__rseq_size, sizeof (pd->rseq_area));
> +  TEST_COMPARE (__rseq_size, rseq_size);

Ok.

> +  /* The size of the rseq area must be a multiple of the alignment.  */
> +  TEST_VERIFY ((__rseq_size % rseq_align) == 0);

Ok.

> +  /* The rseq area address must be aligned.  */
> +  TEST_VERIFY (((unsigned long) rseq % rseq_align) == 0);

Ok.

> +#if TLS_TCB_AT_TP
> +  /* The rseq area block should come before the thread pointer and be at least 32 bytes.  */
> +  TEST_VERIFY (__rseq_offset <= RSEQ_TEST_MIN_SIZE);
> +#elif TLS_DTV_AT_TP
> +  /* The rseq area block should come after the thread pointer.  */
> +  TEST_VERIFY (__rseq_offset >= 0);
> +#else
> +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
> +#endif

Ok.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h
> index dc603327d3..4931aa3d14 100644
> --- a/sysdeps/unix/sysv/linux/tst-rseq.h
> +++ b/sysdeps/unix/sysv/linux/tst-rseq.h
> @@ -23,11 +23,18 @@
>  #include <syscall.h>
>  #include <sys/rseq.h>
>  #include <tls.h>
> +#include <rseq-internal.h>
> +
> +#define RSEQ_TEST_MIN_SIZE 32
> +#define RSEQ_TEST_MIN_FEATURE_SIZE 20
> +#define RSEQ_TEST_MIN_ALIGN 32
>
>  static inline bool
>  rseq_thread_registered (void)
>  {
> -  return THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id) >= 0;
> +  struct rseq_area *rseq = (struct rseq_area *) ((char *) __thread_pointer () + __rseq_offset);
> +
> +  return __atomic_load_n (&rseq->cpu_id, __ATOMIC_RELAXED) >= 0;
>  }

Ok.
> diff --git a/sysdeps/x86_64/nptl/tcb-access.h b/sysdeps/x86_64/nptl/tcb-access.h
> index d35948f111..75ba4b3ce9 100644
> --- a/sysdeps/x86_64/nptl/tcb-access.h
> +++ b/sysdeps/x86_64/nptl/tcb-access.h
> @@ -130,3 +130,59 @@
>                       "i" (offsetof (struct pthread, member[0])), \
>                       "r" (idx)); \
>        }})
> +
> +/* Read member of the RSEQ area directly.  */
> +# define RSEQ_GETMEM_VOLATILE(descr, member) \
> +  ({ __typeof (descr->member) __value; \
> +     ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
> +     _Static_assert (sizeof (__value) == 1 \
> +                     || sizeof (__value) == 4 \
> +                     || sizeof (__value) == 8, \
> +                     "size of per-thread data"); \
> +     if (sizeof (__value) == 1) \
> +       asm volatile ("movb %%fs:%P2(%q3),%b0" \
> +                     : "=q" (__value) \
> +                     : "0" (0), "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else if (sizeof (__value) == 4) \
> +       asm volatile ("movl %%fs:%P1(%q2),%0" \
> +                     : "=r" (__value) \
> +                     : "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else /* 8 */ \
> +       { \
> +         asm volatile ("movq %%fs:%P1(%q2),%q0" \
> +                       : "=r" (__value) \
> +                       : "i" (offsetof (struct rseq_area, member)), \
> +                         "r" (_rseq_offset)); \
> +       } \
> +     __value; })

Ok.

> +/* Set member of the RSEQ area directly.  */
> +# define RSEQ_SETMEM(descr, member, value) \
> +  ({ \
> +     ptrdiff_t _rseq_offset = GLRO (dl_tls_rseq_offset); \
> +     _Static_assert (sizeof (descr->member) == 1 \
> +                     || sizeof (descr->member) == 4 \
> +                     || sizeof (descr->member) == 8, \
> +                     "size of per-thread data"); \
> +     if (sizeof (descr->member) == 1) \
> +       asm volatile ("movb %b0,%%fs:%P1(%q2)" : \
> +                     : "iq" (value), \
> +                       "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else if (sizeof (descr->member) == 4) \
> +       asm volatile ("movl %0,%%fs:%P1(%q2)" : \
> +                     : IMM_MODE (value), \
> +                       "i" (offsetof (struct rseq_area, member)), \
> +                       "r" (_rseq_offset)); \
> +     else /* 8 */ \
> +       { \
> +         /* Since movq takes a signed 32-bit immediate or a register source \
> +            operand, use "er" constraint for 32-bit signed integer constant \
> +            or register.  */ \
> +         asm volatile ("movq %q0,%%fs:%P1(%q2)" : \
> +                       : "er" ((uint64_t) cast_to_integer (value)), \
> +                         "i" (offsetof (struct rseq_area, member)), \
> +                         "r" (_rseq_offset)); \
> +       }})

Ok.
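For readers following the accessors above: a rough C equivalent of what
the segment-relative asm computes (an illustrative sketch only, not
part of the patch) is

  struct rseq_area *ra = (struct rseq_area *)
    ((char *) __thread_pointer () + GLRO (dl_tls_rseq_offset));
  uint32_t cpu_id = *(volatile uint32_t *) &ra->cpu_id;

The asm forms load straight through the %gs/%fs thread register, which
avoids first materializing the thread pointer in a general register,
in the same style as the existing THREAD_GETMEM/THREAD_SETMEM
accessors.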
On 2024-02-15 23 h 42, DJ Delorie wrote:
>
> One incorrect year
> Could use more comments around the tcb math

Ack, I'll address this in the next patch series.

> Questions in sysdeps/unix/sysv/linux/tst-rseq-disable.c
>
> Michael Jeanson <mjeanson@efficios.com> writes:
>> This makes the size of the rseq area variable and thus requires to
>> relocate it out of 'struct pthread'. We chose to move it after (in block
>> allocation order) the last TLS block since it required a fairly small
>> modification to the TLS block allocator and did not interfere with the
>> main executable TLS block which must always be first.
>
> The TLS doesn't get resized, does it?  Or am I thinking of the DTV?

The terminology is a bit confusing and could be clarified.  My
understanding is that the 'static TLS block' is the area where the 'TLS
blocks' from shared objects and the main executable are allocated, and
the DTV is the array of descriptors that point to each of these 'TLS
blocks' inside the 'static TLS block'.

What we are doing here is adding a 'fake' 'TLS block' at the end of the
'static TLS block' (in allocation order, which differs depending on the
TLS model).  We move the offset so our fake block is included in
dl_tls_static_used but we don't register it in the DTV since it's not a
'real' TLS block with the required metadata.

I think I should rewrite the commit message to make the distinction
between the 'static TLS block' and 'TLS blocks'.

>> #if TLS_TCB_AT_TP
>> +  /* Before the the thread pointer, add the aligned tls block size and then
>> +     align the rseq area block on top.  */
>> +  tls_blocks_size = roundup (roundup (memsz, align ?: 1) + rseq_size, rseq_align);
>> +
>> +  /* Record the rseq_area offset.  The offset is negative with TLS_TCB_AT_TP
>> +     because the TLS blocks are located before the thread pointer.  */
>> +  GLRO (dl_tls_rseq_offset) = -tls_blocks_size;
>> +
>
> So the allocated area will be at negative offsets to TP anyway; we add
> at the beginning, so *our* location is at the beginning, and thus the
> most negative offset.
>
> What if the rseq area needs alignment greater than what
> _dl_early_allocate() guarantees?  This code takes the time to round the
> tls size up to rseq_align, but never aligns the actual address of the
> rseq area after allocation.  Ah, the adjustments to max_align cause this
> to happen later, as long as the offsets are suitably aligned also.
>
> Perhaps a quick note in the comment that "max_align enforces the
> alignment of the resulting pointer" or something like that?

Ack, I'll add a comment.

>> diff --git a/sysdeps/generic/dl-rseq.h b/sysdeps/generic/dl-rseq.h
>> new file mode 100644
>> index 0000000000..0855981c89
>> --- /dev/null
>> +++ b/sysdeps/generic/dl-rseq.h
>> @@ -0,0 +1,26 @@
>> +/* RSEQ defines for the dynamic linker.  Generic version.
>> +   Copyright (C) 2023 Free Software Foundation, Inc.
>
> 2023 ?

Ack.

>> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
>> index bbc655bec4..ae4143916b 100644
>> --- a/sysdeps/unix/sysv/linux/tst-rseq-disable.c
>> +++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
>> @@ -26,27 +26,30 @@
>>  #include <unistd.h>
>>
>>  #ifdef RSEQ_SIG
>> +# include <sys/auxv.h>
>> +# include "tst-rseq.h"
>> +
>> +static __thread struct rseq local_rseq;
>>
>>  /* Check that rseq can be registered and has not been taken by glibc.  */
>>  static void
>>  check_rseq_disabled (void)
>>  {
>> -  struct pthread *pd = THREAD_SELF;
>> +  struct rseq *rseq_area = (struct rseq *) ((char *) __thread_pointer () + __rseq_offset);
>>
>>    TEST_COMPARE (__rseq_flags, 0);
>> -  TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
>> -               == (char *) &pd->rseq_area);
>>    TEST_COMPARE (__rseq_size, 0);
>
> Isn't __rseq_size always at least 32 ?

It's at least 32 when the registration succeeded, however if the
registration failed or was disabled by tunable then it's set to 0.

>> +  TEST_COMPARE (sizeof (local_rseq), RSEQ_TEST_MIN_SIZE);
>
> This happens to be true at this point, but the comparison should really
> be >= not ==, esp as the features patch changes the size of this
> structure to be larger than MIN_SIZE.

This test doesn't really make sense anymore, it's a leftover from a
previous iteration, I'll remove it.
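To illustrate the layout being described (schematic only, based on the
offset signs documented in the patch):

  TLS_TCB_AT_TP (e.g. x86_64): the TLS blocks end at the thread pointer.

      [rseq area][TLS blocks ...][TCB]
       ^                          ^ TP
       TP + dl_tls_rseq_offset (negative)

  TLS_DTV_AT_TP (e.g. AArch64): the TLS blocks start after the thread
  pointer.

      [TCB][TLS blocks ...][rseq area]
       ^ TP                 ^
                            TP + dl_tls_rseq_offset (positive)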
Michael Jeanson <mjeanson@efficios.com> writes:
>> The TLS doesn't get resized, does it?  Or am I thinking of the DTV?
>
> The terminology is a bit confusing and could be clarified, my
> understanding is that the 'static TLS block' is the area where the 'TLS
> blocks' from shared objects and the main executable are allocated and
> the DTV is the array of descriptors that point to each of these 'TLS
> blocks' inside the 'static TLS block'.

I might be thinking of where TLS data for dlopen()'d objects go, after
the static TLS block is filled.

>> Isn't __rseq_size always at least 32 ?
>
> It's at least 32 when the registration succeeded, however if the
> registration failed or was disabled by tunable then it's set to 0.

Are we sure?

  /* Default to the rseq ABI minimum sizes, this will reduce TLS usage to 32
     bytes when rseq is disabled by tunables.  */
  size_t rseq_size = TLS_DL_RSEQ_MIN_SIZE;
  size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
  bool do_rseq = true;
  do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
  if (do_rseq)
    {
      rseq_align = GLRO(dl_tls_rseq_align);
      /* Make sure the rseq area size is at least the minimum ABI size and a
         multiple of the requested aligment.  */
      rseq_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
                                TLS_DL_RSEQ_MIN_SIZE), rseq_align);
    }

  static inline bool
  rseq_register_current_thread (struct pthread *self, bool do_rseq)
  {
    if (do_rseq)
      {
        /* The kernel expects 'rseq_area->rseq_cs == NULL' on registration, zero
           the whole rseq area.  */
        memset(rseq_get_area(), 0, GLRO (dl_tls_rseq_size));
        int ret = INTERNAL_SYSCALL_CALL (rseq, rseq_get_area(),
                                         GLRO (dl_tls_rseq_size),
                                         0, RSEQ_SIG);
        if (!INTERNAL_SYSCALL_ERROR_P (ret))
          return true;
      }
    RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
    return false;
  }
On 2024-02-16 15 h 37, DJ Delorie wrote:
> Michael Jeanson <mjeanson@efficios.com> writes:
>>> The TLS doesn't get resized, does it?  Or am I thinking of the DTV?
>>
>> The terminology is a bit confusing and could be clarified, my
>> understanding is that the 'static TLS block' is the area where the 'TLS
>> blocks' from shared objects and the main executable are allocated and
>> the DTV is the array of descriptors that point to each of these 'TLS
>> blocks' inside the 'static TLS block'.
>
> I might be thinking of where TLS data for dlopen()'d objects go, after
> the static TLS block is filled.
>
>>> Isn't __rseq_size always at least 32 ?
>>
>> It's at least 32 when the registration succeeded, however if the
>> registration failed or was disabled by tunable then it's set to 0.
>
> Are we sure?
>
>   /* Default to the rseq ABI minimum sizes, this will reduce TLS usage to 32
>      bytes when rseq is disabled by tunables.  */
>   size_t rseq_size = TLS_DL_RSEQ_MIN_SIZE;
>   size_t rseq_align = TLS_DL_RSEQ_MIN_ALIGN;
>   bool do_rseq = true;
>   do_rseq = TUNABLE_GET_FULL (glibc, pthread, rseq, int, NULL);
>   if (do_rseq)
>     {
>       rseq_align = GLRO(dl_tls_rseq_align);
>       /* Make sure the rseq area size is at least the minimum ABI size and a
>          multiple of the requested aligment.  */
>       rseq_size = roundup (MAX (GLRO(dl_tls_rseq_feature_size),
>                                 TLS_DL_RSEQ_MIN_SIZE), rseq_align);
>     }
>
>   static inline bool
>   rseq_register_current_thread (struct pthread *self, bool do_rseq)
>   {
>     if (do_rseq)
>       {
>         /* The kernel expects 'rseq_area->rseq_cs == NULL' on registration, zero
>            the whole rseq area.  */
>         memset(rseq_get_area(), 0, GLRO (dl_tls_rseq_size));
>         int ret = INTERNAL_SYSCALL_CALL (rseq, rseq_get_area(),
>                                          GLRO (dl_tls_rseq_size),
>                                          0, RSEQ_SIG);
>         if (!INTERNAL_SYSCALL_ERROR_P (ret))
>           return true;
>       }
>     RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
>     return false;
>   }

When rseq registration is disabled by tunable we still need to allocate
an rseq block because application code is allowed to check for the
registration status by reading 'rseq->cpu_id'.  So in this case we
allocate the minimum ABI size of 32.

Internally this is recorded in 'dl_tls_rseq_size' but the application
visible '_rseq_size' is left at 0 (this happens in
sysdeps/nptl/dl-tls_init_tp.c).

I'll try making this more clear with additional comments and finding a
name for 'rseq_size' that is not so close to '__rseq_size'.
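A minimal sketch of the application-side check being described
(illustrative only, not part of the patch; assumes glibc's
<sys/rseq.h> exports __rseq_offset/__rseq_size and a compiler
providing __builtin_thread_pointer):

#include <stdio.h>
#include <sys/rseq.h>

int
main (void)
{
  struct rseq *rs = (struct rseq *)
    ((char *) __builtin_thread_pointer () + __rseq_offset);

  /* Even with glibc.pthread.rseq=0, the minimum 32-byte area exists,
     so reading cpu_id is valid; __rseq_size stays 0 and cpu_id holds
     RSEQ_CPU_ID_REGISTRATION_FAILED.  */
  if (__rseq_size == 0 || (int) rs->cpu_id < 0)
    printf ("rseq not registered\n");
  else
    printf ("rseq registered, cpu %u\n", rs->cpu_id);
  return 0;
}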
Michael Jeanson <mjeanson@efficios.com> writes:
> When rseq registration is disabled by tunable we still need to allocate
> an rseq block because application code is allowed to check for the
> registration status by reading 'rseq->cpu_id'.  So in this case we
> allocate the minimum ABI size of 32.

Ok, I admit that was confusing ;-)

> Internally this is recorded in 'dl_tls_rseq_size' but the application
> visible '_rseq_size' is left at 0 (this happens in
> sysdeps/nptl/dl-tls_init_tp.c).

A comment in sysdeps/nptl/dl-tls_init_tp.c after the if{} noting what
the variables would be otherwise might be useful.  At least that was one
of the places I looked at when trying to figure this out.

> I'll try making this more clear with additional comments and finding a
> name for 'rseq_size' that is not so close to '__rseq_size'.

Perhaps "rseq_alloc_size" ?
On 2024-02-16 16 h 17, DJ Delorie wrote:
> Michael Jeanson <mjeanson@efficios.com> writes:
>> When rseq registration is disabled by tunable we still need to allocate
>> an rseq block because application code is allowed to check for the
>> registration status by reading 'rseq->cpu_id'.  So in this case we
>> allocate the minimum ABI size of 32.
>
> Ok, I admit that was confusing ;-)

Yeah, I had to think for a minute to remember that, it should be
documented.

>> Internally this is recorded in 'dl_tls_rseq_size' but the application
>> visible '_rseq_size' is left at 0 (this happens in
>> sysdeps/nptl/dl-tls_init_tp.c).
>
> A comment in sysdeps/nptl/dl-tls_init_tp.c after the if{} noting what
> the variables would be otherwise might be useful.  At least that was one
> of the places I looked at when trying to figure this out.

Ack.

>> I'll try making this more clear with additional comments and finding a
>> name for 'rseq_size' that is not so close to '__rseq_size'.
>
> Perhaps "rseq_alloc_size" ?

Sounds good.