diff mbox series

[v12,15/17] nptl: Move the rseq area to the 'extra TLS' block

Message ID 20240801-rseq-abi-v11-split-v12-15-0e87479dddc0@efficios.com
State New
Headers show
Series Add rseq extensible ABI support | expand

Commit Message

Michael Jeanson Aug. 5, 2024, 8:48 p.m. UTC
Move the rseq area to the newly added 'extra TLS' block, this is the
last step in adding support for the rseq extended ABI. The size of the
rseq area is now dynamic and depends on the rseq features reported by
the kernel through the elf auxiliary vector. This will allow
applications to use rseq features past the 32 bytes of the original rseq
ABI as they become available in future kernels.

Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
---
Changes since v11:
- __rseq_size is now set directly in _dl_parse_auxv, set it to 0 when
  the main thread registration fails or is disabled by tunable
---
 nptl/pthread_create.c                             |  2 +-
 sysdeps/nptl/dl-tls_init_tp.c                     | 20 +++---
 sysdeps/unix/sysv/linux/Makefile                  | 10 +++
 sysdeps/unix/sysv/linux/rseq-internal.h           | 64 ++++++++++++++++--
 sysdeps/unix/sysv/linux/sched_getcpu.c            |  3 +-
 sysdeps/unix/sysv/linux/tst-rseq-disable-static.c |  1 +
 sysdeps/unix/sysv/linux/tst-rseq-disable.c        | 60 ++++++++++++++---
 sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c    |  1 +
 sysdeps/unix/sysv/linux/tst-rseq-static.c         |  1 +
 sysdeps/unix/sysv/linux/tst-rseq.c                | 81 ++++++++++++++++++-----
 sysdeps/unix/sysv/linux/tst-rseq.h                |  7 +-
 11 files changed, 205 insertions(+), 45 deletions(-)

Comments

Florian Weimer Sept. 14, 2024, 11:51 a.m. UTC | #1
* Michael Jeanson:

> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index 1d3665d5ed..9b49ee7121 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -691,7 +691,7 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>  
>    /* Inherit rseq registration state.  Without seccomp filters, rseq
>       registration will either always fail or always succeed.  */
> -  if ((int) THREAD_GETMEM_VOLATILE (self, rseq_area.cpu_id) >= 0)
> +  if ((int) RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id) >= 0)
>      pd->flags |= ATTR_FLAG_DO_RSEQ;

Style nit: space in rseq_get_area ()


> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
> index b2abb2762d..46dc666cbc 100644
> --- a/sysdeps/nptl/dl-tls_init_tp.c
> +++ b/sysdeps/nptl/dl-tls_init_tp.c
> @@ -101,19 +101,17 @@ __tls_init_tp (void)
>    }

> +    if (!rseq_register_current_thread (pd, do_rseq))
> +      {
> +        _rseq_size = 0;
> +      }

Maybe drop the extra braces.

> diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
> index 2aae447b60..d26d69fd6a 100644
> --- a/sysdeps/unix/sysv/linux/rseq-internal.h
> +++ b/sysdeps/unix/sysv/linux/rseq-internal.h
> @@ -24,6 +24,26 @@

> +/* rseq area registered with the kernel.  Use a custom definition here to
> +   isolate from the system provided header which could lack some fields of the
> +   Extended ABI.
> +
> +   Access to fields of the Extended ABI beyond the 20 bytes of the original ABI
> +   (after 'flags') must be gated by a check of the feature size.  */
> +struct rseq_area
> +{
> +  /* Original ABI.  */
> +  uint32_t cpu_id_start;
> +  uint32_t cpu_id;
> +  uint64_t rseq_cs;
> +  uint32_t flags;
> +  /* Extended ABI.  */
> +  uint32_t node_id;
> +  uint32_t mm_cid;
> +};

(Note: UAPI uses struct rseq, so no conflict here.)

It's architecture-dependent whether this struct has 4 byte padding at
the end.  If this struct is only used to get the field offsets and
sizes, please add a comment to this effect, and maybe a flexible array
member at the end to discourage direct object allocations.

> +/* Returns a pointer to the current thread rseq area.  */
> +static inline struct rseq_area *
> +rseq_get_area(void)
> +{
> +#if IS_IN (rtld)
> +  /* Use the hidden symbol in ld.so.  */
> +  return (struct rseq_area *) ((char *) __thread_pointer() + _rseq_offset);
> +#else
> +  return (struct rseq_area *) ((char *) __thread_pointer() + __rseq_offset);
> +#endif
> +}

I wonder if this should go into the patch that adds RSEQ_GETMEM_VOLATILE
etc. because it's required by the generic macros.  And maybe name it
RSEQ_SELF for alignment with THREAD_SELF (but it can remain an inline
function).

The _rseq_offset/__rseq_offset gate here is ugly.  I believe we can
avoid it for read access if we also add _GI___rseq_size as an alias in
the .S file and add rtld_hidden_proto to the declaration of __rseq_size.
Then in ld.so, we automatically use a hidden symbol for access with
__rseq_size, and we won't need the #ifdefs.  The _rseq_size alias will
have to remain because it's not defined as const and used during
initialization.

> +      /* The feature size can be smaller than the minimum rseq area size of 32
> +	 bytes accepted by the syscall, if this is the case, bump the size of
> +	 the registration to the minimum. The 'extra TLS' block is always at
> +	 least 32 bytes. */
>        if (size < RSEQ_AREA_SIZE_INITIAL)
> -        /* The initial implementation used only 20 bytes out of 32,
> -           but still expected size 32.  */
>          size = RSEQ_AREA_SIZE_INITIAL;
> -      int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
> -                                       size, 0, RSEQ_SIG);
> +
> +      /* The kernel expects 'rseq_area->rseq_cs == NULL' on registration, zero
> +         the whole rseq area.  */
> +      memset(rseq_get_area(), 0, size);
> +
> +      int ret = INTERNAL_SYSCALL_CALL (rseq, rseq_get_area(), size, 0,
> +		      RSEQ_SIG);
>        if (!INTERNAL_SYSCALL_ERROR_P (ret))
>          return true;

Style nits: missing spaces before '(' in function macro/calls.

This added memset may actually fix an existing bug, but it is hard to
tell.  On some architectures, the generic TLS code in combination with
the NPTL code zeroes struct pthread, and thus the rseq area embedded in
it.  But I'm not sure that this is what happens on all architectures.
Perhaps we should add a test for this that exits are thread with a
pollutted rseq area.  In any case, the addition of the memset should go
into a separate patch, so that we can backport it to all releases with
rseq support.

> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
> index bbc655bec4..b1f4e894f1 100644
> --- a/sysdeps/unix/sysv/linux/tst-rseq-disable.c
> +++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
> @@ -26,32 +26,65 @@

> +static __thread struct rseq local_rseq = {
> +  .cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED,
> +};

Maybe add a comment: Used to test private registration with the rseq
system call because glibc rseq is disabled.

>  
>  /* Check that rseq can be registered and has not been taken by glibc.  */
>  static void
>  check_rseq_disabled (void)
>  {
> -  struct pthread *pd = THREAD_SELF;
> +  struct rseq *rseq_area = (struct rseq *) ((char *) __thread_pointer () + __rseq_offset);

Style nit: Line length (also below).

> --- a/sysdeps/unix/sysv/linux/tst-rseq.c
> +++ b/sysdeps/unix/sysv/linux/tst-rseq.c
> @@ -19,6 +19,8 @@
>     not linked against libpthread.  */
>  
>  #include <support/check.h>
> +#include <support/namespace.h>
> +#include <support/xthread.h>
>  #include <stdio.h>
>  #include <sys/rseq.h>
>  #include <unistd.h>
> @@ -32,23 +34,66 @@
>  # include <sys/auxv.h>
>  # include <thread_pointer.h>
>  # include <tls.h>
> +# include <sys/auxv.h>
>  # include "tst-rseq.h"
>  
>  static void
>  do_rseq_main_test (void)
>  {
> -  struct pthread *pd = THREAD_SELF;
> +  size_t rseq_align = MAX (getauxval (AT_RSEQ_ALIGN), RSEQ_TEST_MIN_ALIGN);
> +  size_t rseq_feature_size = MAX (getauxval (AT_RSEQ_FEATURE_SIZE), RSEQ_TEST_MIN_FEATURE_SIZE);
> +  size_t rseq_alloc_size = roundup (MAX (rseq_feature_size, RSEQ_TEST_MIN_SIZE), rseq_align);

I think this assumes that getauxval returns 0 if the kernel doesn't have
these in auxv?

>  #include <support/test-driver.c>
> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h
> index dc603327d3..7a2e19b07f 100644
> --- a/sysdeps/unix/sysv/linux/tst-rseq.h
> +++ b/sysdeps/unix/sysv/linux/tst-rseq.h
> @@ -23,11 +23,16 @@
>  #include <syscall.h>
>  #include <sys/rseq.h>
>  #include <tls.h>
> +#include <rseq-internal.h>

Note: This obtains the struct rseq_area definition (the glibc variant of
the UAPI struct rseq).

Thanks,
Florian
diff mbox series

Patch

diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 1d3665d5ed..9b49ee7121 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -691,7 +691,7 @@  __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 
   /* Inherit rseq registration state.  Without seccomp filters, rseq
      registration will either always fail or always succeed.  */
-  if ((int) THREAD_GETMEM_VOLATILE (self, rseq_area.cpu_id) >= 0)
+  if ((int) RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id) >= 0)
     pd->flags |= ATTR_FLAG_DO_RSEQ;
 
   /* Initialize the field for the ID of the thread which is waiting
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index b2abb2762d..46dc666cbc 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -101,19 +101,17 @@  __tls_init_tp (void)
   }
 
   {
+    /* If the registration fails or is disabled by tunable, the public
+       '__rseq_size' will be set '0' regardless of the feature size of the
+       allocated rseq area.  An rseq area of at least 32 bytes is always
+       allocated since application code is allowed to test the status of the
+       rseq registration with 'rseq->cpu_id >= 0'.  */
     bool do_rseq = true;
     do_rseq = TUNABLE_GET (rseq, int, NULL);
-    if (rseq_register_current_thread (pd, do_rseq))
-      _rseq_size = RSEQ_AREA_SIZE_INITIAL_USED;
-
-#ifdef RSEQ_SIG
-    /* This should be a compile-time constant, but the current
-       infrastructure makes it difficult to determine its value.  Not
-       all targets support __thread_pointer, so set __rseq_offset only
-       if the rseq registration may have happened because RSEQ_SIG is
-       defined.  */
-    _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
-#endif
+    if (!rseq_register_current_thread (pd, do_rseq))
+      {
+        _rseq_size = 0;
+      }
   }
 
   /* Set initial thread's stack block from 0 up to __libc_stack_end.
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index 59998c7af4..a6c142d363 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -263,6 +263,11 @@  tests-internal += \
   tst-rseq-disable \
   # tests-internal
 
+tests-static += \
+  tst-rseq-disable-static \
+  tst-rseq-static \
+  # tests-static
+
 tests-time64 += \
   tst-adjtimex-time64 \
   tst-clock_adjtime-time64 \
@@ -396,6 +401,7 @@  $(objpfx)tst-mount-compile.out: ../sysdeps/unix/sysv/linux/tst-mount-compile.py
 $(objpfx)tst-mount-compile.out: $(sysdeps-linux-python-deps)
 
 tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
+tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
 
 endif # $(subdir) == misc
 
@@ -661,4 +667,8 @@  tests += \
 tests-internal += \
   tst-rseq-nptl \
   # tests-internal
+
+tests-static += \
+  tst-rseq-nptl-static \
+  # tests-static
 endif
diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
index 2aae447b60..d26d69fd6a 100644
--- a/sysdeps/unix/sysv/linux/rseq-internal.h
+++ b/sysdeps/unix/sysv/linux/rseq-internal.h
@@ -24,6 +24,26 @@ 
 #include <stdbool.h>
 #include <stdio.h>
 #include <sys/rseq.h>
+#include <thread_pointer.h>
+#include <ldsodefs.h>
+
+/* rseq area registered with the kernel.  Use a custom definition here to
+   isolate from the system provided header which could lack some fields of the
+   Extended ABI.
+
+   Access to fields of the Extended ABI beyond the 20 bytes of the original ABI
+   (after 'flags') must be gated by a check of the feature size.  */
+struct rseq_area
+{
+  /* Original ABI.  */
+  uint32_t cpu_id_start;
+  uint32_t cpu_id;
+  uint64_t rseq_cs;
+  uint32_t flags;
+  /* Extended ABI.  */
+  uint32_t node_id;
+  uint32_t mm_cid;
+};
 
 /* Minimum size of the rseq area allocation required by the syscall.  The
    actually used rseq feature size may be less (20 bytes initially).  */
@@ -41,10 +61,27 @@  extern size_t _rseq_align attribute_hidden;
 
 /* Size of the active features in the rseq area.
    Populated from the auxiliary vector with a minimum of '20'.
+   Set to '0' on registration failure of the main thread.
    In .data.relro but not yet write-protected.  */
 extern unsigned int _rseq_size attribute_hidden;
+
+/* Offset from the thread pointer to the rseq area, always set to allow
+   checking the registration status by reading the 'cpu_id' field.
+   In .data.relro but not yet write-protected.  */
 extern ptrdiff_t _rseq_offset attribute_hidden;
 
+/* Returns a pointer to the current thread rseq area.  */
+static inline struct rseq_area *
+rseq_get_area(void)
+{
+#if IS_IN (rtld)
+  /* Use the hidden symbol in ld.so.  */
+  return (struct rseq_area *) ((char *) __thread_pointer() + _rseq_offset);
+#else
+  return (struct rseq_area *) ((char *) __thread_pointer() + __rseq_offset);
+#endif
+}
+
 #ifdef RSEQ_SIG
 static inline bool
 rseq_register_current_thread (struct pthread *self, bool do_rseq)
@@ -52,29 +89,44 @@  rseq_register_current_thread (struct pthread *self, bool do_rseq)
   if (do_rseq)
     {
       unsigned int size;
+
+      /* Get the feature size from the auxiliary vector, this will always be at
+         least 20 bytes.  */
 #if IS_IN (rtld)
       /* Use the hidden symbol in ld.so.  */
       size = _rseq_size;
 #else
       size = __rseq_size;
 #endif
+
+      /* The feature size can be smaller than the minimum rseq area size of 32
+	 bytes accepted by the syscall, if this is the case, bump the size of
+	 the registration to the minimum. The 'extra TLS' block is always at
+	 least 32 bytes. */
       if (size < RSEQ_AREA_SIZE_INITIAL)
-        /* The initial implementation used only 20 bytes out of 32,
-           but still expected size 32.  */
         size = RSEQ_AREA_SIZE_INITIAL;
-      int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
-                                       size, 0, RSEQ_SIG);
+
+      /* The kernel expects 'rseq_area->rseq_cs == NULL' on registration, zero
+         the whole rseq area.  */
+      memset(rseq_get_area(), 0, size);
+
+      int ret = INTERNAL_SYSCALL_CALL (rseq, rseq_get_area(), size, 0,
+		      RSEQ_SIG);
       if (!INTERNAL_SYSCALL_ERROR_P (ret))
         return true;
     }
-  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+
+  /* If the registration failed or is disabled by tunable, we have to set 'cpu_id' to
+     RSEQ_CPU_ID_REGISTRATION_FAILED to allow userspace to properly test the
+     status of the registration.  */
+  RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
   return false;
 }
 #else /* RSEQ_SIG */
 static inline bool
 rseq_register_current_thread (struct pthread *self, bool do_rseq)
 {
-  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+  RSEQ_SETMEM (rseq_get_area(), cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
   return false;
 }
 #endif /* RSEQ_SIG */
diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
index 72a3360550..3cdf854316 100644
--- a/sysdeps/unix/sysv/linux/sched_getcpu.c
+++ b/sysdeps/unix/sysv/linux/sched_getcpu.c
@@ -19,6 +19,7 @@ 
 #include <sched.h>
 #include <sysdep.h>
 #include <sysdep-vdso.h>
+#include <rseq-internal.h>
 
 static int
 vsyscall_sched_getcpu (void)
@@ -36,6 +37,6 @@  vsyscall_sched_getcpu (void)
 int
 sched_getcpu (void)
 {
-  int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
+  int cpu_id = RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id);
   return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu ();
 }
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c b/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c
new file mode 100644
index 0000000000..2687d13d3d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c
@@ -0,0 +1 @@ 
+#include "tst-rseq-disable.c"
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
index bbc655bec4..b1f4e894f1 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq-disable.c
+++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
@@ -26,32 +26,65 @@ 
 #include <unistd.h>
 
 #ifdef RSEQ_SIG
+# include <sys/auxv.h>
+# include "tst-rseq.h"
+
+static __thread struct rseq local_rseq = {
+  .cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED,
+};
 
 /* Check that rseq can be registered and has not been taken by glibc.  */
 static void
 check_rseq_disabled (void)
 {
-  struct pthread *pd = THREAD_SELF;
+  struct rseq *rseq_area = (struct rseq *) ((char *) __thread_pointer () + __rseq_offset);
+
+#if TLS_TCB_AT_TP
+  /* The rseq area block should come before the thread pointer and be at least 32 bytes. */
+  TEST_VERIFY (__rseq_offset <= RSEQ_TEST_MIN_SIZE);
+#elif TLS_DTV_AT_TP
+  /* The rseq area block should come after the thread pointer. */
+  TEST_VERIFY (__rseq_offset >= 0);
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
 
+  /* __rseq_flags is unused and should always be '0'.  */
   TEST_COMPARE (__rseq_flags, 0);
-  TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
-               == (char *) &pd->rseq_area);
+
+  /* When rseq is not registered, __rseq_size should always be '0'.  */
   TEST_COMPARE (__rseq_size, 0);
-  TEST_COMPARE ((int) pd->rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
 
-  int ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
-                     0, RSEQ_SIG);
+  /* When rseq is not registered, the 'cpu_id' field should be set to
+     RSEQ_CPU_ID_REGISTRATION_FAILED.  */
+  TEST_COMPARE ((int) rseq_area->cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+
+  /* Test a rseq registration which should succeed since the internal
+     registration is disabled.  */
+  int ret = syscall (__NR_rseq, &local_rseq, RSEQ_TEST_MIN_SIZE, 0, RSEQ_SIG);
   if (ret == 0)
     {
-      ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
+      /* A successful registration should set the cpu id.  */
+      TEST_VERIFY (local_rseq.cpu_id >= 0);
+
+      /* Test we can also unregister rseq.  */
+      ret = syscall (__NR_rseq, &local_rseq, RSEQ_TEST_MIN_SIZE,
                      RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
       TEST_COMPARE (ret, 0);
-      pd->rseq_area.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
     }
   else
     {
-      TEST_VERIFY (errno != -EINVAL);
-      TEST_VERIFY (errno != -EBUSY);
+      /* Check if we failed with EINVAL which would mean an invalid rseq flags,
+         a mis-aligned rseq area address or an incorrect rseq size.  */
+      TEST_VERIFY (errno != EINVAL);
+
+      /* Check if we failed with EBUSY which means an existing rseq
+         registration. */
+      TEST_VERIFY (errno != EBUSY);
+
+      /* Check if we failed with EFAULT which means an invalid rseq area
+         address.  */
+      TEST_VERIFY (errno != EFAULT);
     }
 }
 
@@ -71,6 +104,13 @@  proc_func (void *ignored)
 static int
 do_test (void)
 {
+  printf ("info: __rseq_size: %u\n", __rseq_size);
+  printf ("info: __rseq_offset: %td\n", __rseq_offset);
+  printf ("info: __rseq_flags: %u\n", __rseq_flags);
+  printf ("info: getauxval (AT_RSEQ_FEATURE_SIZE): %ld\n",
+          getauxval (AT_RSEQ_FEATURE_SIZE));
+  printf ("info: getauxval (AT_RSEQ_ALIGN): %ld\n", getauxval (AT_RSEQ_ALIGN));
+
   puts ("info: checking main thread");
   check_rseq_disabled ();
 
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c
new file mode 100644
index 0000000000..6e2c923bb9
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c
@@ -0,0 +1 @@ 
+#include "tst-rseq-nptl.c"
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-static.c b/sysdeps/unix/sysv/linux/tst-rseq-static.c
new file mode 100644
index 0000000000..1d97f3bd3d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-static.c
@@ -0,0 +1 @@ 
+#include "tst-rseq.c"
diff --git a/sysdeps/unix/sysv/linux/tst-rseq.c b/sysdeps/unix/sysv/linux/tst-rseq.c
index 08a9533130..802672e840 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq.c
+++ b/sysdeps/unix/sysv/linux/tst-rseq.c
@@ -19,6 +19,8 @@ 
    not linked against libpthread.  */
 
 #include <support/check.h>
+#include <support/namespace.h>
+#include <support/xthread.h>
 #include <stdio.h>
 #include <sys/rseq.h>
 #include <unistd.h>
@@ -32,23 +34,66 @@ 
 # include <sys/auxv.h>
 # include <thread_pointer.h>
 # include <tls.h>
+# include <sys/auxv.h>
 # include "tst-rseq.h"
 
 static void
 do_rseq_main_test (void)
 {
-  struct pthread *pd = THREAD_SELF;
+  size_t rseq_align = MAX (getauxval (AT_RSEQ_ALIGN), RSEQ_TEST_MIN_ALIGN);
+  size_t rseq_feature_size = MAX (getauxval (AT_RSEQ_FEATURE_SIZE), RSEQ_TEST_MIN_FEATURE_SIZE);
+  size_t rseq_alloc_size = roundup (MAX (rseq_feature_size, RSEQ_TEST_MIN_SIZE), rseq_align);
+  struct rseq *global_rseq = __thread_pointer () + __rseq_offset;
 
   TEST_VERIFY_EXIT (rseq_thread_registered ());
+
+  /* __rseq_flags is unused and should always be '0'.  */
   TEST_COMPARE (__rseq_flags, 0);
-  TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
-               == (char *) &pd->rseq_area);
-  /* The current implementation only supports the initial size.  */
-  TEST_COMPARE (__rseq_size, 20);
+
+  /* When rseq is registered, __rseq_size should report the feature size.  */
+  TEST_COMPARE (__rseq_size, rseq_feature_size);
+
+  /* When rseq is registered, the 'cpu_id' field should be set to a valid cpu
+   * number.  */
+  TEST_VERIFY ((int32_t) global_rseq->cpu_id >= 0);
+
+  /* The rseq area address must be aligned.  */
+  TEST_VERIFY (((unsigned long) global_rseq % rseq_align) == 0);
+
+#if TLS_TCB_AT_TP
+  /* The rseq area block should come before the thread pointer and be at least 32 bytes. */
+  TEST_VERIFY (__rseq_offset <= RSEQ_TEST_MIN_SIZE);
+#elif TLS_DTV_AT_TP
+  /* The rseq area block should come after the thread pointer. */
+  TEST_VERIFY (__rseq_offset >= 0);
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+
+  /* Test a rseq registration with the same arguments as the internal
+     registration which should fail with errno == EBUSY.  */
+  TEST_VERIFY (((unsigned long) global_rseq % rseq_align) == 0);
+  TEST_VERIFY (__rseq_size <= rseq_alloc_size);
+  int ret = syscall (__NR_rseq, global_rseq, rseq_alloc_size, 0, RSEQ_SIG);
+  TEST_VERIFY (ret != 0);
+  TEST_COMPARE (errno, EBUSY);
+}
+
+static void *
+thread_func (void *ignored)
+{
+  do_rseq_main_test ();
+  return NULL;
 }
 
 static void
-do_rseq_test (void)
+proc_func (void *ignored)
+{
+  do_rseq_main_test ();
+}
+
+static int
+do_test (void)
 {
   if (!rseq_available ())
     {
@@ -60,21 +105,27 @@  do_rseq_test (void)
   printf ("info: getauxval (AT_RSEQ_FEATURE_SIZE): %ld\n",
           getauxval (AT_RSEQ_FEATURE_SIZE));
   printf ("info: getauxval (AT_RSEQ_ALIGN): %ld\n", getauxval (AT_RSEQ_ALIGN));
+
+  puts ("info: checking main thread");
+  do_rseq_main_test ();
+
+  puts ("info: checking main thread (2)");
   do_rseq_main_test ();
+
+  puts ("info: checking new thread");
+  xpthread_join (xpthread_create (NULL, thread_func, NULL));
+
+  puts ("info: checking subprocess");
+  support_isolate_in_subprocess (proc_func, NULL);
+
+  return 0;
 }
 #else /* RSEQ_SIG */
-static void
-do_rseq_test (void)
-{
-  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
-}
-#endif /* RSEQ_SIG */
-
 static int
 do_test (void)
 {
-  do_rseq_test ();
-  return 0;
+  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
 }
+#endif /* RSEQ_SIG */
 
 #include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h
index dc603327d3..7a2e19b07f 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq.h
+++ b/sysdeps/unix/sysv/linux/tst-rseq.h
@@ -23,11 +23,16 @@ 
 #include <syscall.h>
 #include <sys/rseq.h>
 #include <tls.h>
+#include <rseq-internal.h>
+
+#define RSEQ_TEST_MIN_SIZE 32
+#define RSEQ_TEST_MIN_FEATURE_SIZE 20
+#define RSEQ_TEST_MIN_ALIGN 32
 
 static inline bool
 rseq_thread_registered (void)
 {
-  return THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id) >= 0;
+  return RSEQ_GETMEM_VOLATILE (rseq_get_area(), cpu_id) >= 0;
 }
 
 static inline int