diff mbox series

[v10,1/4] nptl: fix potential merge of __rseq_* relro symbols

Message ID 20240325182927.914830-2-mjeanson@efficios.com
State New
Headers show
Series [v10,1/4] nptl: fix potential merge of __rseq_* relro symbols | expand

Commit Message

Michael Jeanson March 25, 2024, 6:29 p.m. UTC
While working on a patch to add support for the extensible rseq ABI, we
came across an issue where a new 'const' variable would be merged with
the existing '__rseq_size' variable. We tracked this to the use of
'-fmerge-all-constants' which allows the compiler to merge identical
constant variables. This means that all 'const' variables in a compile
unit that are of the same size and are initialized to the same value can
be merged.

In this specific case, on 32 bit systems 'unsigned int' and 'ptrdiff_t'
are both 4 bytes and initialized to 0 which should trigger the merge.
However for reasons we haven't delved into when the attribute 'section
(".data.rel.ro")' is added to the mix, only variables of the same exact
types are merged. As far as we know this behavior is not specified
anywhere and could change with a new compiler version, hence this patch.

Move the definitions of these variables into an assembler file and add
hidden writable aliases for internal use. This has the added bonus of
removing the asm workaround to set the values on rseq registration.

Tested on Debian 12 with GCC 12.2.

Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
Changes sinve v8:
- Remove superfluous attributes on externs
---
 csu/Makefile                  |  2 +-
 csu/rseq-sizes.sym            |  8 +++++
 elf/Makefile                  |  1 +
 elf/dl-rseq-symbols.S         | 55 +++++++++++++++++++++++++++++++++++
 sysdeps/nptl/dl-tls_init_tp.c | 14 ++++-----
 5 files changed, 71 insertions(+), 9 deletions(-)
 create mode 100644 csu/rseq-sizes.sym
 create mode 100644 elf/dl-rseq-symbols.S

Comments

Florian Weimer July 1, 2024, 11:31 a.m. UTC | #1
* Michael Jeanson:

> diff --git a/csu/rseq-sizes.sym b/csu/rseq-sizes.sym
> new file mode 100644
> index 0000000000..c959758ff0
> --- /dev/null
> +++ b/csu/rseq-sizes.sym
> @@ -0,0 +1,8 @@
> +#include <stddef.h>
> +
> +--
> +RSEQ_SIZE_SIZE		sizeof (unsigned int)
> +RSEQ_SIZE_ALIGN		__alignof (unsigned int)
> +
> +RSEQ_OFFSET_SIZE	sizeof (ptrdiff_t)
> +RSEQ_OFFSET_ALIGN	__alignof (ptrdiff_t)

If you do not use the actual variable identifiers as the arguments to
sizeof and __alignof, you can as well use #if __WORDSIZE == 64 in the .S
file (for the offset).

I think we can just use natural alignment, so there's no need for the
separate *_ALIGN macro.

> diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S
> new file mode 100644
> index 0000000000..2d8e88367f
> --- /dev/null
> +++ b/elf/dl-rseq-symbols.S
> @@ -0,0 +1,55 @@

> +/* Define 2 symbols, __rseq_size is public const and _rseq_size, which is an
> +   alias of __rseq_size, but hidden and writable for internal use.
> */

Please say something about copy relocations here, like “These variables
are initialized to their final before copy relocations take place, so
the dynamic linker can use hidden references to them”, or whatever makes
it clear that this is in fact acceptable.

This should go in independently of the other changes, and I hope this
can still make 2.40.  It should be a relatively safe change.

> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
> index 092c274f36..2f9750c50b 100644
> --- a/sysdeps/nptl/dl-tls_init_tp.c
> +++ b/sysdeps/nptl/dl-tls_init_tp.c
> @@ -45,8 +45,10 @@ rtld_mutex_dummy (pthread_mutex_t *lock)
>  #endif
>  
>  const unsigned int __rseq_flags;
> -const unsigned int __rseq_size attribute_relro;
> -const ptrdiff_t __rseq_offset attribute_relro;
> +
> +/* The variables are in .data.relro but are not yet write-protected.  */
> +extern unsigned int _rseq_size;
> +extern ptrdiff_t _rseq_offset;

Missing attribute_hidden (yes, it was missing before as well).  The end
result should be that there are no dynamic symbol references in ld.so.

Thanks,
Florian
Michael Jeanson July 2, 2024, 10:43 p.m. UTC | #2
On 2024-07-01 07:31, Florian Weimer wrote:
> * Michael Jeanson:
> 
>> diff --git a/csu/rseq-sizes.sym b/csu/rseq-sizes.sym
>> new file mode 100644
>> index 0000000000..c959758ff0
>> --- /dev/null
>> +++ b/csu/rseq-sizes.sym
>> @@ -0,0 +1,8 @@
>> +#include <stddef.h>
>> +
>> +--
>> +RSEQ_SIZE_SIZE		sizeof (unsigned int)
>> +RSEQ_SIZE_ALIGN		__alignof (unsigned int)
>> +
>> +RSEQ_OFFSET_SIZE	sizeof (ptrdiff_t)
>> +RSEQ_OFFSET_ALIGN	__alignof (ptrdiff_t)
> 
> If you do not use the actual variable identifiers as the arguments to
> sizeof and __alignof, you can as well use #if __WORDSIZE == 64 in the .S
> file (for the offset).
> 
> I think we can just use natural alignment, so there's no need for the
> separate *_ALIGN macro.

Ack, I'll remove the *_ALIGN macros and use literals for *_SIZE.

> 
>> diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S
>> new file mode 100644
>> index 0000000000..2d8e88367f
>> --- /dev/null
>> +++ b/elf/dl-rseq-symbols.S
>> @@ -0,0 +1,55 @@
> 
>> +/* Define 2 symbols, __rseq_size is public const and _rseq_size, which is an
>> +   alias of __rseq_size, but hidden and writable for internal use.
>> */
> 
> Please say something about copy relocations here, like “These variables
> are initialized to their final before copy relocations take place, so
> the dynamic linker can use hidden references to them”, or whatever makes
> it clear that this is in fact acceptable.

Would this wording work for you?

    Define 2 symbols: '__rseq_offset' is public const and '_rseq_offset' (an
    alias of '__rseq_offset') is hidden and writable for internal use by the
    dynamic linker which will initialize the value both symbols point to
    before copy relocations take place.

> 
> This should go in independently of the other changes, and I hope this
> can still make 2.40.  It should be a relatively safe change.

Ack, I will re-send as a single patch.

> 
>> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
>> index 092c274f36..2f9750c50b 100644
>> --- a/sysdeps/nptl/dl-tls_init_tp.c
>> +++ b/sysdeps/nptl/dl-tls_init_tp.c
>> @@ -45,8 +45,10 @@ rtld_mutex_dummy (pthread_mutex_t *lock)
>>   #endif
>>   
>>   const unsigned int __rseq_flags;
>> -const unsigned int __rseq_size attribute_relro;
>> -const ptrdiff_t __rseq_offset attribute_relro;
>> +
>> +/* The variables are in .data.relro but are not yet write-protected.  */
>> +extern unsigned int _rseq_size;
>> +extern ptrdiff_t _rseq_offset;
> 
> Missing attribute_hidden (yes, it was missing before as well).  The end
> result should be that there are no dynamic symbol references in ld.so.

Ack.
Florian Weimer July 3, 2024, 7:19 a.m. UTC | #3
* Michael Jeanson:

> Would this wording work for you?
>
>    Define 2 symbols: '__rseq_offset' is public const and '_rseq_offset' (an
>    alias of '__rseq_offset') is hidden and writable for internal use by the
>    dynamic linker which will initialize the value both symbols point to
>    before copy relocations take place.

Yes, looks good.

Thanks,
Florian
diff mbox series

Patch

diff --git a/csu/Makefile b/csu/Makefile
index 946fd91031..948b1aa79e 100644
--- a/csu/Makefile
+++ b/csu/Makefile
@@ -136,7 +136,7 @@  before-compile += $(objpfx)abi-tag.h
 generated += abi-tag.h
 
 # Put it here to generate it earlier.
-gen-as-const-headers += rtld-sizes.sym
+gen-as-const-headers += rtld-sizes.sym rseq-sizes.sym
 
 # These are the special initializer/finalizer files.  They are always the
 # first and last file in the link.  crti.o ... crtn.o define the global
diff --git a/csu/rseq-sizes.sym b/csu/rseq-sizes.sym
new file mode 100644
index 0000000000..c959758ff0
--- /dev/null
+++ b/csu/rseq-sizes.sym
@@ -0,0 +1,8 @@ 
+#include <stddef.h>
+
+--
+RSEQ_SIZE_SIZE		sizeof (unsigned int)
+RSEQ_SIZE_ALIGN		__alignof (unsigned int)
+
+RSEQ_OFFSET_SIZE	sizeof (ptrdiff_t)
+RSEQ_OFFSET_ALIGN	__alignof (ptrdiff_t)
diff --git a/elf/Makefile b/elf/Makefile
index 4f1903391a..1019e3e2dd 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -73,6 +73,7 @@  dl-routines = \
   dl-origin \
   dl-printf \
   dl-reloc \
+  dl-rseq-symbols \
   dl-runtime \
   dl-scope \
   dl-setup_hash \
diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S
new file mode 100644
index 0000000000..2d8e88367f
--- /dev/null
+++ b/elf/dl-rseq-symbols.S
@@ -0,0 +1,55 @@ 
+/* Define symbols used by rseq.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <rseq-sizes.h>
+#include <sysdep.h>
+
+/* Some targets define a macro to denote the zero register.  */
+#undef zero
+
+/* Define 2 symbols, __rseq_size is public const and _rseq_size, which is an
+   alias of __rseq_size, but hidden and writable for internal use.  */
+
+	.globl	__rseq_size
+	.type	__rseq_size, %object
+	.size	__rseq_size, RSEQ_SIZE_SIZE
+	.hidden _rseq_size
+	.globl	_rseq_size
+	.type	_rseq_size, %object
+	.size	_rseq_size, RSEQ_SIZE_SIZE
+	.section .data.rel.ro
+	.balign	RSEQ_SIZE_ALIGN
+__rseq_size:
+_rseq_size:
+	.zero	RSEQ_SIZE_SIZE
+
+/* Define 2 symbols, __rseq_offset is public const and _rseq_offset, which is an
+   alias of __rseq_offset, but hidden and writable for internal use.  */
+
+	.globl	__rseq_offset
+	.type	__rseq_offset, %object
+	.size	__rseq_offset, RSEQ_OFFSET_SIZE
+	.hidden _rseq_offset
+	.globl	_rseq_offset
+	.type	_rseq_offset, %object
+	.size	_rseq_offset, RSEQ_OFFSET_SIZE
+	.section .data.rel.ro
+	.balign	RSEQ_OFFSET_ALIGN
+__rseq_offset:
+_rseq_offset:
+	.zero	RSEQ_OFFSET_SIZE
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index 092c274f36..2f9750c50b 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -45,8 +45,10 @@  rtld_mutex_dummy (pthread_mutex_t *lock)
 #endif
 
 const unsigned int __rseq_flags;
-const unsigned int __rseq_size attribute_relro;
-const ptrdiff_t __rseq_offset attribute_relro;
+
+/* The variables are in .data.relro but are not yet write-protected.  */
+extern unsigned int _rseq_size;
+extern ptrdiff_t _rseq_offset;
 
 void
 __tls_pre_init_tp (void)
@@ -105,10 +107,7 @@  __tls_init_tp (void)
     do_rseq = TUNABLE_GET (rseq, int, NULL);
     if (rseq_register_current_thread (pd, do_rseq))
       {
-        /* We need a writable view of the variables.  They are in
-           .data.relro and are not yet write-protected.  */
-        extern unsigned int size __asm__ ("__rseq_size");
-        size = sizeof (pd->rseq_area);
+        _rseq_size = sizeof (pd->rseq_area);
       }
 
 #ifdef RSEQ_SIG
@@ -117,8 +116,7 @@  __tls_init_tp (void)
        all targets support __thread_pointer, so set __rseq_offset only
        if the rseq registration may have happened because RSEQ_SIG is
        defined.  */
-    extern ptrdiff_t offset __asm__ ("__rseq_offset");
-    offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
+    _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
 #endif
   }