diff mbox series

[v2] Linux: Call dl_hwcap_check earlier, before full auxiliary vector parse

Message ID 87y17339vb.fsf@oldenburg.str.redhat.com
State New
Headers show
Series [v2] Linux: Call dl_hwcap_check earlier, before full auxiliary vector parse | expand

Commit Message

Florian Weimer June 17, 2024, 2:46 p.m. UTC
The full parsing routine may need the memset function, which could
require non-baseline ISA support.

---
v2: Remove spurious braces from sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h.
 sysdeps/generic/dl-hwcap-check.h              |  5 ++++-
 sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h | 22 ++++++++++++++++++----
 sysdeps/s390/s390-64/dl-hwcap-check.h         | 20 +++++++++++++++++---
 sysdeps/unix/sysv/linux/dl-sysdep.c           | 20 +++++++++++++-------
 4 files changed, 52 insertions(+), 15 deletions(-)


base-commit: 55eb99e9a9d840ba452b128be14d6529c2dde039

Comments

Adhemerval Zanella Netto June 17, 2024, 3:52 p.m. UTC | #1
On 17/06/24 11:46, Florian Weimer wrote:
> The full parsing routine may need the memset function, which could
> require non-baseline ISA support.

It is not clear to me what this patch is trying to fix. It that
_dl_sysdep_parse_arguments may issue a memset call from compiler autogenerated
calls?

> 
> ---
> v2: Remove spurious braces from sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h.
>  sysdeps/generic/dl-hwcap-check.h              |  5 ++++-
>  sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h | 22 ++++++++++++++++++----
>  sysdeps/s390/s390-64/dl-hwcap-check.h         | 20 +++++++++++++++++---
>  sysdeps/unix/sysv/linux/dl-sysdep.c           | 20 +++++++++++++-------
>  4 files changed, 52 insertions(+), 15 deletions(-)
> 
> diff --git a/sysdeps/generic/dl-hwcap-check.h b/sysdeps/generic/dl-hwcap-check.h
> index 55efd129be..65dba6d1a0 100644
> --- a/sysdeps/generic/dl-hwcap-check.h
> +++ b/sysdeps/generic/dl-hwcap-check.h
> @@ -22,7 +22,10 @@
>  static inline void
>  dl_hwcap_check (void)
>  {
> -  /* The generic implementation does not perform any checks.  */
> +  /* The generic implementation does not perform any checks.  Specific
> +     implementations may assume that GLRO(dl_auxv) has been
> +     initialized (if it exists), but GLRO(dl_hwcap), GLRO(dl_hwcap2)
> +     have not been set at this point.  */
>  }
>  
>  #endif /* _DL_HWCAP_CHECK_H */
> diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h
> index 6d463f0511..d7b2a2ca4d 100644
> --- a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h
> +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h
> @@ -25,13 +25,27 @@
>  static inline void
>  dl_hwcap_check (void)
>  {
> +#if defined GCCMACRO_ARCH_PWR9 || defined GCCMACRO__FLOAT128_HARDWARE__ \
> +  || defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__ \
> +  || defined GCCMACRO__MMA__
> +  /* Early auxiliary vector parsing, to avoid ISA dependencies in
> +     later full parsing.  */
> +  unsigned long int dl_hwcap2 = 0;
> +  for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++)
> +    if (av->a_type == AT_HWCAP2)
> +      {
> +        dl_hwcap2 = av->a_un.a_val;
> +        break;
> +      }
> +#endif
> +
>  #ifdef GCCMACRO_ARCH_PWR9
> -  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
> +  if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_00) == 0)
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks ISA 3.00 support (POWER9 or later required)\n");
>  #endif
>  #ifdef GCCMACRO__FLOAT128_HARDWARE__
> -  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_IEEE128) == 0)
> +  if ((dl_hwcap2 & PPC_FEATURE2_HAS_IEEE128) == 0)
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n");
>  #endif
> @@ -39,12 +53,12 @@ Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n");
>        running on POWER9 because there are faulting PCREL instructions
>        before this point.  */
>  #if defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__
> -  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
> +  if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_1) == 0)
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks ISA 3.10 support (POWER10 or later required)\n");
>  #endif
>  #ifdef GCCMACRO__MMA__
> -  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_MMA) == 0)
> +  if ((dl_hwcap2 & PPC_FEATURE2_MMA) == 0)
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks MMA support (POWER10 or later required)\n");
>  #endif
> diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h
> index 6ad3242cc9..ee67ebf76e 100644
> --- a/sysdeps/s390/s390-64/dl-hwcap-check.h
> +++ b/sysdeps/s390/s390-64/dl-hwcap-check.h
> @@ -26,16 +26,30 @@ static inline void
>  dl_hwcap_check (void)
>  {
>  #if defined __ARCH__
> +# if GCCMACRO__ARCH__ >= 12
> +  /* Early auxiliary vector parsing, to avoid ISA dependencies in
> +     later full parsing.  */
> +  unsigned long int dl_hwcap = 0;
> +  {
> +    for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++)
> +      if (av->a_type == AT_HWCAP)
> +        {
> +          dl_hwcap = av->a_un.a_val;
> +          break;
> +        }
> +  }
> +# endif
> +
>  # if GCCMACRO__ARCH__ >= 14
> -  if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2))
> +  if (!(dl_hwcap & HWCAP_S390_VXRS_PDE2))
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n");
>  # elif GCCMACRO__ARCH__ >= 13
> -  if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2))
> +  if (!(dl_hwcap & HWCAP_S390_VXRS_EXT2))
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n");
>  # elif GCCMACRO__ARCH__ >= 12
> -  if (!(GLRO(dl_hwcap) & HWCAP_S390_VXE))
> +  if (!(dl_hwcap & HWCAP_S390_VXE))
>      _dl_fatal_printf ("\
>  Fatal glibc error: CPU lacks VXE support (z14 or later required)\n");
>  # endif
> diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
> index e1b14e9eb3..c54100c8a5 100644
> --- a/sysdeps/unix/sysv/linux/dl-sysdep.c
> +++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
> @@ -70,11 +70,10 @@ struct dl_main_arguments
>    ElfW(Addr) user_entry;
>  };
>  
> -/* Separate function, so that dl_main can be called without the large
> -   array on the stack.  */
> +/* First part of argument parsing, up to and including GLRO(dl_auxv)
> +   initialization.  */
>  static void
> -_dl_sysdep_parse_arguments (void **start_argptr,
> -			    struct dl_main_arguments *args)
> +_dl_sysdep_parse_arguments_1 (void **start_argptr)
>  {
>    _dl_argc = (intptr_t) *start_argptr;
>    _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation.  */
> @@ -86,7 +85,13 @@ _dl_sysdep_parse_arguments (void **start_argptr,
>  	GLRO(dl_auxv) = (ElfW(auxv_t) *) (tmp + 1);
>  	break;
>        }
> +}
>  
> +/* Separate function, so that dl_main can be called without the large
> +   array on the stack.  */
> +static void __attribute__ ((noinline))
> +_dl_sysdep_parse_arguments_2 (struct dl_main_arguments *args)
> +{
>    dl_parse_auxv_t auxv_values = { 0, };
>    _dl_parse_auxv (GLRO(dl_auxv), auxv_values);
>  
> @@ -102,11 +107,12 @@ _dl_sysdep_start (void **start_argptr,
>  {
>    __libc_stack_end = DL_STACK_END (start_argptr);
>  
> -  struct dl_main_arguments dl_main_args;
> -  _dl_sysdep_parse_arguments (start_argptr, &dl_main_args);
> -
> +  _dl_sysdep_parse_arguments_1 (start_argptr);
>    dl_hwcap_check ();
>  
> +  struct dl_main_arguments dl_main_args;
> +  _dl_sysdep_parse_arguments_2 (&dl_main_args);
> +
>    __tunables_init (_environ);
>  
>    /* Initialize DSO sorting algorithm after tunables.  */
> 
> base-commit: 55eb99e9a9d840ba452b128be14d6529c2dde039
>
Florian Weimer June 17, 2024, 4:14 p.m. UTC | #2
* Adhemerval Zanella Netto:

> On 17/06/24 11:46, Florian Weimer wrote:
>> The full parsing routine may need the memset function, which could
>> require non-baseline ISA support.
>
> It is not clear to me what this patch is trying to fix. It that
> _dl_sysdep_parse_arguments may issue a memset call from compiler
> autogenerated calls?

Yes, it's this initialization:

  dl_parse_auxv_t auxv_values = { 0, };

Previously, the array was small enough that the memset was expanded
inline on powerpc64le.  Or maybe we didn't have a different memset for
POWER9?  Either way, something has changed, and we now crash in this
place, instead of printing a CPU compatibility diagnostic.

Thanks,
Florian
Adhemerval Zanella Netto June 17, 2024, 4:17 p.m. UTC | #3
On 17/06/24 13:14, Florian Weimer wrote:
> * Adhemerval Zanella Netto:
> 
>> On 17/06/24 11:46, Florian Weimer wrote:
>>> The full parsing routine may need the memset function, which could
>>> require non-baseline ISA support.
>>
>> It is not clear to me what this patch is trying to fix. It that
>> _dl_sysdep_parse_arguments may issue a memset call from compiler
>> autogenerated calls?
> 
> Yes, it's this initialization:
> 
>   dl_parse_auxv_t auxv_values = { 0, };
> 
> Previously, the array was small enough that the memset was expanded
> inline on powerpc64le.  Or maybe we didn't have a different memset for
> POWER9?  Either way, something has changed, and we now crash in this
> place, instead of printing a CPU compatibility diagnostic.

Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to
generic one?
Florian Weimer June 17, 2024, 5:03 p.m. UTC | #4
* Adhemerval Zanella Netto:

> On 17/06/24 13:14, Florian Weimer wrote:
>> * Adhemerval Zanella Netto:
>> 
>>> On 17/06/24 11:46, Florian Weimer wrote:
>>>> The full parsing routine may need the memset function, which could
>>>> require non-baseline ISA support.
>>>
>>> It is not clear to me what this patch is trying to fix. It that
>>> _dl_sysdep_parse_arguments may issue a memset call from compiler
>>> autogenerated calls?
>> 
>> Yes, it's this initialization:
>> 
>>   dl_parse_auxv_t auxv_values = { 0, };
>> 
>> Previously, the array was small enough that the memset was expanded
>> inline on powerpc64le.  Or maybe we didn't have a different memset for
>> POWER9?  Either way, something has changed, and we now crash in this
>> place, instead of printing a CPU compatibility diagnostic.
>
> Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to
> generic one?

The generic memset is the POWER9 memset because we are building most of
glibc with -mcpu=power9.  That auxv_values initialization is currently
the only place where the POWER8 memset would be required.

Thanks,
Florian
Adhemerval Zanella Netto June 17, 2024, 5:36 p.m. UTC | #5
On 17/06/24 14:03, Florian Weimer wrote:
> * Adhemerval Zanella Netto:
> 
>> On 17/06/24 13:14, Florian Weimer wrote:
>>> * Adhemerval Zanella Netto:
>>>
>>>> On 17/06/24 11:46, Florian Weimer wrote:
>>>>> The full parsing routine may need the memset function, which could
>>>>> require non-baseline ISA support.
>>>>
>>>> It is not clear to me what this patch is trying to fix. It that
>>>> _dl_sysdep_parse_arguments may issue a memset call from compiler
>>>> autogenerated calls?
>>>
>>> Yes, it's this initialization:
>>>
>>>   dl_parse_auxv_t auxv_values = { 0, };
>>>
>>> Previously, the array was small enough that the memset was expanded
>>> inline on powerpc64le.  Or maybe we didn't have a different memset for
>>> POWER9?  Either way, something has changed, and we now crash in this
>>> place, instead of printing a CPU compatibility diagnostic.
>>
>> Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to
>> generic one?
> 
> The generic memset is the POWER9 memset because we are building most of
> glibc with -mcpu=power9.  That auxv_values initialization is currently
> the only place where the POWER8 memset would be required.

Can't we make something similar to what s390x does and set the default
memset depending of the minimal ISA used to build glibc?
Adhemerval Zanella Netto June 17, 2024, 5:37 p.m. UTC | #6
On 17/06/24 14:36, Adhemerval Zanella Netto wrote:
> 
> 
> On 17/06/24 14:03, Florian Weimer wrote:
>> * Adhemerval Zanella Netto:
>>
>>> On 17/06/24 13:14, Florian Weimer wrote:
>>>> * Adhemerval Zanella Netto:
>>>>
>>>>> On 17/06/24 11:46, Florian Weimer wrote:
>>>>>> The full parsing routine may need the memset function, which could
>>>>>> require non-baseline ISA support.
>>>>>
>>>>> It is not clear to me what this patch is trying to fix. It that
>>>>> _dl_sysdep_parse_arguments may issue a memset call from compiler
>>>>> autogenerated calls?
>>>>
>>>> Yes, it's this initialization:
>>>>
>>>>   dl_parse_auxv_t auxv_values = { 0, };
>>>>
>>>> Previously, the array was small enough that the memset was expanded
>>>> inline on powerpc64le.  Or maybe we didn't have a different memset for
>>>> POWER9?  Either way, something has changed, and we now crash in this
>>>> place, instead of printing a CPU compatibility diagnostic.
>>>
>>> Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to
>>> generic one?
>>
>> The generic memset is the POWER9 memset because we are building most of
>> glibc with -mcpu=power9.  That auxv_values initialization is currently
>> the only place where the POWER8 memset would be required.
> 
> Can't we make something similar to what s390x does and set the default
> memset depending of the minimal ISA used to build glibc?

I am insisting on the dl-symbol-redir-ifunc.h because I have added exactly
to avoid the constantly refactor the loader code to stumble in this kind
of traps.
Florian Weimer June 17, 2024, 7:34 p.m. UTC | #7
* Adhemerval Zanella Netto:

> I am insisting on the dl-symbol-redir-ifunc.h because I have added exactly
> to avoid the constantly refactor the loader code to stumble in this kind
> of traps.

I forgot this mechanism.  It seems to work for this.  I'll send an
updated patch.

Thanks,
Florian
diff mbox series

Patch

diff --git a/sysdeps/generic/dl-hwcap-check.h b/sysdeps/generic/dl-hwcap-check.h
index 55efd129be..65dba6d1a0 100644
--- a/sysdeps/generic/dl-hwcap-check.h
+++ b/sysdeps/generic/dl-hwcap-check.h
@@ -22,7 +22,10 @@ 
 static inline void
 dl_hwcap_check (void)
 {
-  /* The generic implementation does not perform any checks.  */
+  /* The generic implementation does not perform any checks.  Specific
+     implementations may assume that GLRO(dl_auxv) has been
+     initialized (if it exists), but GLRO(dl_hwcap), GLRO(dl_hwcap2)
+     have not been set at this point.  */
 }
 
 #endif /* _DL_HWCAP_CHECK_H */
diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h
index 6d463f0511..d7b2a2ca4d 100644
--- a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h
+++ b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h
@@ -25,13 +25,27 @@ 
 static inline void
 dl_hwcap_check (void)
 {
+#if defined GCCMACRO_ARCH_PWR9 || defined GCCMACRO__FLOAT128_HARDWARE__ \
+  || defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__ \
+  || defined GCCMACRO__MMA__
+  /* Early auxiliary vector parsing, to avoid ISA dependencies in
+     later full parsing.  */
+  unsigned long int dl_hwcap2 = 0;
+  for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++)
+    if (av->a_type == AT_HWCAP2)
+      {
+        dl_hwcap2 = av->a_un.a_val;
+        break;
+      }
+#endif
+
 #ifdef GCCMACRO_ARCH_PWR9
-  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
+  if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_00) == 0)
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks ISA 3.00 support (POWER9 or later required)\n");
 #endif
 #ifdef GCCMACRO__FLOAT128_HARDWARE__
-  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_IEEE128) == 0)
+  if ((dl_hwcap2 & PPC_FEATURE2_HAS_IEEE128) == 0)
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n");
 #endif
@@ -39,12 +53,12 @@  Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n");
       running on POWER9 because there are faulting PCREL instructions
       before this point.  */
 #if defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__
-  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
+  if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_1) == 0)
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks ISA 3.10 support (POWER10 or later required)\n");
 #endif
 #ifdef GCCMACRO__MMA__
-  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_MMA) == 0)
+  if ((dl_hwcap2 & PPC_FEATURE2_MMA) == 0)
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks MMA support (POWER10 or later required)\n");
 #endif
diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h
index 6ad3242cc9..ee67ebf76e 100644
--- a/sysdeps/s390/s390-64/dl-hwcap-check.h
+++ b/sysdeps/s390/s390-64/dl-hwcap-check.h
@@ -26,16 +26,30 @@  static inline void
 dl_hwcap_check (void)
 {
 #if defined __ARCH__
+# if GCCMACRO__ARCH__ >= 12
+  /* Early auxiliary vector parsing, to avoid ISA dependencies in
+     later full parsing.  */
+  unsigned long int dl_hwcap = 0;
+  {
+    for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++)
+      if (av->a_type == AT_HWCAP)
+        {
+          dl_hwcap = av->a_un.a_val;
+          break;
+        }
+  }
+# endif
+
 # if GCCMACRO__ARCH__ >= 14
-  if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2))
+  if (!(dl_hwcap & HWCAP_S390_VXRS_PDE2))
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n");
 # elif GCCMACRO__ARCH__ >= 13
-  if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2))
+  if (!(dl_hwcap & HWCAP_S390_VXRS_EXT2))
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n");
 # elif GCCMACRO__ARCH__ >= 12
-  if (!(GLRO(dl_hwcap) & HWCAP_S390_VXE))
+  if (!(dl_hwcap & HWCAP_S390_VXE))
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks VXE support (z14 or later required)\n");
 # endif
diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
index e1b14e9eb3..c54100c8a5 100644
--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
+++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
@@ -70,11 +70,10 @@  struct dl_main_arguments
   ElfW(Addr) user_entry;
 };
 
-/* Separate function, so that dl_main can be called without the large
-   array on the stack.  */
+/* First part of argument parsing, up to and including GLRO(dl_auxv)
+   initialization.  */
 static void
-_dl_sysdep_parse_arguments (void **start_argptr,
-			    struct dl_main_arguments *args)
+_dl_sysdep_parse_arguments_1 (void **start_argptr)
 {
   _dl_argc = (intptr_t) *start_argptr;
   _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation.  */
@@ -86,7 +85,13 @@  _dl_sysdep_parse_arguments (void **start_argptr,
 	GLRO(dl_auxv) = (ElfW(auxv_t) *) (tmp + 1);
 	break;
       }
+}
 
+/* Separate function, so that dl_main can be called without the large
+   array on the stack.  */
+static void __attribute__ ((noinline))
+_dl_sysdep_parse_arguments_2 (struct dl_main_arguments *args)
+{
   dl_parse_auxv_t auxv_values = { 0, };
   _dl_parse_auxv (GLRO(dl_auxv), auxv_values);
 
@@ -102,11 +107,12 @@  _dl_sysdep_start (void **start_argptr,
 {
   __libc_stack_end = DL_STACK_END (start_argptr);
 
-  struct dl_main_arguments dl_main_args;
-  _dl_sysdep_parse_arguments (start_argptr, &dl_main_args);
-
+  _dl_sysdep_parse_arguments_1 (start_argptr);
   dl_hwcap_check ();
 
+  struct dl_main_arguments dl_main_args;
+  _dl_sysdep_parse_arguments_2 (&dl_main_args);
+
   __tunables_init (_environ);
 
   /* Initialize DSO sorting algorithm after tunables.  */