Message ID | 87y17339vb.fsf@oldenburg.str.redhat.com |
---|---|
State | New |
Headers | show |
Series | [v2] Linux: Call dl_hwcap_check earlier, before full auxiliary vector parse | expand |
On 17/06/24 11:46, Florian Weimer wrote: > The full parsing routine may need the memset function, which could > require non-baseline ISA support. It is not clear to me what this patch is trying to fix. It that _dl_sysdep_parse_arguments may issue a memset call from compiler autogenerated calls? > > --- > v2: Remove spurious braces from sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h. > sysdeps/generic/dl-hwcap-check.h | 5 ++++- > sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h | 22 ++++++++++++++++++---- > sysdeps/s390/s390-64/dl-hwcap-check.h | 20 +++++++++++++++++--- > sysdeps/unix/sysv/linux/dl-sysdep.c | 20 +++++++++++++------- > 4 files changed, 52 insertions(+), 15 deletions(-) > > diff --git a/sysdeps/generic/dl-hwcap-check.h b/sysdeps/generic/dl-hwcap-check.h > index 55efd129be..65dba6d1a0 100644 > --- a/sysdeps/generic/dl-hwcap-check.h > +++ b/sysdeps/generic/dl-hwcap-check.h > @@ -22,7 +22,10 @@ > static inline void > dl_hwcap_check (void) > { > - /* The generic implementation does not perform any checks. */ > + /* The generic implementation does not perform any checks. Specific > + implementations may assume that GLRO(dl_auxv) has been > + initialized (if it exists), but GLRO(dl_hwcap), GLRO(dl_hwcap2) > + have not been set at this point. */ > } > > #endif /* _DL_HWCAP_CHECK_H */ > diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h > index 6d463f0511..d7b2a2ca4d 100644 > --- a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h > +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h > @@ -25,13 +25,27 @@ > static inline void > dl_hwcap_check (void) > { > +#if defined GCCMACRO_ARCH_PWR9 || defined GCCMACRO__FLOAT128_HARDWARE__ \ > + || defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__ \ > + || defined GCCMACRO__MMA__ > + /* Early auxiliary vector parsing, to avoid ISA dependencies in > + later full parsing. */ > + unsigned long int dl_hwcap2 = 0; > + for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) > + if (av->a_type == AT_HWCAP2) > + { > + dl_hwcap2 = av->a_un.a_val; > + break; > + } > +#endif > + > #ifdef GCCMACRO_ARCH_PWR9 > - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0) > + if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_00) == 0) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks ISA 3.00 support (POWER9 or later required)\n"); > #endif > #ifdef GCCMACRO__FLOAT128_HARDWARE__ > - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_IEEE128) == 0) > + if ((dl_hwcap2 & PPC_FEATURE2_HAS_IEEE128) == 0) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n"); > #endif > @@ -39,12 +53,12 @@ Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n"); > running on POWER9 because there are faulting PCREL instructions > before this point. */ > #if defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__ > - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0) > + if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_1) == 0) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks ISA 3.10 support (POWER10 or later required)\n"); > #endif > #ifdef GCCMACRO__MMA__ > - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_MMA) == 0) > + if ((dl_hwcap2 & PPC_FEATURE2_MMA) == 0) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks MMA support (POWER10 or later required)\n"); > #endif > diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h > index 6ad3242cc9..ee67ebf76e 100644 > --- a/sysdeps/s390/s390-64/dl-hwcap-check.h > +++ b/sysdeps/s390/s390-64/dl-hwcap-check.h > @@ -26,16 +26,30 @@ static inline void > dl_hwcap_check (void) > { > #if defined __ARCH__ > +# if GCCMACRO__ARCH__ >= 12 > + /* Early auxiliary vector parsing, to avoid ISA dependencies in > + later full parsing. */ > + unsigned long int dl_hwcap = 0; > + { > + for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) > + if (av->a_type == AT_HWCAP) > + { > + dl_hwcap = av->a_un.a_val; > + break; > + } > + } > +# endif > + > # if GCCMACRO__ARCH__ >= 14 > - if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2)) > + if (!(dl_hwcap & HWCAP_S390_VXRS_PDE2)) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n"); > # elif GCCMACRO__ARCH__ >= 13 > - if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2)) > + if (!(dl_hwcap & HWCAP_S390_VXRS_EXT2)) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n"); > # elif GCCMACRO__ARCH__ >= 12 > - if (!(GLRO(dl_hwcap) & HWCAP_S390_VXE)) > + if (!(dl_hwcap & HWCAP_S390_VXE)) > _dl_fatal_printf ("\ > Fatal glibc error: CPU lacks VXE support (z14 or later required)\n"); > # endif > diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c > index e1b14e9eb3..c54100c8a5 100644 > --- a/sysdeps/unix/sysv/linux/dl-sysdep.c > +++ b/sysdeps/unix/sysv/linux/dl-sysdep.c > @@ -70,11 +70,10 @@ struct dl_main_arguments > ElfW(Addr) user_entry; > }; > > -/* Separate function, so that dl_main can be called without the large > - array on the stack. */ > +/* First part of argument parsing, up to and including GLRO(dl_auxv) > + initialization. */ > static void > -_dl_sysdep_parse_arguments (void **start_argptr, > - struct dl_main_arguments *args) > +_dl_sysdep_parse_arguments_1 (void **start_argptr) > { > _dl_argc = (intptr_t) *start_argptr; > _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */ > @@ -86,7 +85,13 @@ _dl_sysdep_parse_arguments (void **start_argptr, > GLRO(dl_auxv) = (ElfW(auxv_t) *) (tmp + 1); > break; > } > +} > > +/* Separate function, so that dl_main can be called without the large > + array on the stack. */ > +static void __attribute__ ((noinline)) > +_dl_sysdep_parse_arguments_2 (struct dl_main_arguments *args) > +{ > dl_parse_auxv_t auxv_values = { 0, }; > _dl_parse_auxv (GLRO(dl_auxv), auxv_values); > > @@ -102,11 +107,12 @@ _dl_sysdep_start (void **start_argptr, > { > __libc_stack_end = DL_STACK_END (start_argptr); > > - struct dl_main_arguments dl_main_args; > - _dl_sysdep_parse_arguments (start_argptr, &dl_main_args); > - > + _dl_sysdep_parse_arguments_1 (start_argptr); > dl_hwcap_check (); > > + struct dl_main_arguments dl_main_args; > + _dl_sysdep_parse_arguments_2 (&dl_main_args); > + > __tunables_init (_environ); > > /* Initialize DSO sorting algorithm after tunables. */ > > base-commit: 55eb99e9a9d840ba452b128be14d6529c2dde039 >
* Adhemerval Zanella Netto: > On 17/06/24 11:46, Florian Weimer wrote: >> The full parsing routine may need the memset function, which could >> require non-baseline ISA support. > > It is not clear to me what this patch is trying to fix. It that > _dl_sysdep_parse_arguments may issue a memset call from compiler > autogenerated calls? Yes, it's this initialization: dl_parse_auxv_t auxv_values = { 0, }; Previously, the array was small enough that the memset was expanded inline on powerpc64le. Or maybe we didn't have a different memset for POWER9? Either way, something has changed, and we now crash in this place, instead of printing a CPU compatibility diagnostic. Thanks, Florian
On 17/06/24 13:14, Florian Weimer wrote: > * Adhemerval Zanella Netto: > >> On 17/06/24 11:46, Florian Weimer wrote: >>> The full parsing routine may need the memset function, which could >>> require non-baseline ISA support. >> >> It is not clear to me what this patch is trying to fix. It that >> _dl_sysdep_parse_arguments may issue a memset call from compiler >> autogenerated calls? > > Yes, it's this initialization: > > dl_parse_auxv_t auxv_values = { 0, }; > > Previously, the array was small enough that the memset was expanded > inline on powerpc64le. Or maybe we didn't have a different memset for > POWER9? Either way, something has changed, and we now crash in this > place, instead of printing a CPU compatibility diagnostic. Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to generic one?
* Adhemerval Zanella Netto: > On 17/06/24 13:14, Florian Weimer wrote: >> * Adhemerval Zanella Netto: >> >>> On 17/06/24 11:46, Florian Weimer wrote: >>>> The full parsing routine may need the memset function, which could >>>> require non-baseline ISA support. >>> >>> It is not clear to me what this patch is trying to fix. It that >>> _dl_sysdep_parse_arguments may issue a memset call from compiler >>> autogenerated calls? >> >> Yes, it's this initialization: >> >> dl_parse_auxv_t auxv_values = { 0, }; >> >> Previously, the array was small enough that the memset was expanded >> inline on powerpc64le. Or maybe we didn't have a different memset for >> POWER9? Either way, something has changed, and we now crash in this >> place, instead of printing a CPU compatibility diagnostic. > > Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to > generic one? The generic memset is the POWER9 memset because we are building most of glibc with -mcpu=power9. That auxv_values initialization is currently the only place where the POWER8 memset would be required. Thanks, Florian
On 17/06/24 14:03, Florian Weimer wrote: > * Adhemerval Zanella Netto: > >> On 17/06/24 13:14, Florian Weimer wrote: >>> * Adhemerval Zanella Netto: >>> >>>> On 17/06/24 11:46, Florian Weimer wrote: >>>>> The full parsing routine may need the memset function, which could >>>>> require non-baseline ISA support. >>>> >>>> It is not clear to me what this patch is trying to fix. It that >>>> _dl_sysdep_parse_arguments may issue a memset call from compiler >>>> autogenerated calls? >>> >>> Yes, it's this initialization: >>> >>> dl_parse_auxv_t auxv_values = { 0, }; >>> >>> Previously, the array was small enough that the memset was expanded >>> inline on powerpc64le. Or maybe we didn't have a different memset for >>> POWER9? Either way, something has changed, and we now crash in this >>> place, instead of printing a CPU compatibility diagnostic. >> >> Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to >> generic one? > > The generic memset is the POWER9 memset because we are building most of > glibc with -mcpu=power9. That auxv_values initialization is currently > the only place where the POWER8 memset would be required. Can't we make something similar to what s390x does and set the default memset depending of the minimal ISA used to build glibc?
On 17/06/24 14:36, Adhemerval Zanella Netto wrote: > > > On 17/06/24 14:03, Florian Weimer wrote: >> * Adhemerval Zanella Netto: >> >>> On 17/06/24 13:14, Florian Weimer wrote: >>>> * Adhemerval Zanella Netto: >>>> >>>>> On 17/06/24 11:46, Florian Weimer wrote: >>>>>> The full parsing routine may need the memset function, which could >>>>>> require non-baseline ISA support. >>>>> >>>>> It is not clear to me what this patch is trying to fix. It that >>>>> _dl_sysdep_parse_arguments may issue a memset call from compiler >>>>> autogenerated calls? >>>> >>>> Yes, it's this initialization: >>>> >>>> dl_parse_auxv_t auxv_values = { 0, }; >>>> >>>> Previously, the array was small enough that the memset was expanded >>>> inline on powerpc64le. Or maybe we didn't have a different memset for >>>> POWER9? Either way, something has changed, and we now crash in this >>>> place, instead of printing a CPU compatibility diagnostic. >>> >>> Wouldn't dl-symbol-redir-ifunc.h fix this by routing the memset to >>> generic one? >> >> The generic memset is the POWER9 memset because we are building most of >> glibc with -mcpu=power9. That auxv_values initialization is currently >> the only place where the POWER8 memset would be required. > > Can't we make something similar to what s390x does and set the default > memset depending of the minimal ISA used to build glibc? I am insisting on the dl-symbol-redir-ifunc.h because I have added exactly to avoid the constantly refactor the loader code to stumble in this kind of traps.
* Adhemerval Zanella Netto: > I am insisting on the dl-symbol-redir-ifunc.h because I have added exactly > to avoid the constantly refactor the loader code to stumble in this kind > of traps. I forgot this mechanism. It seems to work for this. I'll send an updated patch. Thanks, Florian
diff --git a/sysdeps/generic/dl-hwcap-check.h b/sysdeps/generic/dl-hwcap-check.h index 55efd129be..65dba6d1a0 100644 --- a/sysdeps/generic/dl-hwcap-check.h +++ b/sysdeps/generic/dl-hwcap-check.h @@ -22,7 +22,10 @@ static inline void dl_hwcap_check (void) { - /* The generic implementation does not perform any checks. */ + /* The generic implementation does not perform any checks. Specific + implementations may assume that GLRO(dl_auxv) has been + initialized (if it exists), but GLRO(dl_hwcap), GLRO(dl_hwcap2) + have not been set at this point. */ } #endif /* _DL_HWCAP_CHECK_H */ diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h index 6d463f0511..d7b2a2ca4d 100644 --- a/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcap-check.h @@ -25,13 +25,27 @@ static inline void dl_hwcap_check (void) { +#if defined GCCMACRO_ARCH_PWR9 || defined GCCMACRO__FLOAT128_HARDWARE__ \ + || defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__ \ + || defined GCCMACRO__MMA__ + /* Early auxiliary vector parsing, to avoid ISA dependencies in + later full parsing. */ + unsigned long int dl_hwcap2 = 0; + for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) + if (av->a_type == AT_HWCAP2) + { + dl_hwcap2 = av->a_un.a_val; + break; + } +#endif + #ifdef GCCMACRO_ARCH_PWR9 - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0) + if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_00) == 0) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks ISA 3.00 support (POWER9 or later required)\n"); #endif #ifdef GCCMACRO__FLOAT128_HARDWARE__ - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_IEEE128) == 0) + if ((dl_hwcap2 & PPC_FEATURE2_HAS_IEEE128) == 0) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n"); #endif @@ -39,12 +53,12 @@ Fatal glibc error: CPU lacks float128 support (POWER 9 or later required)\n"); running on POWER9 because there are faulting PCREL instructions before this point. */ #if defined GCCMACRO_ARCH_PWR10 || defined GCCMACRO__PCREL__ - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0) + if ((dl_hwcap2 & PPC_FEATURE2_ARCH_3_1) == 0) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks ISA 3.10 support (POWER10 or later required)\n"); #endif #ifdef GCCMACRO__MMA__ - if ((GLRO (dl_hwcap2) & PPC_FEATURE2_MMA) == 0) + if ((dl_hwcap2 & PPC_FEATURE2_MMA) == 0) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks MMA support (POWER10 or later required)\n"); #endif diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h index 6ad3242cc9..ee67ebf76e 100644 --- a/sysdeps/s390/s390-64/dl-hwcap-check.h +++ b/sysdeps/s390/s390-64/dl-hwcap-check.h @@ -26,16 +26,30 @@ static inline void dl_hwcap_check (void) { #if defined __ARCH__ +# if GCCMACRO__ARCH__ >= 12 + /* Early auxiliary vector parsing, to avoid ISA dependencies in + later full parsing. */ + unsigned long int dl_hwcap = 0; + { + for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) + if (av->a_type == AT_HWCAP) + { + dl_hwcap = av->a_un.a_val; + break; + } + } +# endif + # if GCCMACRO__ARCH__ >= 14 - if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2)) + if (!(dl_hwcap & HWCAP_S390_VXRS_PDE2)) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n"); # elif GCCMACRO__ARCH__ >= 13 - if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2)) + if (!(dl_hwcap & HWCAP_S390_VXRS_EXT2)) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n"); # elif GCCMACRO__ARCH__ >= 12 - if (!(GLRO(dl_hwcap) & HWCAP_S390_VXE)) + if (!(dl_hwcap & HWCAP_S390_VXE)) _dl_fatal_printf ("\ Fatal glibc error: CPU lacks VXE support (z14 or later required)\n"); # endif diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c index e1b14e9eb3..c54100c8a5 100644 --- a/sysdeps/unix/sysv/linux/dl-sysdep.c +++ b/sysdeps/unix/sysv/linux/dl-sysdep.c @@ -70,11 +70,10 @@ struct dl_main_arguments ElfW(Addr) user_entry; }; -/* Separate function, so that dl_main can be called without the large - array on the stack. */ +/* First part of argument parsing, up to and including GLRO(dl_auxv) + initialization. */ static void -_dl_sysdep_parse_arguments (void **start_argptr, - struct dl_main_arguments *args) +_dl_sysdep_parse_arguments_1 (void **start_argptr) { _dl_argc = (intptr_t) *start_argptr; _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */ @@ -86,7 +85,13 @@ _dl_sysdep_parse_arguments (void **start_argptr, GLRO(dl_auxv) = (ElfW(auxv_t) *) (tmp + 1); break; } +} +/* Separate function, so that dl_main can be called without the large + array on the stack. */ +static void __attribute__ ((noinline)) +_dl_sysdep_parse_arguments_2 (struct dl_main_arguments *args) +{ dl_parse_auxv_t auxv_values = { 0, }; _dl_parse_auxv (GLRO(dl_auxv), auxv_values); @@ -102,11 +107,12 @@ _dl_sysdep_start (void **start_argptr, { __libc_stack_end = DL_STACK_END (start_argptr); - struct dl_main_arguments dl_main_args; - _dl_sysdep_parse_arguments (start_argptr, &dl_main_args); - + _dl_sysdep_parse_arguments_1 (start_argptr); dl_hwcap_check (); + struct dl_main_arguments dl_main_args; + _dl_sysdep_parse_arguments_2 (&dl_main_args); + __tunables_init (_environ); /* Initialize DSO sorting algorithm after tunables. */