Message ID | 20240228175114.271103-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86-64: Don't use SSE resolvers for ISA level 3 or above | expand |
On Wed, Feb 28, 2024 at 11:51 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > When glibc is built with ISA level 3 or above enabled, SSE resolvers > aren't available and glibc fails to build: > > ld: .../elf/librtld.os: in function `init_cpu_features': > .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave' > ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object > /usr/local/bin/ld: final link failed: bad value > > For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor > _dl_tlsdesc_dynamic_fxsave and also exclude _dl_tlsdesc_dynamic_fxsave. > > This fixes BZ #31429. > --- > sysdeps/x86/cpu-features.c | 17 +++++++++++------ > sysdeps/x86_64/dl-tlsdesc.S | 15 +++++++++------ > 2 files changed, 20 insertions(+), 12 deletions(-) > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c > index d71e8d3d2e..e7c7ece462 100644 > --- a/sysdeps/x86/cpu-features.c > +++ b/sysdeps/x86/cpu-features.c > @@ -18,6 +18,7 @@ > > #include <dl-hwcap.h> > #include <libc-pointer-arith.h> > +#include <isa-level.h> > #include <get-isa-level.h> > #include <cacheinfo.h> > #include <dl-cacheinfo.h> > @@ -1195,7 +1196,9 @@ no_cpuid: > TUNABLE_CALLBACK (set_x86_shstk)); > #endif > > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > +#endif > { > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) > { > @@ -1216,22 +1219,24 @@ no_cpuid: > #endif > } > } > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > else > { > -#ifdef __x86_64__ > +# ifdef __x86_64__ > GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; > -# ifdef SHARED > +# ifdef SHARED > GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; > -# endif > -#else > -# ifdef SHARED > +# endif > +# else > +# ifdef SHARED > if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) > GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; > else > GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; > +# endif > # endif > -#endif > } > +#endif > > #ifdef SHARED > # ifdef __x86_64__ > diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S > index ea69f5223a..057a10862a 100644 > --- a/sysdeps/x86_64/dl-tlsdesc.S > +++ b/sysdeps/x86_64/dl-tlsdesc.S > @@ -20,6 +20,7 @@ > #include <tls.h> > #include <cpu-features-offsets.h> > #include <features-offsets.h> > +#include <isa-level.h> > #include "tlsdesc.h" > #include "dl-trampoline-save.h" > > @@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak: > .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > > #ifdef SHARED > -# define USE_FXSAVE > -# define STATE_SAVE_ALIGNMENT 16 > -# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave > -# include "dl-tlsdesc-dynamic.h" > -# undef _dl_tlsdesc_dynamic > -# undef USE_FXSAVE > +# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > +# define USE_FXSAVE > +# define STATE_SAVE_ALIGNMENT 16 > +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave > +# include "dl-tlsdesc-dynamic.h" > +# undef _dl_tlsdesc_dynamic > +# undef USE_FXSAVE > +# endif > > # define USE_XSAVE > # define STATE_SAVE_ALIGNMENT 64 > -- > 2.43.2 > LGTM. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
On 2/28/24 07:51, H.J. Lu wrote: > --- a/sysdeps/x86/cpu-features.c > +++ b/sysdeps/x86/cpu-features.c > @@ -18,6 +18,7 @@ > > #include <dl-hwcap.h> > #include <libc-pointer-arith.h> > +#include <isa-level.h> > #include <get-isa-level.h> > #include <cacheinfo.h> > #include <dl-cacheinfo.h> > @@ -1195,7 +1196,9 @@ no_cpuid: > TUNABLE_CALLBACK (set_x86_shstk)); > #endif > > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > +#endif > { > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) > { Surely this can be written with IF and not IFDEF: if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL || GLRO(dl_x86_cpu_features).xsave_state_size != 0) r~
On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson < richard.henderson@linaro.org> wrote: > On 2/28/24 07:51, H.J. Lu wrote: > > --- a/sysdeps/x86/cpu-features.c > > +++ b/sysdeps/x86/cpu-features.c > > @@ -18,6 +18,7 @@ > > > > #include <dl-hwcap.h> > > #include <libc-pointer-arith.h> > > +#include <isa-level.h> > > #include <get-isa-level.h> > > #include <cacheinfo.h> > > #include <dl-cacheinfo.h> > > @@ -1195,7 +1196,9 @@ no_cpuid: > > TUNABLE_CALLBACK (set_x86_shstk)); > > #endif > > > > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > +#endif > > { > > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) > > { > > Surely this can be written with IF and not IFDEF: > > if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL > || GLRO(dl_x86_cpu_features).xsave_state_size != 0) > For sure it can be written like IF, but using IFDEF has the following perf and code size advantage. For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case it will remove conditional if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) and else block code of this conditional during preprocessing. --Sunil
On 2/29/24 16:40, Sunil Pandey wrote: > > > On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <richard.henderson@linaro.org > <mailto:richard.henderson@linaro.org>> wrote: > > On 2/28/24 07:51, H.J. Lu wrote: > > --- a/sysdeps/x86/cpu-features.c > > +++ b/sysdeps/x86/cpu-features.c > > @@ -18,6 +18,7 @@ > > > > #include <dl-hwcap.h> > > #include <libc-pointer-arith.h> > > +#include <isa-level.h> > > #include <get-isa-level.h> > > #include <cacheinfo.h> > > #include <dl-cacheinfo.h> > > @@ -1195,7 +1196,9 @@ no_cpuid: > > TUNABLE_CALLBACK (set_x86_shstk)); > > #endif > > > > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > +#endif > > { > > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) > > { > > Surely this can be written with IF and not IFDEF: > > if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL > || GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > > For sure it can be written like IF, but using IFDEF has the following perf and code size > advantage. > > For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case > > it will remove conditional > > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > and else block code of this conditional during preprocessing. You miss the point -- both of these are constants and will be folded by the compiler. There is no perf or code size advantage for cpp. r~
On Fri, Mar 1, 2024 at 2:23 PM Richard Henderson < richard.henderson@linaro.org> wrote: > On 2/29/24 16:40, Sunil Pandey wrote: > > > > > > On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson < > richard.henderson@linaro.org > > <mailto:richard.henderson@linaro.org>> wrote: > > > > On 2/28/24 07:51, H.J. Lu wrote: > > > --- a/sysdeps/x86/cpu-features.c > > > +++ b/sysdeps/x86/cpu-features.c > > > @@ -18,6 +18,7 @@ > > > > > > #include <dl-hwcap.h> > > > #include <libc-pointer-arith.h> > > > +#include <isa-level.h> > > > #include <get-isa-level.h> > > > #include <cacheinfo.h> > > > #include <dl-cacheinfo.h> > > > @@ -1195,7 +1196,9 @@ no_cpuid: > > > TUNABLE_CALLBACK (set_x86_shstk)); > > > #endif > > > > > > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL > > > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > > +#endif > > > { > > > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) > > > { > > > > Surely this can be written with IF and not IFDEF: > > > > if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL > > || GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > > > > > For sure it can be written like IF, but using IFDEF has the following > perf and code size > > advantage. > > > > For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case > > > > it will remove conditional > > > > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) > > > > and else block code of this conditional during preprocessing. > > You miss the point -- both of these are constants and will be folded by > the compiler. > There is no perf or code size advantage for cpp. > > Got it. Will submit a patch to fix it. --Sunil
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index d71e8d3d2e..e7c7ece462 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -18,6 +18,7 @@ #include <dl-hwcap.h> #include <libc-pointer-arith.h> +#include <isa-level.h> #include <get-isa-level.h> #include <cacheinfo.h> #include <dl-cacheinfo.h> @@ -1195,7 +1196,9 @@ no_cpuid: TUNABLE_CALLBACK (set_x86_shstk)); #endif +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) +#endif { if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) { @@ -1216,22 +1219,24 @@ no_cpuid: #endif } } +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL else { -#ifdef __x86_64__ +# ifdef __x86_64__ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; -# ifdef SHARED +# ifdef SHARED GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; -# endif -#else -# ifdef SHARED +# endif +# else +# ifdef SHARED if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; else GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; +# endif # endif -#endif } +#endif #ifdef SHARED # ifdef __x86_64__ diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index ea69f5223a..057a10862a 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -20,6 +20,7 @@ #include <tls.h> #include <cpu-features-offsets.h> #include <features-offsets.h> +#include <isa-level.h> #include "tlsdesc.h" #include "dl-trampoline-save.h" @@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak: .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak #ifdef SHARED -# define USE_FXSAVE -# define STATE_SAVE_ALIGNMENT 16 -# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave -# include "dl-tlsdesc-dynamic.h" -# undef _dl_tlsdesc_dynamic -# undef USE_FXSAVE +# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave +# include "dl-tlsdesc-dynamic.h" +# undef _dl_tlsdesc_dynamic +# undef USE_FXSAVE +# endif # define USE_XSAVE # define STATE_SAVE_ALIGNMENT 64