Message ID | 20221003195944.3274548-9-aurelien@aurel32.net |
---|---|
State | New |
Headers | show |
Series | x86: Fix AVX2 string functions requiring BMI1, BMI2 or LZCNT (BZ #29611) | expand |
On Mon, Oct 3, 2022 at 12:59 PM Aurelien Jarno <aurelien@aurel32.net> wrote: > > The AVX2 strrchr and wcsrchr implementation uses the 'blsmsk' > instruction which belongs to the BMI1 CPU feature and the 'shrx' > instruction, which belongs to the BMI2 CPU feature. > > Fixes: df7e295d18ff ("x86: Optimize {str|wcs}rchr-avx2") > Partially resolves: BZ #29611 > --- > sysdeps/x86/isa-level.h | 1 + > sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 + > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 17 ++++++++++++++--- > 3 files changed, 16 insertions(+), 3 deletions(-) > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h > index bbb90f5c5e..06f6c9663e 100644 > --- a/sysdeps/x86/isa-level.h > +++ b/sysdeps/x86/isa-level.h > @@ -79,6 +79,7 @@ > /* ISA level >= 3 guaranteed includes. */ > #define AVX_X86_ISA_LEVEL 3 > #define AVX2_X86_ISA_LEVEL 3 > +#define BMI1_X86_ISA_LEVEL 3 > #define BMI2_X86_ISA_LEVEL 3 > #define LZCNT_X86_ISA_LEVEL 3 > #define MOVBE_X86_ISA_LEVEL 3 > diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h > index f1741083fd..f2f5e8a211 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h > @@ -36,6 +36,7 @@ IFUNC_SELECTOR (void) > const struct cpu_features *cpu_features = __get_cpu_features (); > > if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI1) > && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT) > && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index ec1c5b55fb..00a91123d3 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > @@ -578,13 +578,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > IFUNC_IMPL (i, name, strrchr, > X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr, > (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI1) > + && CPU_FEATURE_USABLE (BMI2)), > __strrchr_evex) > X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, > - CPU_FEATURE_USABLE (AVX2), > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI1) > + && CPU_FEATURE_USABLE (BMI2)), > __strrchr_avx2) > X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, > (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI1) > + && CPU_FEATURE_USABLE (BMI2) > && CPU_FEATURE_USABLE (RTM)), > __strrchr_avx2_rtm) > /* ISA V2 wrapper for SSE2 implementation because the SSE2 > @@ -797,13 +803,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr, > (CPU_FEATURE_USABLE (AVX512VL) > && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI1) > && CPU_FEATURE_USABLE (BMI2)), > __wcsrchr_evex) > X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, > - CPU_FEATURE_USABLE (AVX2), > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI1) > + && CPU_FEATURE_USABLE (BMI2)), > __wcsrchr_avx2) > X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, > (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI1) > + && CPU_FEATURE_USABLE (BMI2) > && CPU_FEATURE_USABLE (RTM)), > __wcsrchr_avx2_rtm) > /* ISA V2 wrapper for SSE2 implementation because the SSE2 > -- > 2.35.1 > LGTM. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h index bbb90f5c5e..06f6c9663e 100644 --- a/sysdeps/x86/isa-level.h +++ b/sysdeps/x86/isa-level.h @@ -79,6 +79,7 @@ /* ISA level >= 3 guaranteed includes. */ #define AVX_X86_ISA_LEVEL 3 #define AVX2_X86_ISA_LEVEL 3 +#define BMI1_X86_ISA_LEVEL 3 #define BMI2_X86_ISA_LEVEL 3 #define LZCNT_X86_ISA_LEVEL 3 #define MOVBE_X86_ISA_LEVEL 3 diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h index f1741083fd..f2f5e8a211 100644 --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h @@ -36,6 +36,7 @@ IFUNC_SELECTOR (void) const struct cpu_features *cpu_features = __get_cpu_features (); if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI1) && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT) && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index ec1c5b55fb..00a91123d3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -578,13 +578,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strrchr, X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr, (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI1) + && CPU_FEATURE_USABLE (BMI2)), __strrchr_evex) X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, - CPU_FEATURE_USABLE (AVX2), + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI1) + && CPU_FEATURE_USABLE (BMI2)), __strrchr_avx2) X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI1) + && CPU_FEATURE_USABLE (BMI2) && CPU_FEATURE_USABLE (RTM)), __strrchr_avx2_rtm) /* ISA V2 wrapper for SSE2 implementation because the SSE2 @@ -797,13 +803,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr, (CPU_FEATURE_USABLE (AVX512VL) && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI1) && CPU_FEATURE_USABLE (BMI2)), __wcsrchr_evex) X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, - CPU_FEATURE_USABLE (AVX2), + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI1) + && CPU_FEATURE_USABLE (BMI2)), __wcsrchr_avx2) X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI1) + && CPU_FEATURE_USABLE (BMI2) && CPU_FEATURE_USABLE (RTM)), __wcsrchr_avx2_rtm) /* ISA V2 wrapper for SSE2 implementation because the SSE2