Message ID | 20220713233301.1868315-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2,1/3] x86: Add support to build strcmp/strlen/strchr with explicit ISA level | expand |
On Wed, Jul 13, 2022 at 4:33 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > 1. Add default ISA level selection in non-multiarch/rtld > implementations. > > 2. Add ISA level build guards to different implementations. > - I.e strcmp-avx2.S which is ISA level 3 will only build if > compiled ISA level <= 3. Otherwise there is no reason to > include it as we will always use one of the ISA level 4 > implementations (strcmp-evex.S). > > 3. Refactor the ifunc selector and ifunc implementation list to use > the ISA level aware wrapper macros that allow functions below the > compiled ISA level (with a guaranteed replacement) to be skipped. > > Tested with and without multiarch on x86_64 for ISA levels: > {generic, x86-64-v2, x86-64-v3, x86-64-v4} > > And m32 with and without multiarch. > --- > sysdeps/x86/isa-level.h | 10 + > sysdeps/x86_64/Makefile | 6 + > sysdeps/x86_64/memrchr.S | 10 +- > sysdeps/x86_64/multiarch/Makefile | 2 - > sysdeps/x86_64/multiarch/ifunc-avx2.h | 22 +- > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 648 ++++++++++-------- > sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 33 +- > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 27 +- > sysdeps/x86_64/multiarch/memrchr-avx2.S | 4 +- > sysdeps/x86_64/multiarch/memrchr-evex.S | 4 +- > sysdeps/x86_64/multiarch/memrchr-sse2.S | 19 +- > .../x86_64/multiarch/strcasecmp_l-avx2-rtm.S | 17 +- > sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S | 3 - > sysdeps/x86_64/multiarch/strcasecmp_l-evex.S | 3 - > sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S | 1 + > sysdeps/x86_64/multiarch/strchr-avx2.S | 4 +- > sysdeps/x86_64/multiarch/strchr-evex.S | 4 +- > sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S | 6 +- > sysdeps/x86_64/multiarch/strchr-sse2.S | 7 +- > sysdeps/x86_64/multiarch/strchr.c | 30 +- > sysdeps/x86_64/multiarch/strchrnul-avx2.S | 7 +- > sysdeps/x86_64/multiarch/strchrnul-evex.S | 7 +- > sysdeps/x86_64/multiarch/strchrnul-sse2.S | 8 +- > sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S | 5 +- > 
sysdeps/x86_64/multiarch/strcmp-avx2.S | 22 +- > sysdeps/x86_64/multiarch/strcmp-evex.S | 20 +- > .../x86_64/multiarch/strcmp-sse2-unaligned.S | 18 +- > sysdeps/x86_64/multiarch/strcmp-sse2.S | 6 +- > sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 6 +- > sysdeps/x86_64/multiarch/strcmp.c | 37 +- > sysdeps/x86_64/multiarch/strlen-avx2.S | 4 +- > sysdeps/x86_64/multiarch/strlen-evex-base.S | 6 +- > sysdeps/x86_64/multiarch/strlen-evex.S | 4 +- > sysdeps/x86_64/multiarch/strlen-sse2.S | 17 +- > .../x86_64/multiarch/strncase_l-avx2-rtm.S | 18 +- > sysdeps/x86_64/multiarch/strncase_l-avx2.S | 7 +- > sysdeps/x86_64/multiarch/strncase_l-evex.S | 4 - > sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S | 2 - > sysdeps/x86_64/multiarch/strncmp-avx2.S | 3 +- > sysdeps/x86_64/multiarch/strncmp-evex.S | 1 - > sysdeps/x86_64/multiarch/strncmp.c | 27 +- > sysdeps/x86_64/multiarch/strnlen-avx2.S | 6 +- > sysdeps/x86_64/multiarch/strnlen-evex.S | 6 +- > sysdeps/x86_64/multiarch/strnlen-sse2.S | 10 +- > sysdeps/x86_64/multiarch/strrchr-avx2.S | 4 +- > sysdeps/x86_64/multiarch/strrchr-evex.S | 4 +- > sysdeps/x86_64/multiarch/strrchr-sse2.S | 99 +-- > .../x86_64/multiarch/strstr-sse2-unaligned.S | 3 +- > sysdeps/x86_64/multiarch/wcschr-avx2.S | 7 +- > sysdeps/x86_64/multiarch/wcschr-evex.S | 7 +- > sysdeps/x86_64/multiarch/wcschr-sse2.S | 11 +- > sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S | 1 - > sysdeps/x86_64/multiarch/wcscmp-avx2.S | 1 - > sysdeps/x86_64/multiarch/wcscmp-evex.S | 1 - > sysdeps/x86_64/multiarch/wcscmp-sse2.S | 14 +- > sysdeps/x86_64/multiarch/wcslen-avx2.S | 6 +- > sysdeps/x86_64/multiarch/wcslen-evex.S | 6 +- > sysdeps/x86_64/multiarch/wcslen-sse2.S | 15 +- > sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 10 +- > sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S | 2 - > sysdeps/x86_64/multiarch/wcsncmp-avx2.S | 3 +- > sysdeps/x86_64/multiarch/wcsncmp-evex.S | 1 - > sysdeps/x86_64/multiarch/wcsncmp-generic.c | 9 +- > sysdeps/x86_64/multiarch/wcsnlen-avx2.S | 6 +- > 
sysdeps/x86_64/multiarch/wcsnlen-evex.S | 6 +- > sysdeps/x86_64/multiarch/wcsnlen-generic.c | 13 +- > sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 7 +- > sysdeps/x86_64/multiarch/wcsrchr-avx2.S | 7 +- > sysdeps/x86_64/multiarch/wcsrchr-evex.S | 6 +- > sysdeps/x86_64/multiarch/wcsrchr-sse2.S | 7 +- > sysdeps/x86_64/strcasecmp_l.S | 32 +- > sysdeps/x86_64/strchr-isa-default-impl.h | 28 + > sysdeps/x86_64/strchr.S | 12 +- > sysdeps/x86_64/strchrnul.S | 13 +- > sysdeps/x86_64/strcmp.S | 13 +- > sysdeps/x86_64/strlen.S | 9 +- > sysdeps/x86_64/strncase_l.S | 32 +- > sysdeps/x86_64/strncmp.S | 29 +- > sysdeps/x86_64/strnlen.S | 31 +- > sysdeps/x86_64/strrchr.S | 10 +- > sysdeps/x86_64/wcschr.S | 13 +- > sysdeps/x86_64/wcscmp.S | 9 +- > sysdeps/x86_64/wcslen.S | 19 +- > sysdeps/x86_64/wcsncmp-generic.c | 29 + > sysdeps/x86_64/wcsncmp.S | 40 ++ > sysdeps/x86_64/wcsnlen-generic.c | 29 + > sysdeps/x86_64/wcsnlen.S | 49 ++ > sysdeps/x86_64/wcsrchr.S | 11 +- > 88 files changed, 1157 insertions(+), 618 deletions(-) > create mode 100644 sysdeps/x86_64/strchr-isa-default-impl.h > create mode 100644 sysdeps/x86_64/wcsncmp-generic.c > create mode 100644 sysdeps/x86_64/wcsncmp.S > create mode 100644 sysdeps/x86_64/wcsnlen-generic.c > create mode 100644 sysdeps/x86_64/wcsnlen.S > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h > index 77f9e2c0c3..3c4480aba7 100644 > --- a/sysdeps/x86/isa-level.h > +++ b/sysdeps/x86/isa-level.h > @@ -84,6 +84,7 @@ > > /* ISA level >= 2 guaranteed includes. */ > #define SSE4_2_X86_ISA_LEVEL 2 > +#define SSE4_1_X86_ISA_LEVEL 2 > #define SSSE3_X86_ISA_LEVEL 2 > > > @@ -101,9 +102,18 @@ > when ISA level < 3. */ > #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3 > > +/* NB: This feature is disabled when ISA level >= 3. All CPUs with > + this feature don't run on glibc built with ISA level >= 3. */ > +#define Slow_SSE42_X86_ISA_LEVEL 3 > + > /* Feature(s) enabled when ISA level >= 2. 
*/ > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2 > > +/* NB: This feature is disabled when ISA level >= 2, which was enabled > + for the early Atom CPUs. */ > +#define Slow_BSF_X86_ISA_LEVEL 2 > + > + > /* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P > macros are wrappers for the respective CPU_FEATURE{S}_{USABLE|ARCH}_P > runtime checks. They differ in two ways. > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile > index e597a4855f..341ee69a35 100644 > --- a/sysdeps/x86_64/Makefile > +++ b/sysdeps/x86_64/Makefile > @@ -197,6 +197,12 @@ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym > endif > > ifeq ($(subdir),wcsmbs) > + > +sysdep_routines += \ > + wcsncmp-generic \ > + wcsnlen-generic \ > +# sysdep_routines > + > tests += \ > tst-rsi-wcslen > endif > diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S > index 385e2c5668..d39b1aa0e2 100644 > --- a/sysdeps/x86_64/memrchr.S > +++ b/sysdeps/x86_64/memrchr.S > @@ -1,4 +1,4 @@ > -/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using > +/* memrchr dispatch for RTLD and non-multiarch build > > Copyright (C) 2011-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > @@ -18,5 +18,11 @@ > <https://www.gnu.org/licenses/>. 
*/ > > #define MEMRCHR __memrchr > -#include "multiarch/memrchr-sse2.S" > + > +#define DEFAULT_IMPL_V1 "multiarch/memrchr-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/memrchr-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/memrchr-evex.S" > + > +#include "isa-default-impl.h" > + > weak_alias (__memrchr, memrchr) > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index d6b62af850..ba29a65716 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -144,11 +144,9 @@ sysdep_routines += \ > wcslen-sse4_1 \ > wcsncmp-avx2 \ > wcsncmp-avx2-rtm \ > - wcsncmp-generic \ > wcsncmp-evex \ > wcsnlen-avx2 \ > wcsnlen-avx2-rtm \ > - wcsnlen-generic \ > wcsnlen-evex \ > wcsnlen-evex512 \ > wcsnlen-sse4_1 \ > diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h > index 1d9cdfcfec..a57a9952f3 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h > @@ -23,28 +23,32 @@ > # define GENERIC sse2 > #endif > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > - const struct cpu_features* cpu_features = __get_cpu_features (); > + const struct cpu_features *cpu_features = __get_cpu_features (); > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > + && X86_ISA_CPU_FEATURES_ARCH_P 
(cpu_features, > + AVX_Fast_Unaligned_Load, )) > { > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > return OPTIMIZE (evex); > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > return OPTIMIZE (avx2_rtm); > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + Prefer_No_VZEROUPPER, !)) > return OPTIMIZE (avx2); > } > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index 2c96cb62d2..3b1df9b73c 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > @@ -205,19 +205,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/memrchr.c. */ > IFUNC_IMPL (i, name, memrchr, > - IFUNC_IMPL_ADD (array, i, memrchr, > - CPU_FEATURE_USABLE (AVX2), > - __memrchr_avx2) > - IFUNC_IMPL_ADD (array, i, memrchr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __memrchr_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, memrchr, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __memrchr_evex) > - > - IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __memrchr_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr, > + CPU_FEATURE_USABLE (AVX2), > + __memrchr_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __memrchr_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. 
*/ > + X86_IFUNC_IMPL_ADD_V2 (array, i, memrchr, > + 1, > + __memrchr_sse2)) > > #ifdef SHARED > /* Support sysdeps/x86_64/multiarch/memset_chk.c. */ > @@ -346,49 +349,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strlen.c. */ > IFUNC_IMPL (i, name, strlen, > - IFUNC_IMPL_ADD (array, i, strlen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __strlen_avx2) > - IFUNC_IMPL_ADD (array, i, strlen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __strlen_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strlen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strlen_evex) > - IFUNC_IMPL_ADD (array, i, strlen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strlen_evex512) > - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strlen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strlen_evex) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strlen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strlen_evex512) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strlen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __strlen_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strlen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __strlen_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strlen, > + 1, > + __strlen_sse2)) > > /* Support sysdeps/x86_64/multiarch/strnlen.c. 
*/ > IFUNC_IMPL (i, name, strnlen, > - IFUNC_IMPL_ADD (array, i, strnlen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __strnlen_avx2) > - IFUNC_IMPL_ADD (array, i, strnlen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __strnlen_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strnlen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strnlen_evex) > - IFUNC_IMPL_ADD (array, i, strnlen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strnlen_evex512) > - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strnlen_evex) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strnlen_evex512) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __strnlen_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __strnlen_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strnlen, > + 1, > + __strnlen_sse2)) > > /* Support sysdeps/x86_64/multiarch/stpncpy.c. */ > IFUNC_IMPL (i, name, stpncpy, > @@ -422,40 +433,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. 
*/ > IFUNC_IMPL (i, name, strcasecmp, > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __strcasecmp_evex) > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - CPU_FEATURE_USABLE (AVX2), > - __strcasecmp_avx2) > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strcasecmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - CPU_FEATURE_USABLE (SSE4_2), > - __strcasecmp_sse42) > - IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __strcasecmp_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > + CPU_FEATURE_USABLE (AVX2), > + __strcasecmp_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strcasecmp_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp, > + CPU_FEATURE_USABLE (SSE4_2), > + __strcasecmp_sse42) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp, > + 1, > + __strcasecmp_sse2)) > > /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. 
*/ > IFUNC_IMPL (i, name, strcasecmp_l, > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __strcasecmp_l_evex) > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - CPU_FEATURE_USABLE (AVX2), > - __strcasecmp_l_avx2) > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strcasecmp_l_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strcasecmp_l, > - CPU_FEATURE_USABLE (SSE4_2), > - __strcasecmp_l_sse42) > - IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, > - __strcasecmp_l_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __strcasecmp_l_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > + CPU_FEATURE_USABLE (AVX2), > + __strcasecmp_l_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strcasecmp_l_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l, > + CPU_FEATURE_USABLE (SSE4_2), > + __strcasecmp_l_sse42) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l, > + 1, > + __strcasecmp_l_sse2)) > > /* Support sysdeps/x86_64/multiarch/strcat.c. */ > IFUNC_IMPL (i, name, strcat, > @@ -474,74 +492,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strchr.c. 
*/ > IFUNC_IMPL (i, name, strchr, > - IFUNC_IMPL_ADD (array, i, strchr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __strchr_avx2) > - IFUNC_IMPL_ADD (array, i, strchr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __strchr_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strchr, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strchr_evex) > - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) > - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strchr, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strchr_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __strchr_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __strchr_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strchr, > + 1, > + __strchr_sse2) > + X86_IFUNC_IMPL_ADD_V1 (array, i, strchr, > + 1, > + __strchr_sse2_no_bsf)) > > /* Support sysdeps/x86_64/multiarch/strchrnul.c. 
*/ > IFUNC_IMPL (i, name, strchrnul, > - IFUNC_IMPL_ADD (array, i, strchrnul, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __strchrnul_avx2) > - IFUNC_IMPL_ADD (array, i, strchrnul, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __strchrnul_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strchrnul, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strchrnul_evex) > - IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strchrnul, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strchrnul_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __strchrnul_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __strchrnul_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strchrnul, > + 1, > + __strchrnul_sse2)) > > /* Support sysdeps/x86_64/multiarch/strrchr.c. 
*/ > IFUNC_IMPL (i, name, strrchr, > - IFUNC_IMPL_ADD (array, i, strrchr, > - CPU_FEATURE_USABLE (AVX2), > - __strrchr_avx2) > - IFUNC_IMPL_ADD (array, i, strrchr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strrchr_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strrchr, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __strrchr_evex) > - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __strrchr_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, > + CPU_FEATURE_USABLE (AVX2), > + __strrchr_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strrchr_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strrchr, > + 1, > + __strrchr_sse2)) > > /* Support sysdeps/x86_64/multiarch/strcmp.c. 
*/ > IFUNC_IMPL (i, name, strcmp, > - IFUNC_IMPL_ADD (array, i, strcmp, > - CPU_FEATURE_USABLE (AVX2), > - __strcmp_avx2) > - IFUNC_IMPL_ADD (array, i, strcmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strcmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strcmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __strcmp_evex) > - IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2), > - __strcmp_sse42) > - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strcmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __strcmp_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp, > + CPU_FEATURE_USABLE (AVX2), > + __strcmp_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strcmp_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, > + CPU_FEATURE_USABLE (SSE4_2), > + __strcmp_sse42) > + /* ISA V2 wrapper for SSE2 implementations because the SSE2 > + implementations are also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, > + 1, > + __strcmp_sse2_unaligned) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, > + 1, > + __strcmp_sse2)) > > /* Support sysdeps/x86_64/multiarch/strcpy.c. */ > IFUNC_IMPL (i, name, strcpy, > @@ -568,41 +607,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ > IFUNC_IMPL (i, name, strncasecmp, > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __strncasecmp_evex) > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - CPU_FEATURE_USABLE (AVX2), > - __strncasecmp_avx2) > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strncasecmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - CPU_FEATURE_USABLE (SSE4_2), > - __strncasecmp_sse42) > - IFUNC_IMPL_ADD (array, i, strncasecmp, 1, > - __strncasecmp_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __strncasecmp_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > + CPU_FEATURE_USABLE (AVX2), > + __strncasecmp_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strncasecmp_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp, > + CPU_FEATURE_USABLE (SSE4_2), > + __strncasecmp_sse42) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp, > + 1, > + __strncasecmp_sse2)) > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ > IFUNC_IMPL (i, name, strncasecmp_l, > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __strncasecmp_l_evex) > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - CPU_FEATURE_USABLE (AVX2), > - __strncasecmp_l_avx2) > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strncasecmp_l_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strncasecmp_l, > - CPU_FEATURE_USABLE (SSE4_2), > - __strncasecmp_l_sse42) > - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, > - __strncasecmp_l_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __strncasecmp_l_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > + CPU_FEATURE_USABLE (AVX2), > + __strncasecmp_l_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strncasecmp_l_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l, > + CPU_FEATURE_USABLE (SSE4_2), > + __strncasecmp_l_sse42) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l, > + 1, > + __strncasecmp_l_sse2)) > > /* Support sysdeps/x86_64/multiarch/strncat.c. */ > IFUNC_IMPL (i, name, strncat, > @@ -664,69 +709,85 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/wcschr.c. 
*/ > IFUNC_IMPL (i, name, wcschr, > - IFUNC_IMPL_ADD (array, i, wcschr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcschr_avx2) > - IFUNC_IMPL_ADD (array, i, wcschr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __wcschr_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, wcschr, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcschr_evex) > - IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcschr, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcschr_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcschr_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __wcschr_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcschr, > + 1, > + __wcschr_sse2)) > > /* Support sysdeps/x86_64/multiarch/wcsrchr.c. 
*/ > IFUNC_IMPL (i, name, wcsrchr, > - IFUNC_IMPL_ADD (array, i, wcsrchr, > - CPU_FEATURE_USABLE (AVX2), > - __wcsrchr_avx2) > - IFUNC_IMPL_ADD (array, i, wcsrchr, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __wcsrchr_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, wcsrchr, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcsrchr_evex) > - IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcsrchr_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, > + CPU_FEATURE_USABLE (AVX2), > + __wcsrchr_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __wcsrchr_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsrchr, > + 1, > + __wcsrchr_sse2)) > > /* Support sysdeps/x86_64/multiarch/wcscmp.c. 
*/ > IFUNC_IMPL (i, name, wcscmp, > - IFUNC_IMPL_ADD (array, i, wcscmp, > - CPU_FEATURE_USABLE (AVX2), > - __wcscmp_avx2) > - IFUNC_IMPL_ADD (array, i, wcscmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __wcscmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, wcscmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcscmp_evex) > - IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcscmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcscmp_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp, > + CPU_FEATURE_USABLE (AVX2), > + __wcscmp_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __wcscmp_avx2_rtm) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcscmp, > + 1, > + __wcscmp_sse2)) > > /* Support sysdeps/x86_64/multiarch/wcsncmp.c. 
*/ > IFUNC_IMPL (i, name, wcsncmp, > - IFUNC_IMPL_ADD (array, i, wcsncmp, > - CPU_FEATURE_USABLE (AVX2), > - __wcsncmp_avx2) > - IFUNC_IMPL_ADD (array, i, wcsncmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __wcsncmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, wcsncmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcsncmp_evex) > - IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsncmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcsncmp_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp, > + CPU_FEATURE_USABLE (AVX2), > + __wcsncmp_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __wcsncmp_avx2_rtm) > + /* ISA V2 wrapper for GENERIC implementation because the > + GENERIC implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncmp, > + 1, > + __wcsncmp_generic)) > > /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ > IFUNC_IMPL (i, name, wcscpy, > @@ -736,55 +797,59 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/wcslen.c. 
*/ > IFUNC_IMPL (i, name, wcslen, > - IFUNC_IMPL_ADD (array, i, wcslen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcslen_avx2) > - IFUNC_IMPL_ADD (array, i, wcslen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __wcslen_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, wcslen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcslen_evex) > - IFUNC_IMPL_ADD (array, i, wcslen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcslen_evex512) > - IFUNC_IMPL_ADD (array, i, wcslen, > - CPU_FEATURE_USABLE (SSE4_1), > - __wcslen_sse4_1) > - IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcslen_evex) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcslen_evex512) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcslen_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __wcslen_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcslen, > + CPU_FEATURE_USABLE (SSE4_1), > + __wcslen_sse4_1) > + X86_IFUNC_IMPL_ADD_V1 (array, i, wcslen, > + 1, > + __wcslen_sse2)) > > /* Support sysdeps/x86_64/multiarch/wcsnlen.c. 
*/ > IFUNC_IMPL (i, name, wcsnlen, > - IFUNC_IMPL_ADD (array, i, wcsnlen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcsnlen_avx2) > - IFUNC_IMPL_ADD (array, i, wcsnlen, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (BMI2) > - && CPU_FEATURE_USABLE (RTM)), > - __wcsnlen_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, wcsnlen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcsnlen_evex) > - IFUNC_IMPL_ADD (array, i, wcsnlen, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW) > - && CPU_FEATURE_USABLE (BMI2)), > - __wcsnlen_evex512) > - IFUNC_IMPL_ADD (array, i, wcsnlen, > - CPU_FEATURE_USABLE (SSE4_1), > - __wcsnlen_sse4_1) > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcsnlen_evex) > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcsnlen_evex512) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2)), > + __wcsnlen_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (BMI2) > + && CPU_FEATURE_USABLE (RTM)), > + __wcsnlen_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsnlen, > + CPU_FEATURE_USABLE (SSE4_1), > + __wcsnlen_sse4_1) > + X86_IFUNC_IMPL_ADD_V1 (array, i, wcsnlen, > + 1, > + __wcsnlen_generic)) > > /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ > IFUNC_IMPL (i, name, wmemchr, > @@ -1050,20 +1115,25 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strncmp.c. 
*/ > IFUNC_IMPL (i, name, strncmp, > - IFUNC_IMPL_ADD (array, i, strncmp, > - CPU_FEATURE_USABLE (AVX2), > - __strncmp_avx2) > - IFUNC_IMPL_ADD (array, i, strncmp, > - (CPU_FEATURE_USABLE (AVX2) > - && CPU_FEATURE_USABLE (RTM)), > - __strncmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strncmp, > - (CPU_FEATURE_USABLE (AVX512VL) > - && CPU_FEATURE_USABLE (AVX512BW)), > - __strncmp_evex) > - IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2), > - __strncmp_sse42) > - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2)) > + X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp, > + (CPU_FEATURE_USABLE (AVX512VL) > + && CPU_FEATURE_USABLE (AVX512BW)), > + __strncmp_evex) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp, > + CPU_FEATURE_USABLE (AVX2), > + __strncmp_avx2) > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp, > + (CPU_FEATURE_USABLE (AVX2) > + && CPU_FEATURE_USABLE (RTM)), > + __strncmp_avx2_rtm) > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp, > + CPU_FEATURE_USABLE (SSE4_2), > + __strncmp_sse42) > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > + implementation is also used at ISA level 2. */ > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp, > + 1, > + __strncmp_sse2)) > > #ifdef SHARED > /* Support sysdeps/x86_64/multiarch/wmemset_chk.c. 
*/ > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > index 296d32071b..68646ef199 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > @@ -19,32 +19,39 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > - const struct cpu_features* cpu_features = __get_cpu_features (); > + const struct cpu_features *cpu_features = __get_cpu_features (); > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + AVX_Fast_Unaligned_Load, )) > { > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > - return OPTIMIZE (evex); > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > + return OPTIMIZE (evex); > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > - return OPTIMIZE (avx2_rtm); > + return OPTIMIZE (avx2_rtm); > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > - return OPTIMIZE (avx2); > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + Prefer_No_VZEROUPPER, !)) > + return OPTIMIZE (avx2); > } > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > + if 
(X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > + /* Keep this as a runtime check as it's not guaranteed at ISA > + level 2. */ > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > return OPTIMIZE (sse42); > > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > index 88c1c502af..064722c2bd 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > @@ -23,33 +23,38 @@ > # define GENERIC sse2 > #endif > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > - const struct cpu_features* cpu_features = __get_cpu_features (); > + const struct cpu_features *cpu_features = __get_cpu_features (); > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + AVX_Fast_Unaligned_Load, )) > { > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > return OPTIMIZE (evex); > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > return OPTIMIZE (avx2_rtm); > > - if
(!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + Prefer_No_VZEROUPPER, !)) > return OPTIMIZE (avx2); > } > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > return OPTIMIZE (sse4_1); > > return OPTIMIZE (GENERIC); > diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S > index f300d7daf4..d1457ab60c 100644 > --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S > +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (3) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S > index 91329b18dc..ea3a0a0a60 100644 > --- a/sysdeps/x86_64/multiarch/memrchr-evex.S > +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (4) > > # include <sysdep.h> > # include "evex256-vecs.h" > diff --git a/sysdeps/x86_64/multiarch/memrchr-sse2.S b/sysdeps/x86_64/multiarch/memrchr-sse2.S > index d92a4022dc..4cc8b9e3b0 100644 > --- a/sysdeps/x86_64/multiarch/memrchr-sse2.S > +++ b/sysdeps/x86_64/multiarch/memrchr-sse2.S > @@ -16,22 +16,26 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation > + so we need this to build for ISA V2 builds. 
*/ > +#if ISA_SHOULD_BUILD (2) > + > # ifndef MEMRCHR > # define MEMRCHR __memrchr_sse2 > # endif > -#endif > > -#include <sysdep.h> > -#define VEC_SIZE 16 > -#define PAGE_SIZE 4096 > +# include <sysdep.h> > +# define VEC_SIZE 16 > +# define PAGE_SIZE 4096 > > .text > ENTRY_P2ALIGN(MEMRCHR, 6) > -#ifdef __ILP32__ > +# ifdef __ILP32__ > /* Clear upper bits. */ > mov %RDX_LP, %RDX_LP > -#endif > +# endif > movd %esi, %xmm0 > > /* Get end pointer. */ > @@ -352,3 +356,4 @@ L(zero_3): > ret > /* 2-bytes from next cache line. */ > END(MEMRCHR) > +#endif > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S > index 09957fc3c5..d408751f4c 100644 > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S > @@ -1,15 +1,2 @@ > -#ifndef STRCMP > -# define STRCMP __strcasecmp_l_avx2_rtm > -#endif > - > -#define _GLABEL(x) x ## _rtm > -#define GLABEL(x) _GLABEL(x) > - > -#define ZERO_UPPER_VEC_REGISTERS_RETURN \ > - ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > - > -#define VZEROUPPER_RETURN jmp L(return_vzeroupper) > - > -#define SECTION(p) p##.avx.rtm > - > -#include "strcasecmp_l-avx2.S" > +#define USE_AS_STRCASECMP_L > +#include "strcmp-avx2-rtm.S" > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S > index e2762f2a22..167f866014 100644 > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S > @@ -16,8 +16,5 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#ifndef STRCMP > -# define STRCMP __strcasecmp_l_avx2 > -#endif > #define USE_AS_STRCASECMP_L > #include "strcmp-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S > index 58642db748..012a084930 100644 > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S > @@ -16,8 +16,5 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#ifndef STRCMP > -# define STRCMP __strcasecmp_l_evex > -#endif > #define USE_AS_STRCASECMP_L > #include "strcmp-evex.S" > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S > index a2b5741399..6ffd09b513 100644 > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S > @@ -17,4 +17,5 @@ > <https://www.gnu.org/licenses/>. */ > > #define USE_AS_STRCASECMP_L > + > #include "strcmp-sse2.S" > diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S > index 1a916cc951..425a40b8de 100644 > --- a/sysdeps/x86_64/multiarch/strchr-avx2.S > +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (3) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S > index ec739fb8f9..a1c15c4419 100644 > --- a/sysdeps/x86_64/multiarch/strchr-evex.S > +++ b/sysdeps/x86_64/multiarch/strchr-evex.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (4) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S > index 93e6f62d7f..bb092e3f61 100644 > --- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S > +++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S > @@ -16,7 +16,11 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +/* NB: atom builds with ISA level == 1 so no reason to hold onto this > + at ISA level >= 2. */ > +#if ISA_SHOULD_BUILD (1) > > # include <sysdep.h> > # include "asm-syntax.h" > diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S > index f7767ca543..7a182f0c3b 100644 > --- a/sysdeps/x86_64/multiarch/strchr-sse2.S > +++ b/sysdeps/x86_64/multiarch/strchr-sse2.S > @@ -16,7 +16,12 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) || defined STRCHR > +#include <isa-level.h> > + > +/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation > + so we need this to build for ISA V2 builds. 
*/ > +#if ISA_SHOULD_BUILD (2) > + > # ifndef STRCHR > # define STRCHR __strchr_sse2 > # endif > diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c > index de737580eb..ce7441c532 100644 > --- a/sysdeps/x86_64/multiarch/strchr.c > +++ b/sysdeps/x86_64/multiarch/strchr.c > @@ -26,36 +26,40 @@ > # define SYMBOL_NAME strchr > # include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > - const struct cpu_features* cpu_features = __get_cpu_features (); > + const struct cpu_features *cpu_features = __get_cpu_features (); > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + AVX_Fast_Unaligned_Load, )) > { > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > return OPTIMIZE (evex); > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > return OPTIMIZE (avx2_rtm); > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + 
Prefer_No_VZEROUPPER, !)) > return OPTIMIZE (avx2); > } > > - if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) > - return OPTIMIZE (sse2_no_bsf); > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF, !)) > + return OPTIMIZE (sse2); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (sse2_no_bsf); > } > > libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); > diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S > index fa0cc09760..10ad5e6058 100644 > --- a/sysdeps/x86_64/multiarch/strchrnul-avx2.S > +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S > @@ -1,3 +1,8 @@ > -#define STRCHR __strchrnul_avx2 > +#ifndef STRCHRNUL > +# define STRCHRNUL __strchrnul_avx2 > +#endif > + > +#define STRCHR STRCHRNUL > #define USE_AS_STRCHRNUL 1 > + > #include "strchr-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S > index 064fe7ca9e..0f216cb47f 100644 > --- a/sysdeps/x86_64/multiarch/strchrnul-evex.S > +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S > @@ -1,3 +1,8 @@ > -#define STRCHR __strchrnul_evex > +#ifndef STRCHRNUL > +# define STRCHRNUL __strchrnul_evex > +#endif > + > +#define STRCHR STRCHRNUL > #define USE_AS_STRCHRNUL 1 > + > #include "strchr-evex.S" > diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S > index 7238977a21..7ee81ae510 100644 > --- a/sysdeps/x86_64/multiarch/strchrnul-sse2.S > +++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S > @@ -16,12 +16,10 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#if IS_IN (libc) > -# ifndef STRCHR > -# define STRCHR __strchrnul_sse2 > -# endif > +#ifndef STRCHRNUL > +# define STRCHRNUL __strchrnul_sse2 > #endif > - > #define AS_STRCHRNUL > +#define STRCHR STRCHRNUL > > #include "strchr-sse2.S" > diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S > index aecd30d97f..74f1f996a9 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S > +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S > @@ -1,12 +1,9 @@ > -#ifndef STRCMP > -# define STRCMP __strcmp_avx2_rtm > -#endif > - > #define ZERO_UPPER_VEC_REGISTERS_RETURN \ > ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > > #define VZEROUPPER_RETURN jmp L(return_vzeroupper) > > #define SECTION(p) p##.avx.rtm > +#define STRCMP_ISA _avx2_rtm > > #include "strcmp-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S > index 3ab21e3a58..4c01d664e8 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S > +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S > @@ -16,7 +16,15 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (3) > + > +# ifndef STRCMP_ISA > +# define STRCMP_ISA _avx2 > +# endif > + > +# include "strcmp-naming.h" > > # include <sysdep.h> > > @@ -86,15 +94,11 @@ > > # ifdef USE_AS_STRCASECMP_L > # ifdef USE_AS_STRNCMP > -# define STRCASECMP __strncasecmp_avx2 > # define LOCALE_REG rcx > # define LOCALE_REG_LP RCX_LP > -# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii > # else > -# define STRCASECMP __strcasecmp_avx2 > # define LOCALE_REG rdx > # define LOCALE_REG_LP RDX_LP > -# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii > # endif > # endif > > @@ -185,18 +189,14 @@ > .type STRCMP, @function > .globl STRCMP > > -# ifndef GLABEL > -# define GLABEL(...) 
__VA_ARGS__ > -# endif > - > # ifdef USE_AS_STRCASECMP_L > -ENTRY (GLABEL(STRCASECMP)) > +ENTRY (STRCASECMP) > movq __libc_tsd_LOCALE@gottpoff(%rip), %rax > mov %fs:(%rax), %LOCALE_REG_LP > > /* Either 1 or 5 bytes (dependeing if CET is enabled). */ > .p2align 4 > -END (GLABEL(STRCASECMP)) > +END (STRCASECMP) > /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ > # endif > > diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S > index afbf13a230..e482d0167f 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-evex.S > +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S > @@ -16,7 +16,12 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (4) > + > +# define STRCMP_ISA _evex > +# include "strcmp-naming.h" > > # include <sysdep.h> > # if defined USE_AS_STRCASECMP_L > @@ -37,10 +42,6 @@ > # define VMOVA vmovdqa64 > > # ifdef USE_AS_WCSCMP > -# ifndef OVERFLOW_STRCMP > -# define OVERFLOW_STRCMP __wcscmp_evex > -# endif > - > # define TESTEQ subl $0xff, > /* Compare packed dwords. */ > # define VPCMP vpcmpd > @@ -50,10 +51,6 @@ > /* 1 dword char == 4 bytes. */ > # define SIZE_OF_CHAR 4 > # else > -# ifndef OVERFLOW_STRCMP > -# define OVERFLOW_STRCMP __strcmp_evex > -# endif > - > # define TESTEQ incl > /* Compare packed bytes. 
*/ > # define VPCMP vpcmpb > @@ -120,15 +117,11 @@ > > # ifdef USE_AS_STRCASECMP_L > # ifdef USE_AS_STRNCMP > -# define STRCASECMP __strncasecmp_evex > # define LOCALE_REG rcx > # define LOCALE_REG_LP RCX_LP > -# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii > # else > -# define STRCASECMP __strcasecmp_evex > # define LOCALE_REG rdx > # define LOCALE_REG_LP RDX_LP > -# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii > # endif > # endif > > @@ -214,7 +207,6 @@ > .align 16 > .type STRCMP, @function > .globl STRCMP > - > # ifdef USE_AS_STRCASECMP_L > ENTRY (STRCASECMP) > movq __libc_tsd_LOCALE@gottpoff(%rip), %rax > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S > index 0d691b78a8..33c18a28e8 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S > +++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S > @@ -16,11 +16,20 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > > -#include "sysdep.h" > +/* Continue building as ISA level 2. We use this as ISA V2 default > + because strcmp-sse42 uses pcmpstri (slow on some SSE4.2 > + processors) and this implementation is potentially faster than > + strcmp-sse42 (aside from the slower page cross case).
*/ > +#if ISA_SHOULD_BUILD (2) > > -ENTRY ( __strcmp_sse2_unaligned) > +# define STRCMP_ISA _sse2_unaligned > +# include "strcmp-naming.h" > + > +# include "sysdep.h" > + > +ENTRY (STRCMP) > movl %edi, %eax > xorl %edx, %edx > pxor %xmm7, %xmm7 > @@ -208,6 +217,5 @@ L(cross_page): > L(different): > subl %ecx, %eax > ret > -END (__strcmp_sse2_unaligned) > - > +END (STRCMP) > #endif > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S > index b1220231ab..3c69fc1df1 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-sse2.S > +++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S > @@ -16,7 +16,11 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) || IS_IN (rtld) > +#include <isa-level.h> > + > +/* Continue building at ISA level 2 as the strcmp-sse42 is not always > + preferable for ISA level == 2 CPUs. */ > +#if ISA_SHOULD_BUILD (2) > > # define STRCMP_ISA _sse2 > # include "strcmp-naming.h" > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S > index 963e208ccb..dc6fc90e14 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S > +++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S > @@ -16,7 +16,10 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (2) > + > # include <sysdep.h> > > # define STRCMP_ISA _sse42 > @@ -1766,7 +1769,6 @@ LABEL(unaligned_table): > .int LABEL(ashr_0) - LABEL(unaligned_table) > > # undef LABEL > -# undef GLABEL > # undef SECTION > # undef movdqa > # undef movdqu > diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c > index 9c1677724c..fdd5afe3af 100644 > --- a/sysdeps/x86_64/multiarch/strcmp.c > +++ b/sysdeps/x86_64/multiarch/strcmp.c > @@ -26,37 +26,50 @@ > # define SYMBOL_NAME strcmp > # include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) > + OPTIMIZE (sse2_unaligned) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > + > + > > static inline void * > IFUNC_SELECTOR (void) > { > - const struct cpu_features* cpu_features = __get_cpu_features (); > + const struct cpu_features *cpu_features = __get_cpu_features (); > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + AVX_Fast_Unaligned_Load, )) > { > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > + if (X86_ISA_CPU_FEATURE_USABLE_P 
(cpu_features, AVX512VL) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > return OPTIMIZE (evex); > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > return OPTIMIZE (avx2_rtm); > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + Prefer_No_VZEROUPPER, !)) > return OPTIMIZE (avx2); > } > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > + /* Keep this as runtime check. Some ISA level >= 2 CPUs such as > + Tremont, Silvermont, and more check this. */ > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > return OPTIMIZE (sse42); > > + /* Keep this as runtime check. The standard SSE2 version has > + meaningful optimizations around keeping all loads aligned in the > + main loop which can benefit some ISA level >= 2 CPUs. */ > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) > return OPTIMIZE (sse2_unaligned); > > diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S > index 9e36290dd2..0593fb303b 100644 > --- a/sysdeps/x86_64/multiarch/strlen-avx2.S > +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (3) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strlen-evex-base.S b/sysdeps/x86_64/multiarch/strlen-evex-base.S > index 278c899691..418e9f8411 100644 > --- a/sysdeps/x86_64/multiarch/strlen-evex-base.S > +++ b/sysdeps/x86_64/multiarch/strlen-evex-base.S > @@ -16,7 +16,11 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +/* UNUSED. Exists purely as reference implementation. 
*/ > + > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (4) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S > index 59ade77498..2109ec2f7a 100644 > --- a/sysdeps/x86_64/multiarch/strlen-evex.S > +++ b/sysdeps/x86_64/multiarch/strlen-evex.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (4) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S > index 5be72267d5..a96ccbb2d5 100644 > --- a/sysdeps/x86_64/multiarch/strlen-sse2.S > +++ b/sysdeps/x86_64/multiarch/strlen-sse2.S > @@ -16,15 +16,20 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) || defined STRLEN > - > -# ifndef STRLEN > -# define STRLEN __strlen_sse2 > -# endif > +#include <isa-level.h> > > +/* ISA level >= 2 for both strlen and wcslen. wcslen uses `pminud` > + which is SSE4.1. strlen doesn't have an ISA level == 2 > + implementation so the SSE2 implementation must be built with ISA > + level == 2. */ > +# if ISA_SHOULD_BUILD (2) > > # include <sysdep.h> > > +# ifndef STRLEN > +# define STRLEN __strlen_sse2 > +# endif > + > # ifdef AS_WCSLEN > # define PMINU pminud > # define PCMPEQ pcmpeqd > @@ -82,7 +87,7 @@ L(n_nonzero): > suffice. 
*/ > mov %RSI_LP, %R10_LP > sar $62, %R10_LP > - jnz __wcslen_sse4_1 > + jnz OVERFLOW_STRLEN > sal $2, %RSI_LP > # endif > > diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S > index 58c05dcfb8..c2596ab103 100644 > --- a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S > +++ b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S > @@ -1,16 +1,4 @@ > -#ifndef STRCMP > -# define STRCMP __strncasecmp_l_avx2_rtm > -#endif > +#define USE_AS_STRCASECMP_L > +#define USE_AS_STRNCMP > > -#define _GLABEL(x) x ## _rtm > -#define GLABEL(x) _GLABEL(x) > - > -#define ZERO_UPPER_VEC_REGISTERS_RETURN \ > - ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > - > -#define VZEROUPPER_RETURN jmp L(return_vzeroupper) > - > -#define SECTION(p) p##.avx.rtm > -#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm > - > -#include "strncase_l-avx2.S" > +#include "strcmp-avx2-rtm.S" > diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2.S b/sysdeps/x86_64/multiarch/strncase_l-avx2.S > index 48c0aa21f8..d00687aac5 100644 > --- a/sysdeps/x86_64/multiarch/strncase_l-avx2.S > +++ b/sysdeps/x86_64/multiarch/strncase_l-avx2.S > @@ -16,12 +16,7 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#ifndef STRCMP > -# define STRCMP __strncasecmp_l_avx2 > -#endif > #define USE_AS_STRCASECMP_L > #define USE_AS_STRNCMP > -#ifndef OVERFLOW_STRCMP > -# define OVERFLOW_STRCMP __strcasecmp_l_avx2 > -#endif > + > #include "strcmp-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/strncase_l-evex.S b/sysdeps/x86_64/multiarch/strncase_l-evex.S > index 8a5af3695c..1a79758065 100644 > --- a/sysdeps/x86_64/multiarch/strncase_l-evex.S > +++ b/sysdeps/x86_64/multiarch/strncase_l-evex.S > @@ -16,10 +16,6 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#ifndef STRCMP > -# define STRCMP __strncasecmp_l_evex > -#endif > -#define OVERFLOW_STRCMP __strcasecmp_l_evex > #define USE_AS_STRCASECMP_L > #define USE_AS_STRNCMP > #include "strcmp-evex.S" > diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S > index 68bad365ba..6bb6be8585 100644 > --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S > +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S > @@ -1,4 +1,2 @@ > -#define STRCMP __strncmp_avx2_rtm > #define USE_AS_STRNCMP 1 > -#define OVERFLOW_STRCMP __strcmp_avx2_rtm > #include "strcmp-avx2-rtm.S" > diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S > index f138e9f1fd..def3509c4c 100644 > --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S > +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S > @@ -1,4 +1,3 @@ > -#define STRCMP __strncmp_avx2 > #define USE_AS_STRNCMP 1 > -#define OVERFLOW_STRCMP __strcmp_avx2 > + > #include "strcmp-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S > index a1d53e8c9f..aa69c18928 100644 > --- a/sysdeps/x86_64/multiarch/strncmp-evex.S > +++ b/sysdeps/x86_64/multiarch/strncmp-evex.S > @@ -1,3 +1,2 @@ > -#define STRCMP __strncmp_evex > #define USE_AS_STRNCMP 1 > #include "strcmp-evex.S" > diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c > index 70ae6547c9..4ebe4bde30 100644 > --- a/sysdeps/x86_64/multiarch/strncmp.c > +++ b/sysdeps/x86_64/multiarch/strncmp.c > @@ -26,33 +26,38 @@ > # define SYMBOL_NAME strncmp > # include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > -extern __typeof 
(REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > - const struct cpu_features* cpu_features = __get_cpu_features (); > + const struct cpu_features *cpu_features = __get_cpu_features (); > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + AVX_Fast_Unaligned_Load, )) > { > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > return OPTIMIZE (evex); > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > return OPTIMIZE (avx2_rtm); > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > + Prefer_No_VZEROUPPER, !)) > return OPTIMIZE (avx2); > } > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > return OPTIMIZE (sse42); > > diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2.S b/sysdeps/x86_64/multiarch/strnlen-avx2.S > index c4062b22f7..c4a12097f0 100644 > --- a/sysdeps/x86_64/multiarch/strnlen-avx2.S > +++ b/sysdeps/x86_64/multiarch/strnlen-avx2.S > @@ -1,4 +1,8 @@ > -#define STRLEN __strnlen_avx2 > +#ifndef STRNLEN > +# define STRNLEN __strnlen_avx2 > +#endif > + > #define USE_AS_STRNLEN 1 > +#define STRLEN STRNLEN > > #include "strlen-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S 
b/sysdeps/x86_64/multiarch/strnlen-evex.S > index 722022f303..64a9fc2606 100644 > --- a/sysdeps/x86_64/multiarch/strnlen-evex.S > +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S > @@ -1,4 +1,8 @@ > -#define STRLEN __strnlen_evex > +#ifndef STRNLEN > +# define STRNLEN __strnlen_evex > +#endif > + > #define USE_AS_STRNLEN 1 > +#define STRLEN STRNLEN > > #include "strlen-evex.S" > diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2.S b/sysdeps/x86_64/multiarch/strnlen-sse2.S > index a50c7d6a28..8841ba9faf 100644 > --- a/sysdeps/x86_64/multiarch/strnlen-sse2.S > +++ b/sysdeps/x86_64/multiarch/strnlen-sse2.S > @@ -16,11 +16,11 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > -# ifndef STRLEN > -# define STRLEN __strnlen_sse2 > -# endif > +#ifndef STRNLEN > +# define STRNLEN __strnlen_sse2 > #endif > > -#define AS_STRNLEN > +#define AS_STRNLEN 1 > +#define STRLEN STRNLEN > + > #include "strlen-sse2.S" > diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S > index eb128a2ae3..924171d8e4 100644 > --- a/sysdeps/x86_64/multiarch/strrchr-avx2.S > +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (3) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S > index 8014c285b3..992b45fb47 100644 > --- a/sysdeps/x86_64/multiarch/strrchr-evex.S > +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S > @@ -16,7 +16,9 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (4) > > # include <sysdep.h> > > diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S > index 6ee7a5e33a..892e861fa8 100644 > --- a/sysdeps/x86_64/multiarch/strrchr-sse2.S > +++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S > @@ -16,36 +16,40 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > +#include <isa-level.h> > + > +/* ISA level >= 2 because there are no {wcs|str}rchr-sse4 > + implementations. */ > +#if ISA_SHOULD_BUILD (2) > + > +# include <sysdep.h> > + > # ifndef STRRCHR > # define STRRCHR __strrchr_sse2 > # endif > -#endif > - > -#include <sysdep.h> > > -#ifdef USE_AS_WCSRCHR > -# define PCMPEQ pcmpeqd > -# define CHAR_SIZE 4 > -# define PMINU pminud > -#else > -# define PCMPEQ pcmpeqb > -# define CHAR_SIZE 1 > -# define PMINU pminub > -#endif > +# ifdef USE_AS_WCSRCHR > +# define PCMPEQ pcmpeqd > +# define CHAR_SIZE 4 > +# define PMINU pminud > +# else > +# define PCMPEQ pcmpeqb > +# define CHAR_SIZE 1 > +# define PMINU pminub > +# endif > > -#define PAGE_SIZE 4096 > -#define VEC_SIZE 16 > +# define PAGE_SIZE 4096 > +# define VEC_SIZE 16 > > .text > ENTRY(STRRCHR) > movd %esi, %xmm0 > movq %rdi, %rax > andl $(PAGE_SIZE - 1), %eax > -#ifndef USE_AS_WCSRCHR > +# ifndef USE_AS_WCSRCHR > punpcklbw %xmm0, %xmm0 > punpcklwd %xmm0, %xmm0 > -#endif > +# endif > pshufd $0, %xmm0, %xmm0 > cmpl $(PAGE_SIZE - VEC_SIZE), %eax > ja L(cross_page) > @@ -69,9 +73,9 @@ L(cross_page_continue): > /* We are off by 3 for wcsrchr if search CHAR is non-zero. If > search CHAR is zero we are correct. Either way `andq > -CHAR_SIZE, %rax` gets the correct result. 
*/ > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > L(ret0): > ret > > @@ -85,9 +89,9 @@ L(first_vec_x0_test): > jz L(ret0) > bsrl %eax, %eax > addq %r8, %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4 > @@ -100,9 +104,9 @@ L(first_vec_x1): > jz L(first_vec_x0_test) > bsrl %eax, %eax > leaq (VEC_SIZE)(%rdi, %rax), %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4 > @@ -113,9 +117,9 @@ L(first_vec_x1_test): > jz L(first_vec_x0_test) > bsrl %eax, %eax > leaq (VEC_SIZE)(%rdi, %rax), %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4 > @@ -128,9 +132,9 @@ L(first_vec_x2): > jz L(first_vec_x1_test) > bsrl %eax, %eax > leaq (VEC_SIZE * 2)(%rdi, %rax), %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4 > @@ -165,27 +169,27 @@ L(first_loop): > /* Since SSE2 no pminud so wcsrchr needs seperate logic for > detecting zero. Note if this is found to be a bottleneck it > may be worth adding an SSE4.1 wcsrchr implementation. 
*/ > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > movaps %xmm5, %xmm6 > pxor %xmm8, %xmm8 > > PCMPEQ %xmm8, %xmm5 > PCMPEQ %xmm4, %xmm8 > por %xmm5, %xmm8 > -#else > +# else > movaps %xmm5, %xmm6 > PMINU %xmm4, %xmm5 > -#endif > +# endif > > movaps %xmm4, %xmm9 > PCMPEQ %xmm0, %xmm4 > PCMPEQ %xmm0, %xmm6 > movaps %xmm6, %xmm7 > por %xmm4, %xmm6 > -#ifndef USE_AS_WCSRCHR > +# ifndef USE_AS_WCSRCHR > pxor %xmm8, %xmm8 > PCMPEQ %xmm5, %xmm8 > -#endif > +# endif > pmovmskb %xmm8, %ecx > pmovmskb %xmm6, %eax > > @@ -219,9 +223,9 @@ L(first_loop_old_match): > > bsrl %eax, %eax > addq %rsi, %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4 > @@ -247,9 +251,9 @@ L(new_match): > jz L(first_loop_old_match) > bsrl %eax, %eax > addq %rdi, %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > /* Save minimum state for getting most recent match. We can > @@ -267,27 +271,27 @@ L(second_loop): > /* Since SSE2 no pminud so wcsrchr needs seperate logic for > detecting zero. Note if this is found to be a bottleneck it > may be worth adding an SSE4.1 wcsrchr implementation. 
*/ > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > movaps %xmm5, %xmm6 > pxor %xmm8, %xmm8 > > PCMPEQ %xmm8, %xmm5 > PCMPEQ %xmm4, %xmm8 > por %xmm5, %xmm8 > -#else > +# else > movaps %xmm5, %xmm6 > PMINU %xmm4, %xmm5 > -#endif > +# endif > > movaps %xmm4, %xmm9 > PCMPEQ %xmm0, %xmm4 > PCMPEQ %xmm0, %xmm6 > movaps %xmm6, %xmm7 > por %xmm4, %xmm6 > -#ifndef USE_AS_WCSRCHR > +# ifndef USE_AS_WCSRCHR > pxor %xmm8, %xmm8 > PCMPEQ %xmm5, %xmm8 > -#endif > +# endif > > pmovmskb %xmm8, %ecx > pmovmskb %xmm6, %eax > @@ -312,9 +316,9 @@ L(second_loop_old_match): > orl %ecx, %eax > bsrl %eax, %eax > addq %rsi, %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4 > @@ -340,9 +344,9 @@ L(second_loop_new_match): > jz L(second_loop_old_match) > bsrl %eax, %eax > addq %rdi, %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > ret > > .p2align 4,, 4 > @@ -366,9 +370,10 @@ L(cross_page): > jz L(ret1) > bsrl %eax, %eax > addq %rdi, %rax > -#ifdef USE_AS_WCSRCHR > +# ifdef USE_AS_WCSRCHR > andq $-CHAR_SIZE, %rax > -#endif > +# endif > L(ret1): > ret > END(STRRCHR) > +#endif > diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S > index c6aa8f45a6..dc342a9f44 100644 > --- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S > +++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S > @@ -17,6 +17,7 @@ > <https://www.gnu.org/licenses/>. 
*/ > > #include <sysdep.h> > +#include "../strchr-isa-default-impl.h" > > ENTRY(__strstr_sse2_unaligned) > movzbl (%rsi), %eax > @@ -75,7 +76,7 @@ L(next_pair_index): > .p2align 4 > L(strchr): > movzbl %al, %esi > - jmp __strchr_sse2 > + jmp DEFAULT_STRCHR > > .p2align 4 > L(pair_loop): > diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S > index 67726b6837..f404888a93 100644 > --- a/sysdeps/x86_64/multiarch/wcschr-avx2.S > +++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S > @@ -1,3 +1,8 @@ > -#define STRCHR __wcschr_avx2 > +#ifndef WCSCHR > +# define WCSCHR __wcschr_avx2 > +#endif > + > +#define STRCHR WCSCHR > #define USE_AS_WCSCHR 1 > + > #include "strchr-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S > index 7cb8f1e41a..b5ccc59230 100644 > --- a/sysdeps/x86_64/multiarch/wcschr-evex.S > +++ b/sysdeps/x86_64/multiarch/wcschr-evex.S > @@ -1,3 +1,8 @@ > -#define STRCHR __wcschr_evex > +#ifndef WCSCHR > +# define WCSCHR __wcschr_evex > +#endif > + > +#define STRCHR WCSCHR > #define USE_AS_WCSCHR 1 > + > #include "strchr-evex.S" > diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S > index c872926ba9..1c83957cbc 100644 > --- a/sysdeps/x86_64/multiarch/wcschr-sse2.S > +++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S > @@ -16,13 +16,17 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > + > +#include <isa-level.h> > + > +/* ISA level >= 2 because there is no wcschr-sse4 implementations. 
*/ > +#if ISA_SHOULD_BUILD (2) > + > # ifndef WCSCHR > # define WCSCHR __wcschr_sse2 > # endif > -#endif > > -#include <sysdep.h> > +# include <sysdep.h> > > .text > ENTRY (WCSCHR) > @@ -155,3 +159,4 @@ L(return_null): > ret > > END (WCSCHR) > +#endif > diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S > index d6ca2b8064..f17a8969cb 100644 > --- a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S > +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S > @@ -1,4 +1,3 @@ > -#define STRCMP __wcscmp_avx2_rtm > #define USE_AS_WCSCMP 1 > > #include "strcmp-avx2-rtm.S" > diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2.S b/sysdeps/x86_64/multiarch/wcscmp-avx2.S > index e5da4da689..0a71f907f0 100644 > --- a/sysdeps/x86_64/multiarch/wcscmp-avx2.S > +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2.S > @@ -1,4 +1,3 @@ > -#define STRCMP __wcscmp_avx2 > #define USE_AS_WCSCMP 1 > > #include "strcmp-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S > index 42e73e51eb..b0337a8311 100644 > --- a/sysdeps/x86_64/multiarch/wcscmp-evex.S > +++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S > @@ -1,4 +1,3 @@ > -#define STRCMP __wcscmp_evex > #define USE_AS_WCSCMP 1 > > #include "strcmp-evex.S" > diff --git a/sysdeps/x86_64/multiarch/wcscmp-sse2.S b/sysdeps/x86_64/multiarch/wcscmp-sse2.S > index 6cb7d9faf9..3f32e8127d 100644 > --- a/sysdeps/x86_64/multiarch/wcscmp-sse2.S > +++ b/sysdeps/x86_64/multiarch/wcscmp-sse2.S > @@ -16,11 +16,16 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#define USE_AS_WCSCMP > -#define STRCMP_ISA _sse2 > -#include "strcmp-naming.h" > +#include <isa-level.h> > > -#include <sysdep.h> > +/* ISA level >= 2 because there is no wcscmp-sse4 implementations. */ > +#if ISA_SHOULD_BUILD (2) > +# include <sysdep.h> > + > +/* Needed to get right name. 
*/ > +# define USE_AS_WCSCMP > +# define STRCMP_ISA _sse2 > +# include "strcmp-naming.h" > > /* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. */ > > @@ -949,3 +954,4 @@ L(equal): > ret > > END (STRCMP) > +#endif > diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2.S b/sysdeps/x86_64/multiarch/wcslen-avx2.S > index c9224f1bc5..9784d8f780 100644 > --- a/sysdeps/x86_64/multiarch/wcslen-avx2.S > +++ b/sysdeps/x86_64/multiarch/wcslen-avx2.S > @@ -1,4 +1,8 @@ > -#define STRLEN __wcslen_avx2 > +#ifndef WCSLEN > +# define WCSLEN __wcslen_avx2 > +#endif > + > +#define STRLEN WCSLEN > #define USE_AS_WCSLEN 1 > > #include "strlen-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S > index bdafa83bd5..df21bac63c 100644 > --- a/sysdeps/x86_64/multiarch/wcslen-evex.S > +++ b/sysdeps/x86_64/multiarch/wcslen-evex.S > @@ -1,4 +1,8 @@ > -#define STRLEN __wcslen_evex > +#ifndef WCSLEN > +# define WCSLEN __wcslen_evex > +#endif > + > +#define STRLEN WCSLEN > #define USE_AS_WCSLEN 1 > > #include "strlen-evex.S" > diff --git a/sysdeps/x86_64/multiarch/wcslen-sse2.S b/sysdeps/x86_64/multiarch/wcslen-sse2.S > index 944c3bd9c6..e9c518a932 100644 > --- a/sysdeps/x86_64/multiarch/wcslen-sse2.S > +++ b/sysdeps/x86_64/multiarch/wcslen-sse2.S > @@ -16,13 +16,16 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#if IS_IN (libc) > -# ifndef WCSLEN > -# define WCSLEN __wcslen_sse2 > -# endif > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (1) > + > +# include <sysdep.h> > + > +#ifndef WCSLEN > +# define WCSLEN __wcslen_sse2 > #endif > > -#include <sysdep.h> > > .text > ENTRY (WCSLEN) > @@ -235,3 +238,5 @@ L(exit_tail7): > ret > > END (WCSLEN) > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S > index c88e8342a1..126d183e75 100644 > --- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S > +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S > @@ -1,5 +1,9 @@ > -#define AS_WCSLEN > -#define STRLEN __wcslen_sse4_1 > -#define SECTION(p) p##.sse4.1 > +#ifndef WCSLEN > +# define WCSLEN __wcslen_sse4_1 > +#endif > + > +#define AS_WCSLEN 1 > +#define STRLEN WCSLEN > +#define SECTION(p) p##.sse4.1 > > #include "strlen-sse2.S" > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S > index f467582cbe..099a60c48e 100644 > --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S > +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S > @@ -1,5 +1,3 @@ > -#define STRCMP __wcsncmp_avx2_rtm > #define USE_AS_STRNCMP 1 > #define USE_AS_WCSCMP 1 > -#define OVERFLOW_STRCMP __wcscmp_avx2_rtm > #include "strcmp-avx2-rtm.S" > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S > index e9ede522b8..fc26b593d0 100644 > --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S > +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S > @@ -1,5 +1,4 @@ > -#define STRCMP __wcsncmp_avx2 > #define USE_AS_STRNCMP 1 > #define USE_AS_WCSCMP 1 > -#define OVERFLOW_STRCMP __wcscmp_avx2 > + > #include "strcmp-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S > index 8a8e310713..d3a92e2000 100644 > --- a/sysdeps/x86_64/multiarch/wcsncmp-evex.S > +++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S > @@ -1,4 +1,3 @@ > -#define STRCMP 
__wcsncmp_evex > #define USE_AS_STRNCMP 1 > #define USE_AS_WCSCMP 1 > > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-generic.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c > index 658d541886..b0cf4e87d5 100644 > --- a/sysdeps/x86_64/multiarch/wcsncmp-generic.c > +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c > @@ -16,5 +16,10 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#define WCSNCMP __wcsncmp_generic > -#include <wcsmbs/wcsncmp.c> > +#include <isa-level.h> > +#if ISA_SHOULD_BUILD (2) > + > +# define WCSNCMP __wcsncmp_generic > +# include <wcsmbs/wcsncmp.c> > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S > index fac83546b5..12c3a0fd05 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S > +++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S > @@ -1,4 +1,8 @@ > -#define STRLEN __wcsnlen_avx2 > +#ifndef WCSNLEN > +# define WCSNLEN __wcsnlen_avx2 > +#endif > + > +#define STRLEN WCSNLEN > #define USE_AS_WCSLEN 1 > #define USE_AS_STRNLEN 1 > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S > index 24773bb4e2..e2aad94c1e 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen-evex.S > +++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S > @@ -1,4 +1,8 @@ > -#define STRLEN __wcsnlen_evex > +#ifndef WCSNLEN > +# define WCSNLEN __wcsnlen_evex > +#endif > + > +#define STRLEN WCSNLEN > #define USE_AS_WCSLEN 1 > #define USE_AS_STRNLEN 1 > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c > index 2d75da7709..8b466aac2f 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen-generic.c > +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c > @@ -16,13 +16,18 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > +#include <isa-level.h> > + > +#if ISA_SHOULD_BUILD (1) > > -#if IS_IN (libc) > # include <wchar.h> > > -# define WCSNLEN __wcsnlen_generic > +# ifndef WCSNLEN > +# define WCSNLEN __wcsnlen_generic > +# endif > > extern __typeof (wcsnlen) __wcsnlen_generic; > -#endif > > -#include "wcsmbs/wcsnlen.c" > +# include "wcsmbs/wcsnlen.c" > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S > index 17cdedc2a9..8f534102a2 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S > +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S > @@ -1,6 +1,11 @@ > +#ifndef WCSNLEN > +# define WCSNLEN __wcsnlen_sse4_1 > +# define OVERFLOW_STRLEN __wcslen_sse4_1 > +#endif > + > #define AS_WCSLEN > #define AS_STRNLEN > -#define STRLEN __wcsnlen_sse4_1 > +#define STRLEN WCSNLEN > #define SECTION(p) p##.sse4.1 > > #include "strlen-sse2.S" > diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S > index cf8a239ab2..6eaf5e090b 100644 > --- a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S > +++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S > @@ -1,3 +1,8 @@ > -#define STRRCHR __wcsrchr_avx2 > +#ifndef WCSRCHR > +# define WCSRCHR __wcsrchr_avx2 > +#endif > + > +#define STRRCHR WCSRCHR > #define USE_AS_WCSRCHR 1 > + > #include "strrchr-avx2.S" > diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S > index c64602f7dc..e5c5fe3bf2 100644 > --- a/sysdeps/x86_64/multiarch/wcsrchr-evex.S > +++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S > @@ -1,3 +1,7 @@ > -#define STRRCHR __wcsrchr_evex > +#ifndef WCSRCHR > +# define WCSRCHR __wcsrchr_evex > +#endif > + > +#define STRRCHR WCSRCHR > #define USE_AS_WCSRCHR 1 > #include "strrchr-evex.S" > diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S > index d9259720f8..21388d900c 100644 > --- a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S > +++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S > @@ 
-16,12 +16,11 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > -# ifndef STRRCHR > -# define STRRCHR __wcsrchr_sse2 > -# endif > +#ifndef WCSRCHR > +# define WCSRCHR __wcsrchr_sse2 > #endif > > +#define STRRCHR WCSRCHR > #define USE_AS_WCSRCHR 1 > #define NO_PMINU 1 > > diff --git a/sysdeps/x86_64/strcasecmp_l.S b/sysdeps/x86_64/strcasecmp_l.S > index 84fd7fdfd3..5afa7ea098 100644 > --- a/sysdeps/x86_64/strcasecmp_l.S > +++ b/sysdeps/x86_64/strcasecmp_l.S > @@ -1,11 +1,35 @@ > +/* strcasecmp_l dispatch for RTLD and non-multiarch build > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > /* Symbols = __strcasecmp_l and __strcasecmp. */ > > -#include "multiarch/strcasecmp_l-sse2.S" > +#define DEFAULT_IMPL_V1 "multiarch/strcasecmp_l-sse2.S" > +/* This may cause regressions on some processors that heavily prefer > + aligned loads or have a slow implementation of the `pcmpstri` > + instruction.
*/ > +#define DEFAULT_IMPL_V2 "multiarch/strcasecmp_l-sse4_2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strcasecmp_l-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strcasecmp_l-evex.S" > > -libc_hidden_builtin_def (__strcasecmp_l) > +#include "isa-default-impl.h" > > +libc_hidden_def (__strcasecmp_l) > weak_alias (__strcasecmp_l, strcasecmp_l) > -libc_hidden_def (strcasecmp_l) > > -weak_alias (__strcasecmp, strcasecmp) > libc_hidden_def (__strcasecmp) > +weak_alias (__strcasecmp, strcasecmp) > diff --git a/sysdeps/x86_64/strchr-isa-default-impl.h b/sysdeps/x86_64/strchr-isa-default-impl.h > new file mode 100644 > index 0000000000..0c8cbc6ffb > --- /dev/null > +++ b/sysdeps/x86_64/strchr-isa-default-impl.h > @@ -0,0 +1,28 @@ > +/* Set default strchr impl based on ISA level. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <isa-level.h> > +#if MINIMUM_X86_ISA_LEVEL == 1 || MINIMUM_X86_ISA_LEVEL == 2 > +# define DEFAULT_STRCHR __strchr_sse2 > +#elif MINIMUM_X86_ISA_LEVEL == 3 > +# define DEFAULT_STRCHR __strchr_avx2 > +#elif MINIMUM_X86_ISA_LEVEL == 4 > +# define DEFAULT_STRCHR __strchr_evex > +#else > +# error "Unknown default strchr implementation" > +#endif > diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S > index 77c956c92c..1406c633e8 100644 > --- a/sysdeps/x86_64/strchr.S > +++ b/sysdeps/x86_64/strchr.S > @@ -1,5 +1,4 @@ > -/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. > - For AMD x86-64. > +/* strchr dispatch for RTLD and non-multiarch build > Copyright (C) 2009-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -17,8 +16,13 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > +#define STRCHR strchr > + > +#define DEFAULT_IMPL_V1 "multiarch/strchr-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strchr-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strchr-evex.S" > + > +#include "isa-default-impl.h" > > -#define STRCHR strchr > -#include "multiarch/strchr-sse2.S" > weak_alias (strchr, index) > libc_hidden_builtin_def (strchr) > diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S > index 508e42db26..f1ef907296 100644 > --- a/sysdeps/x86_64/strchrnul.S > +++ b/sysdeps/x86_64/strchrnul.S > @@ -1,6 +1,4 @@ > -/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR > - or terminating NUL byte. > - For AMD x86-64. > +/* strchrnul dispatch for RTLD and non-multiarch build > Copyright (C) 2009-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -18,7 +16,12 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#define STRCHR __strchrnul > -#include "multiarch/strchrnul-sse2.S" > +#define STRCHRNUL __strchrnul > + > +#define DEFAULT_IMPL_V1 "multiarch/strchrnul-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strchrnul-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strchrnul-evex.S" > + > +#include "isa-default-impl.h" > > weak_alias (__strchrnul, strchrnul) > diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S > index 19e54bd3a7..7c3cf87a42 100644 > --- a/sysdeps/x86_64/strcmp.S > +++ b/sysdeps/x86_64/strcmp.S > @@ -1,4 +1,4 @@ > -/* Highly optimized version for x86-64. > +/* strcmp dispatch for RTLD and non-multiarch build > Copyright (C) 1999-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -18,5 +18,14 @@ > > /* Symbol = strcmp. */ > > -#include "multiarch/strcmp-sse2.S" > +#define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S" > +/* strcmp-sse2-unaligned.S is often faster than strcmp-sse42.S and > + doesn't have the drawback of using the `pcmpstri` instruction > + which can be very slow on some CPUs. */ > +#define DEFAULT_IMPL_V2 "multiarch/strcmp-sse2-unaligned.S" > +#define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S" > + > +#include "isa-default-impl.h" > + > libc_hidden_builtin_def (strcmp) > diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S > index c2f5674f8d..e7cb4b1680 100644 > --- a/sysdeps/x86_64/strlen.S > +++ b/sysdeps/x86_64/strlen.S > @@ -1,4 +1,4 @@ > -/* SSE2 version of strlen. > +/* strlen dispatch for RTLD and non-multiarch build > Copyright (C) 2021-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -17,6 +17,11 @@ > <https://www.gnu.org/licenses/>. 
*/ > > #define STRLEN strlen > -#include "multiarch/strlen-sse2.S" > + > +#define DEFAULT_IMPL_V1 "multiarch/strlen-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strlen-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strlen-evex.S" > + > +#include "isa-default-impl.h" > > libc_hidden_builtin_def (strlen) > diff --git a/sysdeps/x86_64/strncase_l.S b/sysdeps/x86_64/strncase_l.S > index 3780fc50b1..de28ecf5d4 100644 > --- a/sysdeps/x86_64/strncase_l.S > +++ b/sysdeps/x86_64/strncase_l.S > @@ -1,11 +1,35 @@ > +/* strncasecmp_l dispatch for RTLD and non-multiarch build > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > /* Symbols = __strncasecmp_l and __strncasecmp. */ > > -#include "multiarch/strncase_l-sse2.S" > +#define DEFAULT_IMPL_V1 "multiarch/strncase_l-sse2.S" > +/* This may cause regressions on some processors that heavily prefer > + aligned loads or have a slow implementation of the `pcmpstri` > + instruction.
*/ > +#define DEFAULT_IMPL_V2 "multiarch/strncase_l-sse4_2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strncase_l-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strncase_l-evex.S" > > -libc_hidden_builtin_def (__strncasecmp_l) > +#include "isa-default-impl.h" > > +libc_hidden_def (__strncasecmp_l) > weak_alias (__strncasecmp_l, strncasecmp_l) > -libc_hidden_def (strncasecmp_l) > > -weak_alias (__strncasecmp, strncasecmp) > libc_hidden_def (__strncasecmp) > +weak_alias (__strncasecmp, strncasecmp) > diff --git a/sysdeps/x86_64/strncmp.S b/sysdeps/x86_64/strncmp.S > index 13d9e82ee2..afb251d9fe 100644 > --- a/sysdeps/x86_64/strncmp.S > +++ b/sysdeps/x86_64/strncmp.S > @@ -1,4 +1,31 @@ > +/* strncmp dispatch for RTLD and non-multiarch build > + Copyright (C) 1999-2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > /* Symbol = strncmp. */ > > -#include "multiarch/strncmp-sse2.S" > +#define DEFAULT_IMPL_V1 "multiarch/strncmp-sse2.S" > +/* This may cause regressions on some processors that heavily prefer > + aligned loads or have a slow implementation of the `pcmpstri` > + instruction.
*/ > +#define DEFAULT_IMPL_V2 "multiarch/strncmp-sse4_2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strncmp-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strncmp-evex.S" > + > +#include "isa-default-impl.h" > + > libc_hidden_builtin_def (strncmp) > diff --git a/sysdeps/x86_64/strnlen.S b/sysdeps/x86_64/strnlen.S > index 174970d58f..b2c2149e07 100644 > --- a/sysdeps/x86_64/strnlen.S > +++ b/sysdeps/x86_64/strnlen.S > @@ -1,6 +1,29 @@ > -#define STRLEN __strnlen > -#include "multiarch/strnlen-sse2.S" > +/* strnlen dispatch for RTLD and non-multiarch build > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define STRNLEN __strnlen > + > +#define DEFAULT_IMPL_V1 "multiarch/strnlen-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strnlen-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strnlen-evex.S" > + > +#include "isa-default-impl.h" > + > +weak_alias (__strnlen, strnlen) > libc_hidden_def (__strnlen) > -weak_alias (__strnlen, strnlen); > -libc_hidden_builtin_def (strnlen) > +libc_hidden_def (strnlen) > diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S > index f39da60454..493d370a28 100644 > --- a/sysdeps/x86_64/strrchr.S > +++ b/sysdeps/x86_64/strrchr.S > @@ -1,4 +1,4 @@ > -/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. > +/* strrchr dispatch for RTLD and non-multiarch build > Copyright (C) 2013-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -17,6 +17,12 @@ > <https://www.gnu.org/licenses/>. */ > > #define STRRCHR strrchr > -#include "multiarch/strrchr-sse2.S" > + > +#define DEFAULT_IMPL_V1 "multiarch/strrchr-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/strrchr-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/strrchr-evex.S" > + > +#include "isa-default-impl.h" > + > weak_alias (strrchr, rindex) > libc_hidden_builtin_def (strrchr) > diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S > index 80b12c4286..01a432b899 100644 > --- a/sysdeps/x86_64/wcschr.S > +++ b/sysdeps/x86_64/wcschr.S > @@ -1,4 +1,4 @@ > -/* wcschr with SSSE3 > +/* wcschr dispatch for RTLD and non-multiarch build > Copyright (C) 2011-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -16,9 +16,14 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > - > #define WCSCHR __wcschr > -#include "multiarch/wcschr-sse2.S" > -libc_hidden_def(__wcschr) > + > +#define DEFAULT_IMPL_V1 "multiarch/wcschr-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/wcschr-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/wcschr-evex.S" > + > +#include "isa-default-impl.h" > + > +libc_hidden_def (__wcschr) > weak_alias (__wcschr, wcschr) > libc_hidden_weak (wcschr) > diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S > index e04cdbf5fe..5d30545fb6 100644 > --- a/sysdeps/x86_64/wcscmp.S > +++ b/sysdeps/x86_64/wcscmp.S > @@ -1,4 +1,4 @@ > -/* Optimized wcscmp for x86-64 with SSE2. > +/* wcscmp dispatch for RTLD and non-multiarch build > Copyright (C) 2011-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -18,6 +18,11 @@ > > /* Symbol = __wcscmp. */ > > -#include "multiarch/wcscmp-sse2.S" > +#define DEFAULT_IMPL_V1 "multiarch/wcscmp-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/wcscmp-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/wcscmp-evex.S" > + > +#include "isa-default-impl.h" > + > libc_hidden_def (__wcscmp) > weak_alias (__wcscmp, wcscmp) > diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S > index 588a0fbe01..e4e25b5353 100644 > --- a/sysdeps/x86_64/wcslen.S > +++ b/sysdeps/x86_64/wcslen.S > @@ -1,4 +1,4 @@ > -/* Optimized wcslen for x86-64 with SSE2. > +/* wcslen dispatch for RTLD and non-multiarch build > Copyright (C) 2011-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -17,5 +17,18 @@ > <https://www.gnu.org/licenses/>. 
*/ > > #define WCSLEN __wcslen > -#include "multiarch/wcslen-sse2.S" > -weak_alias(__wcslen, wcslen) > + > +#define DEFAULT_IMPL_V1 "multiarch/wcslen-sse2.S" > +#define DEFAULT_IMPL_V2 "multiarch/wcslen-sse4_1.S" > +#define DEFAULT_IMPL_V3 "multiarch/wcslen-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/wcslen-evex.S" > + > +#include "isa-default-impl.h" > + > +weak_alias (__wcslen, wcslen) > + > +#if MINIMUM_X86_ISA_LEVEL == 2 && !IS_IN (rtld) > +/* Hidden def so it can be used as overflow fallback in > + wcsnlen-sse4_1.S. */ > +libc_hidden_def (__wcslen) > +#endif > diff --git a/sysdeps/x86_64/wcsncmp-generic.c b/sysdeps/x86_64/wcsncmp-generic.c > new file mode 100644 > index 0000000000..493a6f9b9b > --- /dev/null > +++ b/sysdeps/x86_64/wcsncmp-generic.c > @@ -0,0 +1,29 @@ > +/* wcsncmp dispatch for RTLD and non-multiarch .c ISA level <= 2 build. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* wcsncmp non-multiarch build is split into two files, > + wcsncmp-generic.c and wcsncmp.S. The wcsncmp-generic.c build is for > + ISA level <= 2 and just uses wcsmbs/wcsncmp.c. This must be split > + into two files because we cannot include C code from assembly or > + vice versa. 
*/ > + > +#include <isa-level.h> > + > +#if MINIMUM_X86_ISA_LEVEL <= 2 > +# include "wcsmbs/wcsncmp.c" > +#endif > diff --git a/sysdeps/x86_64/wcsncmp.S b/sysdeps/x86_64/wcsncmp.S > new file mode 100644 > index 0000000000..14f9a1341e > --- /dev/null > +++ b/sysdeps/x86_64/wcsncmp.S > @@ -0,0 +1,40 @@ > +/* wcsncmp dispatch for RTLD and non-multiarch .c files > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* wcsncmp non-multiarch build is split into two files, > + wcsncmp-generic.c and wcsncmp.S. The wcsncmp.S build is for > + ISA level >= 3 uses the optimized assembly implementations in > + multiarch/wcsncmp*.S. This must be split into two files because > + we cannot include C code from assembly or vice versa. */ > + > +#include <isa-level.h> > + > +#if MINIMUM_X86_ISA_LEVEL >= 3 > + > +/* Symbol = wcsncmp. */ > + > +# define DEFAULT_IMPL_V3 "multiarch/wcsncmp-avx2.S" > +# define DEFAULT_IMPL_V4 "multiarch/wcsncmp-evex.S" > + > +/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it > + should never be used from here. 
*/ > +# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL" > + > +# include "isa-default-impl.h" > + > +#endif > diff --git a/sysdeps/x86_64/wcsnlen-generic.c b/sysdeps/x86_64/wcsnlen-generic.c > new file mode 100644 > index 0000000000..ec66511589 > --- /dev/null > +++ b/sysdeps/x86_64/wcsnlen-generic.c > @@ -0,0 +1,29 @@ > +/* wcsnlen dispatch for RTLD and non-multiarch .c ISA level 1 build. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* wcsnlen non-multiarch build is split into two files, > + wcsnlen-generic.c and wcsnlen.S. The wcsnlen-generic.c build is for > + ISA level <= 1 and just uses wcsmbs/wcsnlen.c. This must be split > + into two files because we cannot include C code from assembly or > + vice versa. */ > + > +#include <isa-level.h> > + > +#if MINIMUM_X86_ISA_LEVEL <= 1 > +# include "wcsmbs/wcsnlen.c" > +#endif > diff --git a/sysdeps/x86_64/wcsnlen.S b/sysdeps/x86_64/wcsnlen.S > new file mode 100644 > index 0000000000..b30b3f0785 > --- /dev/null > +++ b/sysdeps/x86_64/wcsnlen.S > @@ -0,0 +1,49 @@ > +/* wcsnlen dispatch for RTLD and non-multiarch .c files > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* wcsnlen non-multiarch build is split into two files, > + wcsnlen-generic.c and wcsnlen.S. The wcsnlen.S build is for > + ISA level >= 2 uses the optimized assembly implementations in > + multiarch/wcsnlen*.S. This must be split into two files because > + we cannot include C code from assembly or vice versa. */ > + > +#include <isa-level.h> > + > +#if MINIMUM_X86_ISA_LEVEL >= 2 > + > +# define WCSNLEN __wcsnlen > +/* This symbol must stay linked to the name in wcslen.S. */ > +#if IS_IN (rtld) > +# define OVERFLOW_STRLEN __wcslen > +#else > +# define OVERFLOW_STRLEN HIDDEN_JUMPTARGET (__wcslen) > +#endif > + > +# define DEFAULT_IMPL_V2 "multiarch/wcsnlen-sse4_1.S" > +# define DEFAULT_IMPL_V3 "multiarch/wcsnlen-avx2.S" > +# define DEFAULT_IMPL_V4 "multiarch/wcsnlen-evex.S" > + > +/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it > + should never be used from here. 
*/ > +# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL" > + > +# include "isa-default-impl.h" > + > +weak_alias (__wcsnlen, wcsnlen) > +libc_hidden_def (__wcsnlen) > +#endif > diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S > index 1d4b1eb21c..abf828b458 100644 > --- a/sysdeps/x86_64/wcsrchr.S > +++ b/sysdeps/x86_64/wcsrchr.S > @@ -1,4 +1,4 @@ > -/* wcsrchr optimized with SSE2. > +/* wcsrchr dispatch for RTLD and non-multiarch build > Copyright (C) 2011-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -16,5 +16,10 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#define STRRCHR wcsrchr > -#include "multiarch/wcsrchr-sse2.S" > +#define WCSRCHR wcsrchr > + > +#define DEFAULT_IMPL_V1 "multiarch/wcsrchr-sse2.S" > +#define DEFAULT_IMPL_V3 "multiarch/wcsrchr-avx2.S" > +#define DEFAULT_IMPL_V4 "multiarch/wcsrchr-evex.S" > + > +#include "isa-default-impl.h" > -- > 2.34.1 > LGTM. Thanks.
Hi Carlos, As release manager, any issues with this change going out before 2.36? Best, Noah On Fri, Jul 15, 2022 at 10:47 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Wed, Jul 13, 2022 at 4:33 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > 1. Add default ISA level selection in non-multiarch/rtld > > implementations. > > > > 2. Add ISA level build guards to different implementations. > > - I.e., strcmp-avx2.S which is ISA level 3 will only build if > > compiled ISA level <= 3. Otherwise there is no reason to > > include it as we will always use one of the ISA level 4 > > implementations (strcmp-evex.S). > > > > 3. Refactor the ifunc selector and ifunc implementation list to use > > the ISA level aware wrapper macros that allow functions below the > > compiled ISA level (with a guaranteed replacement) to be skipped. > > > > Tested with and without multiarch on x86_64 for ISA levels: > > {generic, x86-64-v2, x86-64-v3, x86-64-v4} > > > > And m32 with and without multiarch. > > --- > > sysdeps/x86/isa-level.h | 10 + > > sysdeps/x86_64/Makefile | 6 + > > sysdeps/x86_64/memrchr.S | 10 +- > > sysdeps/x86_64/multiarch/Makefile | 2 - > > sysdeps/x86_64/multiarch/ifunc-avx2.h | 22 +- > > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 648 ++++++++++-------- > > sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 33 +- > > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 27 +- > > sysdeps/x86_64/multiarch/memrchr-avx2.S | 4 +- > > sysdeps/x86_64/multiarch/memrchr-evex.S | 4 +- > > sysdeps/x86_64/multiarch/memrchr-sse2.S | 19 +- > > .../x86_64/multiarch/strcasecmp_l-avx2-rtm.S | 17 +- > > sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S | 3 - > > sysdeps/x86_64/multiarch/strcasecmp_l-evex.S | 3 - > > sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S | 1 + > > sysdeps/x86_64/multiarch/strchr-avx2.S | 4 +- > > sysdeps/x86_64/multiarch/strchr-evex.S | 4 +- > > sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S | 6 +- > > sysdeps/x86_64/multiarch/strchr-sse2.S | 7 +- > > 
sysdeps/x86_64/multiarch/strchr.c | 30 +- > > sysdeps/x86_64/multiarch/strchrnul-avx2.S | 7 +- > > sysdeps/x86_64/multiarch/strchrnul-evex.S | 7 +- > > sysdeps/x86_64/multiarch/strchrnul-sse2.S | 8 +- > > sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S | 5 +- > > sysdeps/x86_64/multiarch/strcmp-avx2.S | 22 +- > > sysdeps/x86_64/multiarch/strcmp-evex.S | 20 +- > > .../x86_64/multiarch/strcmp-sse2-unaligned.S | 18 +- > > sysdeps/x86_64/multiarch/strcmp-sse2.S | 6 +- > > sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 6 +- > > sysdeps/x86_64/multiarch/strcmp.c | 37 +- > > sysdeps/x86_64/multiarch/strlen-avx2.S | 4 +- > > sysdeps/x86_64/multiarch/strlen-evex-base.S | 6 +- > > sysdeps/x86_64/multiarch/strlen-evex.S | 4 +- > > sysdeps/x86_64/multiarch/strlen-sse2.S | 17 +- > > .../x86_64/multiarch/strncase_l-avx2-rtm.S | 18 +- > > sysdeps/x86_64/multiarch/strncase_l-avx2.S | 7 +- > > sysdeps/x86_64/multiarch/strncase_l-evex.S | 4 - > > sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S | 2 - > > sysdeps/x86_64/multiarch/strncmp-avx2.S | 3 +- > > sysdeps/x86_64/multiarch/strncmp-evex.S | 1 - > > sysdeps/x86_64/multiarch/strncmp.c | 27 +- > > sysdeps/x86_64/multiarch/strnlen-avx2.S | 6 +- > > sysdeps/x86_64/multiarch/strnlen-evex.S | 6 +- > > sysdeps/x86_64/multiarch/strnlen-sse2.S | 10 +- > > sysdeps/x86_64/multiarch/strrchr-avx2.S | 4 +- > > sysdeps/x86_64/multiarch/strrchr-evex.S | 4 +- > > sysdeps/x86_64/multiarch/strrchr-sse2.S | 99 +-- > > .../x86_64/multiarch/strstr-sse2-unaligned.S | 3 +- > > sysdeps/x86_64/multiarch/wcschr-avx2.S | 7 +- > > sysdeps/x86_64/multiarch/wcschr-evex.S | 7 +- > > sysdeps/x86_64/multiarch/wcschr-sse2.S | 11 +- > > sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S | 1 - > > sysdeps/x86_64/multiarch/wcscmp-avx2.S | 1 - > > sysdeps/x86_64/multiarch/wcscmp-evex.S | 1 - > > sysdeps/x86_64/multiarch/wcscmp-sse2.S | 14 +- > > sysdeps/x86_64/multiarch/wcslen-avx2.S | 6 +- > > sysdeps/x86_64/multiarch/wcslen-evex.S | 6 +- > > sysdeps/x86_64/multiarch/wcslen-sse2.S | 15 
+- > > sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 10 +- > > sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S | 2 - > > sysdeps/x86_64/multiarch/wcsncmp-avx2.S | 3 +- > > sysdeps/x86_64/multiarch/wcsncmp-evex.S | 1 - > > sysdeps/x86_64/multiarch/wcsncmp-generic.c | 9 +- > > sysdeps/x86_64/multiarch/wcsnlen-avx2.S | 6 +- > > sysdeps/x86_64/multiarch/wcsnlen-evex.S | 6 +- > > sysdeps/x86_64/multiarch/wcsnlen-generic.c | 13 +- > > sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 7 +- > > sysdeps/x86_64/multiarch/wcsrchr-avx2.S | 7 +- > > sysdeps/x86_64/multiarch/wcsrchr-evex.S | 6 +- > > sysdeps/x86_64/multiarch/wcsrchr-sse2.S | 7 +- > > sysdeps/x86_64/strcasecmp_l.S | 32 +- > > sysdeps/x86_64/strchr-isa-default-impl.h | 28 + > > sysdeps/x86_64/strchr.S | 12 +- > > sysdeps/x86_64/strchrnul.S | 13 +- > > sysdeps/x86_64/strcmp.S | 13 +- > > sysdeps/x86_64/strlen.S | 9 +- > > sysdeps/x86_64/strncase_l.S | 32 +- > > sysdeps/x86_64/strncmp.S | 29 +- > > sysdeps/x86_64/strnlen.S | 31 +- > > sysdeps/x86_64/strrchr.S | 10 +- > > sysdeps/x86_64/wcschr.S | 13 +- > > sysdeps/x86_64/wcscmp.S | 9 +- > > sysdeps/x86_64/wcslen.S | 19 +- > > sysdeps/x86_64/wcsncmp-generic.c | 29 + > > sysdeps/x86_64/wcsncmp.S | 40 ++ > > sysdeps/x86_64/wcsnlen-generic.c | 29 + > > sysdeps/x86_64/wcsnlen.S | 49 ++ > > sysdeps/x86_64/wcsrchr.S | 11 +- > > 88 files changed, 1157 insertions(+), 618 deletions(-) > > create mode 100644 sysdeps/x86_64/strchr-isa-default-impl.h > > create mode 100644 sysdeps/x86_64/wcsncmp-generic.c > > create mode 100644 sysdeps/x86_64/wcsncmp.S > > create mode 100644 sysdeps/x86_64/wcsnlen-generic.c > > create mode 100644 sysdeps/x86_64/wcsnlen.S > > > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h > > index 77f9e2c0c3..3c4480aba7 100644 > > --- a/sysdeps/x86/isa-level.h > > +++ b/sysdeps/x86/isa-level.h > > @@ -84,6 +84,7 @@ > > > > /* ISA level >= 2 guaranteed includes. 
*/ > > #define SSE4_2_X86_ISA_LEVEL 2 > > +#define SSE4_1_X86_ISA_LEVEL 2 > > #define SSSE3_X86_ISA_LEVEL 2 > > > > > > @@ -101,9 +102,18 @@ > > when ISA level < 3. */ > > #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3 > > > > +/* NB: This feature is disabled when ISA level >= 3. All CPUs with > > + this feature don't run on glibc built with ISA level >= 3. */ > > +#define Slow_SSE42_X86_ISA_LEVEL 3 > > + > > /* Feature(s) enabled when ISA level >= 2. */ > > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2 > > > > +/* NB: This feature is disabled when ISA level >= 2, which was enabled > > + for the early Atom CPUs. */ > > +#define Slow_BSF_X86_ISA_LEVEL 2 > > + > > + > > /* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P > > macros are wrappers for the respective CPU_FEATURE{S}_{USABLE|ARCH}_P > > runtime checks. They differ in two ways. > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile > > index e597a4855f..341ee69a35 100644 > > --- a/sysdeps/x86_64/Makefile > > +++ b/sysdeps/x86_64/Makefile > > @@ -197,6 +197,12 @@ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym > > endif > > > > ifeq ($(subdir),wcsmbs) > > + > > +sysdep_routines += \ > > + wcsncmp-generic \ > > + wcsnlen-generic \ > > +# sysdep_routines > > + > > tests += \ > > tst-rsi-wcslen > > endif > > diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S > > index 385e2c5668..d39b1aa0e2 100644 > > --- a/sysdeps/x86_64/memrchr.S > > +++ b/sysdeps/x86_64/memrchr.S > > @@ -1,4 +1,4 @@ > > -/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using > > +/* memrchr dispatch for RTLD and non-multiarch build > > > > Copyright (C) 2011-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > @@ -18,5 +18,11 @@ > > <https://www.gnu.org/licenses/>. 
*/ > > > > #define MEMRCHR __memrchr > > -#include "multiarch/memrchr-sse2.S" > > + > > +#define DEFAULT_IMPL_V1 "multiarch/memrchr-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/memrchr-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/memrchr-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > weak_alias (__memrchr, memrchr) > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > > index d6b62af850..ba29a65716 100644 > > --- a/sysdeps/x86_64/multiarch/Makefile > > +++ b/sysdeps/x86_64/multiarch/Makefile > > @@ -144,11 +144,9 @@ sysdep_routines += \ > > wcslen-sse4_1 \ > > wcsncmp-avx2 \ > > wcsncmp-avx2-rtm \ > > - wcsncmp-generic \ > > wcsncmp-evex \ > > wcsnlen-avx2 \ > > wcsnlen-avx2-rtm \ > > - wcsnlen-generic \ > > wcsnlen-evex \ > > wcsnlen-evex512 \ > > wcsnlen-sse4_1 \ > > diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h > > index 1d9cdfcfec..a57a9952f3 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h > > @@ -23,28 +23,32 @@ > > # define GENERIC sse2 > > #endif > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > > > static inline void * > > IFUNC_SELECTOR (void) > > { > > - const struct cpu_features* cpu_features = __get_cpu_features (); > > + const struct cpu_features *cpu_features = __get_cpu_features (); > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P 
(cpu_features, AVX2) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + AVX_Fast_Unaligned_Load, )) > > { > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > return OPTIMIZE (evex); > > > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > > return OPTIMIZE (avx2_rtm); > > > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + Prefer_No_VZEROUPPER, !)) > > return OPTIMIZE (avx2); > > } > > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > index 2c96cb62d2..3b1df9b73c 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > @@ -205,19 +205,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/memrchr.c. 
*/ > > IFUNC_IMPL (i, name, memrchr, > > - IFUNC_IMPL_ADD (array, i, memrchr, > > - CPU_FEATURE_USABLE (AVX2), > > - __memrchr_avx2) > > - IFUNC_IMPL_ADD (array, i, memrchr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __memrchr_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, memrchr, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __memrchr_evex) > > - > > - IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __memrchr_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr, > > + CPU_FEATURE_USABLE (AVX2), > > + __memrchr_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __memrchr_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, memrchr, > > + 1, > > + __memrchr_sse2)) > > > > #ifdef SHARED > > /* Support sysdeps/x86_64/multiarch/memset_chk.c. */ > > @@ -346,49 +349,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/strlen.c. 
*/ > > IFUNC_IMPL (i, name, strlen, > > - IFUNC_IMPL_ADD (array, i, strlen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strlen_avx2) > > - IFUNC_IMPL_ADD (array, i, strlen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strlen_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strlen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strlen_evex) > > - IFUNC_IMPL_ADD (array, i, strlen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strlen_evex512) > > - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strlen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strlen_evex) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strlen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strlen_evex512) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strlen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strlen_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strlen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strlen_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strlen, > > + 1, > > + __strlen_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strnlen.c. 
*/ > > IFUNC_IMPL (i, name, strnlen, > > - IFUNC_IMPL_ADD (array, i, strnlen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strnlen_avx2) > > - IFUNC_IMPL_ADD (array, i, strnlen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strnlen_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strnlen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strnlen_evex) > > - IFUNC_IMPL_ADD (array, i, strnlen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strnlen_evex512) > > - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strnlen_evex) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strnlen_evex512) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strnlen_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strnlen_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strnlen, > > + 1, > > + __strnlen_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/stpncpy.c. */ > > IFUNC_IMPL (i, name, stpncpy, > > @@ -422,40 +433,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. 
*/ > > IFUNC_IMPL (i, name, strcasecmp, > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __strcasecmp_evex) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __strcasecmp_avx2) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strcasecmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - CPU_FEATURE_USABLE (SSE4_2), > > - __strcasecmp_sse42) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __strcasecmp_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __strcasecmp_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strcasecmp_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp, > > + CPU_FEATURE_USABLE (SSE4_2), > > + __strcasecmp_sse42) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp, > > + 1, > > + __strcasecmp_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. 
*/ > > IFUNC_IMPL (i, name, strcasecmp_l, > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __strcasecmp_l_evex) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __strcasecmp_l_avx2) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strcasecmp_l_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strcasecmp_l, > > - CPU_FEATURE_USABLE (SSE4_2), > > - __strcasecmp_l_sse42) > > - IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, > > - __strcasecmp_l_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __strcasecmp_l_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __strcasecmp_l_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strcasecmp_l_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l, > > + CPU_FEATURE_USABLE (SSE4_2), > > + __strcasecmp_l_sse42) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l, > > + 1, > > + __strcasecmp_l_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strcat.c. */ > > IFUNC_IMPL (i, name, strcat, > > @@ -474,74 +492,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/strchr.c. 
*/ > > IFUNC_IMPL (i, name, strchr, > > - IFUNC_IMPL_ADD (array, i, strchr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strchr_avx2) > > - IFUNC_IMPL_ADD (array, i, strchr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strchr_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strchr, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strchr_evex) > > - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) > > - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strchr, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strchr_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strchr_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strchr_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strchr, > > + 1, > > + __strchr_sse2) > > + X86_IFUNC_IMPL_ADD_V1 (array, i, strchr, > > + 1, > > + __strchr_sse2_no_bsf)) > > > > /* Support sysdeps/x86_64/multiarch/strchrnul.c. 
*/ > > IFUNC_IMPL (i, name, strchrnul, > > - IFUNC_IMPL_ADD (array, i, strchrnul, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strchrnul_avx2) > > - IFUNC_IMPL_ADD (array, i, strchrnul, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strchrnul_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strchrnul, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strchrnul_evex) > > - IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strchrnul, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strchrnul_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strchrnul_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strchrnul_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strchrnul, > > + 1, > > + __strchrnul_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strrchr.c. 
*/ > > IFUNC_IMPL (i, name, strrchr, > > - IFUNC_IMPL_ADD (array, i, strrchr, > > - CPU_FEATURE_USABLE (AVX2), > > - __strrchr_avx2) > > - IFUNC_IMPL_ADD (array, i, strrchr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strrchr_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strrchr, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __strrchr_evex) > > - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __strrchr_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, > > + CPU_FEATURE_USABLE (AVX2), > > + __strrchr_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strrchr_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strrchr, > > + 1, > > + __strrchr_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strcmp.c. 
*/ > > IFUNC_IMPL (i, name, strcmp, > > - IFUNC_IMPL_ADD (array, i, strcmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __strcmp_avx2) > > - IFUNC_IMPL_ADD (array, i, strcmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strcmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strcmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __strcmp_evex) > > - IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2), > > - __strcmp_sse42) > > - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned) > > - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strcmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __strcmp_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __strcmp_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strcmp_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, > > + CPU_FEATURE_USABLE (SSE4_2), > > + __strcmp_sse42) > > + /* ISA V2 wrapper for SSE2 implementations because the SSE2 > > + implementations are also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, > > + 1, > > + __strcmp_sse2_unaligned) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, > > + 1, > > + __strcmp_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strcpy.c. */ > > IFUNC_IMPL (i, name, strcpy, > > @@ -568,41 +607,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ > > IFUNC_IMPL (i, name, strncasecmp, > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __strncasecmp_evex) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __strncasecmp_avx2) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strncasecmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - CPU_FEATURE_USABLE (SSE4_2), > > - __strncasecmp_sse42) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, 1, > > - __strncasecmp_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __strncasecmp_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __strncasecmp_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strncasecmp_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp, > > + CPU_FEATURE_USABLE (SSE4_2), > > + __strncasecmp_sse42) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp, > > + 1, > > + __strncasecmp_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ > > IFUNC_IMPL (i, name, strncasecmp_l, > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __strncasecmp_l_evex) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __strncasecmp_l_avx2) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strncasecmp_l_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strncasecmp_l, > > - CPU_FEATURE_USABLE (SSE4_2), > > - __strncasecmp_l_sse42) > > - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, > > - __strncasecmp_l_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __strncasecmp_l_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __strncasecmp_l_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strncasecmp_l_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l, > > + CPU_FEATURE_USABLE (SSE4_2), > > + __strncasecmp_l_sse42) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l, > > + 1, > > + __strncasecmp_l_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/strncat.c. */ > > IFUNC_IMPL (i, name, strncat, > > @@ -664,69 +709,85 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/wcschr.c. 
*/ > > IFUNC_IMPL (i, name, wcschr, > > - IFUNC_IMPL_ADD (array, i, wcschr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcschr_avx2) > > - IFUNC_IMPL_ADD (array, i, wcschr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __wcschr_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, wcschr, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcschr_evex) > > - IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcschr, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcschr_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcschr_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __wcschr_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcschr, > > + 1, > > + __wcschr_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/wcsrchr.c. 
*/ > > IFUNC_IMPL (i, name, wcsrchr, > > - IFUNC_IMPL_ADD (array, i, wcsrchr, > > - CPU_FEATURE_USABLE (AVX2), > > - __wcsrchr_avx2) > > - IFUNC_IMPL_ADD (array, i, wcsrchr, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __wcsrchr_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, wcsrchr, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcsrchr_evex) > > - IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcsrchr_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, > > + CPU_FEATURE_USABLE (AVX2), > > + __wcsrchr_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __wcsrchr_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsrchr, > > + 1, > > + __wcsrchr_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/wcscmp.c. 
*/ > > IFUNC_IMPL (i, name, wcscmp, > > - IFUNC_IMPL_ADD (array, i, wcscmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __wcscmp_avx2) > > - IFUNC_IMPL_ADD (array, i, wcscmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __wcscmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, wcscmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcscmp_evex) > > - IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcscmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcscmp_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __wcscmp_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __wcscmp_avx2_rtm) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcscmp, > > + 1, > > + __wcscmp_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/wcsncmp.c. 
*/ > > IFUNC_IMPL (i, name, wcsncmp, > > - IFUNC_IMPL_ADD (array, i, wcsncmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __wcsncmp_avx2) > > - IFUNC_IMPL_ADD (array, i, wcsncmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __wcsncmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, wcsncmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcsncmp_evex) > > - IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsncmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcsncmp_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __wcsncmp_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __wcsncmp_avx2_rtm) > > + /* ISA V2 wrapper for GENERIC implementation because the > > + GENERIC implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncmp, > > + 1, > > + __wcsncmp_generic)) > > > > /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ > > IFUNC_IMPL (i, name, wcscpy, > > @@ -736,55 +797,59 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/wcslen.c. 
*/ > > IFUNC_IMPL (i, name, wcslen, > > - IFUNC_IMPL_ADD (array, i, wcslen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcslen_avx2) > > - IFUNC_IMPL_ADD (array, i, wcslen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __wcslen_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, wcslen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcslen_evex) > > - IFUNC_IMPL_ADD (array, i, wcslen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcslen_evex512) > > - IFUNC_IMPL_ADD (array, i, wcslen, > > - CPU_FEATURE_USABLE (SSE4_1), > > - __wcslen_sse4_1) > > - IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcslen_evex) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcslen_evex512) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcslen_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __wcslen_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcslen, > > + CPU_FEATURE_USABLE (SSE4_1), > > + __wcslen_sse4_1) > > + X86_IFUNC_IMPL_ADD_V1 (array, i, wcslen, > > + 1, > > + __wcslen_sse2)) > > > > /* Support sysdeps/x86_64/multiarch/wcsnlen.c. 
*/ > > IFUNC_IMPL (i, name, wcsnlen, > > - IFUNC_IMPL_ADD (array, i, wcsnlen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcsnlen_avx2) > > - IFUNC_IMPL_ADD (array, i, wcsnlen, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (BMI2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __wcsnlen_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, wcsnlen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcsnlen_evex) > > - IFUNC_IMPL_ADD (array, i, wcsnlen, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW) > > - && CPU_FEATURE_USABLE (BMI2)), > > - __wcsnlen_evex512) > > - IFUNC_IMPL_ADD (array, i, wcsnlen, > > - CPU_FEATURE_USABLE (SSE4_1), > > - __wcsnlen_sse4_1) > > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcsnlen_evex) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcsnlen_evex512) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2)), > > + __wcsnlen_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (BMI2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __wcsnlen_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsnlen, > > + CPU_FEATURE_USABLE (SSE4_1), > > + __wcsnlen_sse4_1) > > + X86_IFUNC_IMPL_ADD_V1 (array, i, wcsnlen, > > + 1, > > + __wcsnlen_generic)) > > > > /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ > > IFUNC_IMPL (i, name, wmemchr, > > @@ -1050,20 +1115,25 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > > > /* Support sysdeps/x86_64/multiarch/strncmp.c. 
*/ > > IFUNC_IMPL (i, name, strncmp, > > - IFUNC_IMPL_ADD (array, i, strncmp, > > - CPU_FEATURE_USABLE (AVX2), > > - __strncmp_avx2) > > - IFUNC_IMPL_ADD (array, i, strncmp, > > - (CPU_FEATURE_USABLE (AVX2) > > - && CPU_FEATURE_USABLE (RTM)), > > - __strncmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strncmp, > > - (CPU_FEATURE_USABLE (AVX512VL) > > - && CPU_FEATURE_USABLE (AVX512BW)), > > - __strncmp_evex) > > - IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2), > > - __strncmp_sse42) > > - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2)) > > + X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp, > > + (CPU_FEATURE_USABLE (AVX512VL) > > + && CPU_FEATURE_USABLE (AVX512BW)), > > + __strncmp_evex) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp, > > + CPU_FEATURE_USABLE (AVX2), > > + __strncmp_avx2) > > + X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp, > > + (CPU_FEATURE_USABLE (AVX2) > > + && CPU_FEATURE_USABLE (RTM)), > > + __strncmp_avx2_rtm) > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp, > > + CPU_FEATURE_USABLE (SSE4_2), > > + __strncmp_sse42) > > + /* ISA V2 wrapper for SSE2 implementation because the SSE2 > > + implementation is also used at ISA level 2. */ > > + X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp, > > + 1, > > + __strncmp_sse2)) > > > > #ifdef SHARED > > /* Support sysdeps/x86_64/multiarch/wmemset_chk.c. 
*/ > > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > > index 296d32071b..68646ef199 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > > @@ -19,32 +19,39 @@ > > > > #include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > > > static inline void * > > IFUNC_SELECTOR (void) > > { > > - const struct cpu_features* cpu_features = __get_cpu_features (); > > + const struct cpu_features *cpu_features = __get_cpu_features (); > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + AVX_Fast_Unaligned_Load, )) > > { > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > - return OPTIMIZE (evex); > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > + return OPTIMIZE (evex); > > > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > > - return OPTIMIZE (avx2_rtm); > > + return OPTIMIZE (avx2_rtm); > > > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > > - return OPTIMIZE (avx2); > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + Prefer_No_VZEROUPPER, !)) > 
> + return OPTIMIZE (avx2); > > } > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > + /* Keep this as a runtime check as it's not guaranteed at ISA > > + level 2. */ > > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > > return OPTIMIZE (sse42); > > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > > index 88c1c502af..064722c2bd 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > > @@ -23,33 +23,38 @@ > > # define GENERIC sse2 > > #endif > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > > > static inline void * > > IFUNC_SELECTOR (void) > > { > > - const struct cpu_features* cpu_features = __get_cpu_features (); > > + const struct cpu_features *cpu_features = __get_cpu_features (); > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + AVX_Fast_Unaligned_Load, )) > > { > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > 
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > return OPTIMIZE (evex); > > > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > > return OPTIMIZE (avx2_rtm); > > > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + Prefer_No_VZEROUPPER, !)) > > return OPTIMIZE (avx2); > > } > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > > return OPTIMIZE (sse4_1); > > > > return OPTIMIZE (GENERIC); > > diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S > > index f300d7daf4..d1457ab60c 100644 > > --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (3) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S > > index 91329b18dc..ea3a0a0a60 100644 > > --- a/sysdeps/x86_64/multiarch/memrchr-evex.S > > +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (4) > > > > # include <sysdep.h> > > # include "evex256-vecs.h" > > diff --git a/sysdeps/x86_64/multiarch/memrchr-sse2.S b/sysdeps/x86_64/multiarch/memrchr-sse2.S > > index d92a4022dc..4cc8b9e3b0 100644 > > --- a/sysdeps/x86_64/multiarch/memrchr-sse2.S > > +++ b/sysdeps/x86_64/multiarch/memrchr-sse2.S > > @@ -16,22 +16,26 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation > > + so we need this to build for ISA V2 builds. */ > > +#if ISA_SHOULD_BUILD (2) > > + > > # ifndef MEMRCHR > > # define MEMRCHR __memrchr_sse2 > > # endif > > -#endif > > > > -#include <sysdep.h> > > -#define VEC_SIZE 16 > > -#define PAGE_SIZE 4096 > > +# include <sysdep.h> > > +# define VEC_SIZE 16 > > +# define PAGE_SIZE 4096 > > > > .text > > ENTRY_P2ALIGN(MEMRCHR, 6) > > -#ifdef __ILP32__ > > +# ifdef __ILP32__ > > /* Clear upper bits. */ > > mov %RDX_LP, %RDX_LP > > -#endif > > +# endif > > movd %esi, %xmm0 > > > > /* Get end pointer. */ > > @@ -352,3 +356,4 @@ L(zero_3): > > ret > > /* 2-bytes from next cache line. */ > > END(MEMRCHR) > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S > > index 09957fc3c5..d408751f4c 100644 > > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S > > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S > > @@ -1,15 +1,2 @@ > > -#ifndef STRCMP > > -# define STRCMP __strcasecmp_l_avx2_rtm > > -#endif > > - > > -#define _GLABEL(x) x ## _rtm > > -#define GLABEL(x) _GLABEL(x) > > - > > -#define ZERO_UPPER_VEC_REGISTERS_RETURN \ > > - ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > > - > > -#define VZEROUPPER_RETURN jmp L(return_vzeroupper) > > - > > -#define SECTION(p) p##.avx.rtm > > - > > -#include "strcasecmp_l-avx2.S" > > +#define USE_AS_STRCASECMP_L > > +#include "strcmp-avx2-rtm.S" > > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S > > index e2762f2a22..167f866014 100644 > > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S > > @@ -16,8 +16,5 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#ifndef STRCMP > > -# define STRCMP __strcasecmp_l_avx2 > > -#endif > > #define USE_AS_STRCASECMP_L > > #include "strcmp-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S > > index 58642db748..012a084930 100644 > > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S > > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S > > @@ -16,8 +16,5 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#ifndef STRCMP > > -# define STRCMP __strcasecmp_l_evex > > -#endif > > #define USE_AS_STRCASECMP_L > > #include "strcmp-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S > > index a2b5741399..6ffd09b513 100644 > > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S > > @@ -17,4 +17,5 @@ > > <https://www.gnu.org/licenses/>. */ > > > > #define USE_AS_STRCASECMP_L > > + > > #include "strcmp-sse2.S" > > diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S > > index 1a916cc951..425a40b8de 100644 > > --- a/sysdeps/x86_64/multiarch/strchr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (3) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S > > index ec739fb8f9..a1c15c4419 100644 > > --- a/sysdeps/x86_64/multiarch/strchr-evex.S > > +++ b/sysdeps/x86_64/multiarch/strchr-evex.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (4) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S > > index 93e6f62d7f..bb092e3f61 100644 > > --- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S > > +++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S > > @@ -16,7 +16,11 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +/* NB: atom builds with ISA level == 1 so no reason to hold onto this > > + at ISA level >= 2. */ > > +#if ISA_SHOULD_BUILD (1) > > > > # include <sysdep.h> > > # include "asm-syntax.h" > > diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S > > index f7767ca543..7a182f0c3b 100644 > > --- a/sysdeps/x86_64/multiarch/strchr-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strchr-sse2.S > > @@ -16,7 +16,12 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) || defined STRCHR > > +#include <isa-level.h> > > + > > +/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation > > + so we need this to build for ISA V2 builds. 
*/ > > +#if ISA_SHOULD_BUILD (2) > > + > > # ifndef STRCHR > > # define STRCHR __strchr_sse2 > > # endif > > diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c > > index de737580eb..ce7441c532 100644 > > --- a/sysdeps/x86_64/multiarch/strchr.c > > +++ b/sysdeps/x86_64/multiarch/strchr.c > > @@ -26,36 +26,40 @@ > > # define SYMBOL_NAME strchr > > # include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; > > > > static inline void * > > IFUNC_SELECTOR (void) > > { > > - const struct cpu_features* cpu_features = __get_cpu_features (); > > + const struct cpu_features *cpu_features = __get_cpu_features (); > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + AVX_Fast_Unaligned_Load, )) > > { > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) > > return OPTIMIZE (evex); > > > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > > return OPTIMIZE (avx2_rtm); > > > > - if (!CPU_FEATURES_ARCH_P (cpu_features, 
Prefer_No_VZEROUPPER)) > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + Prefer_No_VZEROUPPER, !)) > > return OPTIMIZE (avx2); > > } > > > > - if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) > > - return OPTIMIZE (sse2_no_bsf); > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF, !)) > > + return OPTIMIZE (sse2); > > > > - return OPTIMIZE (sse2); > > + return OPTIMIZE (sse2_no_bsf); > > } > > > > libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S > > index fa0cc09760..10ad5e6058 100644 > > --- a/sysdeps/x86_64/multiarch/strchrnul-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S > > @@ -1,3 +1,8 @@ > > -#define STRCHR __strchrnul_avx2 > > +#ifndef STRCHRNUL > > +# define STRCHRNUL __strchrnul_avx2 > > +#endif > > + > > +#define STRCHR STRCHRNUL > > #define USE_AS_STRCHRNUL 1 > > + > > #include "strchr-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S > > index 064fe7ca9e..0f216cb47f 100644 > > --- a/sysdeps/x86_64/multiarch/strchrnul-evex.S > > +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S > > @@ -1,3 +1,8 @@ > > -#define STRCHR __strchrnul_evex > > +#ifndef STRCHRNUL > > +# define STRCHRNUL __strchrnul_evex > > +#endif > > + > > +#define STRCHR STRCHRNUL > > #define USE_AS_STRCHRNUL 1 > > + > > #include "strchr-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S > > index 7238977a21..7ee81ae510 100644 > > --- a/sysdeps/x86_64/multiarch/strchrnul-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S > > @@ -16,12 +16,10 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > -# ifndef STRCHR > > -# define STRCHR __strchrnul_sse2 > > -# endif > > +#ifndef STRCHRNUL > > +# define STRCHRNUL __strchrnul_sse2 > > #endif > > - > > #define AS_STRCHRNUL > > +#define STRCHR STRCHRNUL > > > > #include "strchr-sse2.S" > > diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S > > index aecd30d97f..74f1f996a9 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S > > @@ -1,12 +1,9 @@ > > -#ifndef STRCMP > > -# define STRCMP __strcmp_avx2_rtm > > -#endif > > - > > #define ZERO_UPPER_VEC_REGISTERS_RETURN \ > > ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > > > > #define VZEROUPPER_RETURN jmp L(return_vzeroupper) > > > > #define SECTION(p) p##.avx.rtm > > +#define STRCMP_ISA _avx2_rtm > > > > #include "strcmp-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S > > index 3ab21e3a58..4c01d664e8 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S > > @@ -16,7 +16,15 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (3) > > + > > +# ifndef STRCMP_ISA > > +# define STRCMP_ISA _avx2 > > +# endif > > + > > +# include "strcmp-naming.h" > > > > # include <sysdep.h> > > > > @@ -86,15 +94,11 @@ > > > > # ifdef USE_AS_STRCASECMP_L > > # ifdef USE_AS_STRNCMP > > -# define STRCASECMP __strncasecmp_avx2 > > # define LOCALE_REG rcx > > # define LOCALE_REG_LP RCX_LP > > -# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii > > # else > > -# define STRCASECMP __strcasecmp_avx2 > > # define LOCALE_REG rdx > > # define LOCALE_REG_LP RDX_LP > > -# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii > > # endif > > # endif > > > > @@ -185,18 +189,14 @@ > > .type STRCMP, @function > > .globl STRCMP > > > > -# ifndef GLABEL > > -# define GLABEL(...) __VA_ARGS__ > > -# endif > > - > > # ifdef USE_AS_STRCASECMP_L > > -ENTRY (GLABEL(STRCASECMP)) > > +ENTRY (STRCASECMP) > > movq __libc_tsd_LOCALE@gottpoff(%rip), %rax > > mov %fs:(%rax), %LOCALE_REG_LP > > > > /* Either 1 or 5 bytes (dependeing if CET is enabled). */ > > .p2align 4 > > -END (GLABEL(STRCASECMP)) > > +END (STRCASECMP) > > /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ > > # endif > > > > diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S > > index afbf13a230..e482d0167f 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-evex.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S > > @@ -16,7 +16,12 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (4) > > + > > +# define STRCMP_ISA _evex > > +# include "strcmp-naming.h" > > > > # include <sysdep.h> > > # if defined USE_AS_STRCASECMP_L > > @@ -37,10 +42,6 @@ > > # define VMOVA vmovdqa64 > > > > # ifdef USE_AS_WCSCMP > > -# ifndef OVERFLOW_STRCMP > > -# define OVERFLOW_STRCMP __wcscmp_evex > > -# endif > > - > > # define TESTEQ subl $0xff, > > /* Compare packed dwords. */ > > # define VPCMP vpcmpd > > @@ -50,10 +51,6 @@ > > /* 1 dword char == 4 bytes. */ > > # define SIZE_OF_CHAR 4 > > # else > > -# ifndef OVERFLOW_STRCMP > > -# define OVERFLOW_STRCMP __strcmp_evex > > -# endif > > - > > # define TESTEQ incl > > /* Compare packed bytes. */ > > # define VPCMP vpcmpb > > @@ -120,15 +117,11 @@ > > > > # ifdef USE_AS_STRCASECMP_L > > # ifdef USE_AS_STRNCMP > > -# define STRCASECMP __strncasecmp_evex > > # define LOCALE_REG rcx > > # define LOCALE_REG_LP RCX_LP > > -# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii > > # else > > -# define STRCASECMP __strcasecmp_evex > > # define LOCALE_REG rdx > > # define LOCALE_REG_LP RDX_LP > > -# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii > > # endif > > # endif > > > > @@ -214,7 +207,6 @@ > > .align 16 > > .type STRCMP, @function > > .globl STRCMP > > - > > # ifdef USE_AS_STRCASECMP_L > > ENTRY (STRCASECMP) > > movq __libc_tsd_LOCALE@gottpoff(%rip), %rax > > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S > > index 0d691b78a8..33c18a28e8 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S > > @@ -16,11 +16,20 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > > > -#include "sysdep.h" > > +/* Continue building as ISA level 2. 
We use this as ISA V2 default > > + because strcmp-sse42 uses pcmpstri (slow on some SSE4.2 > > + processors) and this implementation is potentially faster than > > + strcmp-sse42 (aside from the slower page cross case). */ > > +#if ISA_SHOULD_BUILD (2) > > > > -ENTRY ( __strcmp_sse2_unaligned) > > +# define STRCMP_ISA _sse2_unaligned > > +# include "strcmp-naming.h" > > + > > +# include "sysdep.h" > > + > > +ENTRY (STRCMP) > > movl %edi, %eax > > xorl %edx, %edx > > pxor %xmm7, %xmm7 > > @@ -208,6 +217,5 @@ L(cross_page): > > L(different): > > subl %ecx, %eax > > ret > > -END (__strcmp_sse2_unaligned) > > - > > +END (STRCMP) > > #endif > > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S > > index b1220231ab..3c69fc1df1 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S > > @@ -16,7 +16,11 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) || IS_IN (rtld) > > +#include <isa-level.h> > > + > > +/* Continue building at ISA level 2 as the strcmp-sse42 is not always > > + preferable for ISA level == 2 CPUs. */ > > +#if ISA_SHOULD_BUILD (2) > > > > # define STRCMP_ISA _sse2 > > # include "strcmp-naming.h" > > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S > > index 963e208ccb..dc6fc90e14 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S > > @@ -16,7 +16,10 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (2) > > + > > # include <sysdep.h> > > > > # define STRCMP_ISA _sse42 > > @@ -1766,7 +1769,6 @@ LABEL(unaligned_table): > > .int LABEL(ashr_0) - LABEL(unaligned_table) > > > > # undef LABEL > > -# undef GLABEL > > # undef SECTION > > # undef movdqa > > # undef movdqu > > diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c > > index 9c1677724c..fdd5afe3af 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp.c > > +++ b/sysdeps/x86_64/multiarch/strcmp.c > > @@ -26,37 +26,50 @@ > > # define SYMBOL_NAME strcmp > > # include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) > > + OPTIMIZE (sse2_unaligned) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > + > > + > > > > static inline void * > > IFUNC_SELECTOR (void) > > { > > - const struct cpu_features* cpu_features = __get_cpu_features (); > > + const struct cpu_features *cpu_features = __get_cpu_features (); > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + AVX_Fast_Unaligned_Load, )) > > { > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > - && CPU_FEATURE_USABLE_P 
(cpu_features, AVX512BW) > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > > return OPTIMIZE (evex); > > > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > > return OPTIMIZE (avx2_rtm); > > > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + Prefer_No_VZEROUPPER, !)) > > return OPTIMIZE (avx2); > > } > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > + /* Keep this as runtime check. Some ISA level >= 2 CPUs such as > > + Tremont, Silvermont, and more check this. */ > > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > > return OPTIMIZE (sse42); > > > > + /* Keep this as runtime check. The standard SSE2 version has > > + meaningful optimizations around keeping all loads aligned in the > > + main loop which can benefit some ISA level >= 2 CPUs. */ > > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) > > return OPTIMIZE (sse2_unaligned); > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S > > index 9e36290dd2..0593fb303b 100644 > > --- a/sysdeps/x86_64/multiarch/strlen-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (3) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-evex-base.S b/sysdeps/x86_64/multiarch/strlen-evex-base.S > > index 278c899691..418e9f8411 100644 > > --- a/sysdeps/x86_64/multiarch/strlen-evex-base.S > > +++ b/sysdeps/x86_64/multiarch/strlen-evex-base.S > > @@ -16,7 +16,11 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +/* UNUSED. Exists purely as reference implementation. */ > > + > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (4) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S > > index 59ade77498..2109ec2f7a 100644 > > --- a/sysdeps/x86_64/multiarch/strlen-evex.S > > +++ b/sysdeps/x86_64/multiarch/strlen-evex.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (4) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S > > index 5be72267d5..a96ccbb2d5 100644 > > --- a/sysdeps/x86_64/multiarch/strlen-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strlen-sse2.S > > @@ -16,15 +16,20 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) || defined STRLEN > > - > > -# ifndef STRLEN > > -# define STRLEN __strlen_sse2 > > -# endif > > +#include <isa-level.h> > > > > +/* ISA level >= 2 for both strlen and wcslen. wcslen uses `pminud` > > + which is SSE4.1. strlen doesn't have an ISA level == 2 > > + implementation so the SSE2 implementation must be built with ISA > > + level == 2. 
*/ > > +# if ISA_SHOULD_BUILD (2) > > > > # include <sysdep.h> > > > > +# ifndef STRLEN > > +# define STRLEN __strlen_sse2 > > +# endif > > + > > # ifdef AS_WCSLEN > > # define PMINU pminud > > # define PCMPEQ pcmpeqd > > @@ -82,7 +87,7 @@ L(n_nonzero): > > suffice. */ > > mov %RSI_LP, %R10_LP > > sar $62, %R10_LP > > - jnz __wcslen_sse4_1 > > + jnz OVERFLOW_STRLEN > > sal $2, %RSI_LP > > # endif > > > > diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S > > index 58c05dcfb8..c2596ab103 100644 > > --- a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S > > +++ b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S > > @@ -1,16 +1,4 @@ > > -#ifndef STRCMP > > -# define STRCMP __strncasecmp_l_avx2_rtm > > -#endif > > +#define USE_AS_STRCASECMP_L > > +#define USE_AS_STRNCMP > > > > -#define _GLABEL(x) x ## _rtm > > -#define GLABEL(x) _GLABEL(x) > > - > > -#define ZERO_UPPER_VEC_REGISTERS_RETURN \ > > - ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > > - > > -#define VZEROUPPER_RETURN jmp L(return_vzeroupper) > > - > > -#define SECTION(p) p##.avx.rtm > > -#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm > > - > > -#include "strncase_l-avx2.S" > > +#include "strcmp-avx2-rtm.S" > > diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2.S b/sysdeps/x86_64/multiarch/strncase_l-avx2.S > > index 48c0aa21f8..d00687aac5 100644 > > --- a/sysdeps/x86_64/multiarch/strncase_l-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strncase_l-avx2.S > > @@ -16,12 +16,7 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#ifndef STRCMP > > -# define STRCMP __strncasecmp_l_avx2 > > -#endif > > #define USE_AS_STRCASECMP_L > > #define USE_AS_STRNCMP > > -#ifndef OVERFLOW_STRCMP > > -# define OVERFLOW_STRCMP __strcasecmp_l_avx2 > > -#endif > > + > > #include "strcmp-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/strncase_l-evex.S b/sysdeps/x86_64/multiarch/strncase_l-evex.S > > index 8a5af3695c..1a79758065 100644 > > --- a/sysdeps/x86_64/multiarch/strncase_l-evex.S > > +++ b/sysdeps/x86_64/multiarch/strncase_l-evex.S > > @@ -16,10 +16,6 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#ifndef STRCMP > > -# define STRCMP __strncasecmp_l_evex > > -#endif > > -#define OVERFLOW_STRCMP __strcasecmp_l_evex > > #define USE_AS_STRCASECMP_L > > #define USE_AS_STRNCMP > > #include "strcmp-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S > > index 68bad365ba..6bb6be8585 100644 > > --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S > > +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S > > @@ -1,4 +1,2 @@ > > -#define STRCMP __strncmp_avx2_rtm > > #define USE_AS_STRNCMP 1 > > -#define OVERFLOW_STRCMP __strcmp_avx2_rtm > > #include "strcmp-avx2-rtm.S" > > diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S > > index f138e9f1fd..def3509c4c 100644 > > --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S > > @@ -1,4 +1,3 @@ > > -#define STRCMP __strncmp_avx2 > > #define USE_AS_STRNCMP 1 > > -#define OVERFLOW_STRCMP __strcmp_avx2 > > + > > #include "strcmp-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S > > index a1d53e8c9f..aa69c18928 100644 > > --- a/sysdeps/x86_64/multiarch/strncmp-evex.S > > +++ b/sysdeps/x86_64/multiarch/strncmp-evex.S > > @@ -1,3 +1,2 @@ > > -#define STRCMP __strncmp_evex > > #define USE_AS_STRNCMP 1 > > 
#include "strcmp-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c > > index 70ae6547c9..4ebe4bde30 100644 > > --- a/sysdeps/x86_64/multiarch/strncmp.c > > +++ b/sysdeps/x86_64/multiarch/strncmp.c > > @@ -26,33 +26,38 @@ > > # define SYMBOL_NAME strncmp > > # include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > > > static inline void * > > IFUNC_SELECTOR (void) > > { > > - const struct cpu_features* cpu_features = __get_cpu_features (); > > + const struct cpu_features *cpu_features = __get_cpu_features (); > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + AVX_Fast_Unaligned_Load, )) > > { > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) > > return OPTIMIZE (evex); > > > > if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) > > return OPTIMIZE (avx2_rtm); > > > > - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, > > + 
Prefer_No_VZEROUPPER, !)) > > return OPTIMIZE (avx2); > > } > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > > return OPTIMIZE (sse42); > > > > diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2.S b/sysdeps/x86_64/multiarch/strnlen-avx2.S > > index c4062b22f7..c4a12097f0 100644 > > --- a/sysdeps/x86_64/multiarch/strnlen-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strnlen-avx2.S > > @@ -1,4 +1,8 @@ > > -#define STRLEN __strnlen_avx2 > > +#ifndef STRNLEN > > +# define STRNLEN __strnlen_avx2 > > +#endif > > + > > #define USE_AS_STRNLEN 1 > > +#define STRLEN STRNLEN > > > > #include "strlen-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S > > index 722022f303..64a9fc2606 100644 > > --- a/sysdeps/x86_64/multiarch/strnlen-evex.S > > +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S > > @@ -1,4 +1,8 @@ > > -#define STRLEN __strnlen_evex > > +#ifndef STRNLEN > > +# define STRNLEN __strnlen_evex > > +#endif > > + > > #define USE_AS_STRNLEN 1 > > +#define STRLEN STRNLEN > > > > #include "strlen-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2.S b/sysdeps/x86_64/multiarch/strnlen-sse2.S > > index a50c7d6a28..8841ba9faf 100644 > > --- a/sysdeps/x86_64/multiarch/strnlen-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strnlen-sse2.S > > @@ -16,11 +16,11 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > -# ifndef STRLEN > > -# define STRLEN __strnlen_sse2 > > -# endif > > +#ifndef STRNLEN > > +# define STRNLEN __strnlen_sse2 > > #endif > > > > -#define AS_STRNLEN > > +#define AS_STRNLEN 1 > > +#define STRLEN STRNLEN > > + > > #include "strlen-sse2.S" > > diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S > > index eb128a2ae3..924171d8e4 100644 > > --- a/sysdeps/x86_64/multiarch/strrchr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (3) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S > > index 8014c285b3..992b45fb47 100644 > > --- a/sysdeps/x86_64/multiarch/strrchr-evex.S > > +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S > > @@ -16,7 +16,9 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (4) > > > > # include <sysdep.h> > > > > diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S > > index 6ee7a5e33a..892e861fa8 100644 > > --- a/sysdeps/x86_64/multiarch/strrchr-sse2.S > > +++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S > > @@ -16,36 +16,40 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > +#include <isa-level.h> > > + > > +/* ISA level >= 2 because there are no {wcs|str}rchr-sse4 > > + implementations. 
*/ > > +#if ISA_SHOULD_BUILD (2) > > + > > +# include <sysdep.h> > > + > > # ifndef STRRCHR > > # define STRRCHR __strrchr_sse2 > > # endif > > -#endif > > - > > -#include <sysdep.h> > > > > -#ifdef USE_AS_WCSRCHR > > -# define PCMPEQ pcmpeqd > > -# define CHAR_SIZE 4 > > -# define PMINU pminud > > -#else > > -# define PCMPEQ pcmpeqb > > -# define CHAR_SIZE 1 > > -# define PMINU pminub > > -#endif > > +# ifdef USE_AS_WCSRCHR > > +# define PCMPEQ pcmpeqd > > +# define CHAR_SIZE 4 > > +# define PMINU pminud > > +# else > > +# define PCMPEQ pcmpeqb > > +# define CHAR_SIZE 1 > > +# define PMINU pminub > > +# endif > > > > -#define PAGE_SIZE 4096 > > -#define VEC_SIZE 16 > > +# define PAGE_SIZE 4096 > > +# define VEC_SIZE 16 > > > > .text > > ENTRY(STRRCHR) > > movd %esi, %xmm0 > > movq %rdi, %rax > > andl $(PAGE_SIZE - 1), %eax > > -#ifndef USE_AS_WCSRCHR > > +# ifndef USE_AS_WCSRCHR > > punpcklbw %xmm0, %xmm0 > > punpcklwd %xmm0, %xmm0 > > -#endif > > +# endif > > pshufd $0, %xmm0, %xmm0 > > cmpl $(PAGE_SIZE - VEC_SIZE), %eax > > ja L(cross_page) > > @@ -69,9 +73,9 @@ L(cross_page_continue): > > /* We are off by 3 for wcsrchr if search CHAR is non-zero. If > > search CHAR is zero we are correct. Either way `andq > > -CHAR_SIZE, %rax` gets the correct result. 
*/ > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > L(ret0): > > ret > > > > @@ -85,9 +89,9 @@ L(first_vec_x0_test): > > jz L(ret0) > > bsrl %eax, %eax > > addq %r8, %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4 > > @@ -100,9 +104,9 @@ L(first_vec_x1): > > jz L(first_vec_x0_test) > > bsrl %eax, %eax > > leaq (VEC_SIZE)(%rdi, %rax), %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4 > > @@ -113,9 +117,9 @@ L(first_vec_x1_test): > > jz L(first_vec_x0_test) > > bsrl %eax, %eax > > leaq (VEC_SIZE)(%rdi, %rax), %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4 > > @@ -128,9 +132,9 @@ L(first_vec_x2): > > jz L(first_vec_x1_test) > > bsrl %eax, %eax > > leaq (VEC_SIZE * 2)(%rdi, %rax), %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4 > > @@ -165,27 +169,27 @@ L(first_loop): > > /* Since SSE2 no pminud so wcsrchr needs seperate logic for > > detecting zero. Note if this is found to be a bottleneck it > > may be worth adding an SSE4.1 wcsrchr implementation. 
*/ > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > movaps %xmm5, %xmm6 > > pxor %xmm8, %xmm8 > > > > PCMPEQ %xmm8, %xmm5 > > PCMPEQ %xmm4, %xmm8 > > por %xmm5, %xmm8 > > -#else > > +# else > > movaps %xmm5, %xmm6 > > PMINU %xmm4, %xmm5 > > -#endif > > +# endif > > > > movaps %xmm4, %xmm9 > > PCMPEQ %xmm0, %xmm4 > > PCMPEQ %xmm0, %xmm6 > > movaps %xmm6, %xmm7 > > por %xmm4, %xmm6 > > -#ifndef USE_AS_WCSRCHR > > +# ifndef USE_AS_WCSRCHR > > pxor %xmm8, %xmm8 > > PCMPEQ %xmm5, %xmm8 > > -#endif > > +# endif > > pmovmskb %xmm8, %ecx > > pmovmskb %xmm6, %eax > > > > @@ -219,9 +223,9 @@ L(first_loop_old_match): > > > > bsrl %eax, %eax > > addq %rsi, %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4 > > @@ -247,9 +251,9 @@ L(new_match): > > jz L(first_loop_old_match) > > bsrl %eax, %eax > > addq %rdi, %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > /* Save minimum state for getting most recent match. We can > > @@ -267,27 +271,27 @@ L(second_loop): > > /* Since SSE2 no pminud so wcsrchr needs seperate logic for > > detecting zero. Note if this is found to be a bottleneck it > > may be worth adding an SSE4.1 wcsrchr implementation. 
*/ > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > movaps %xmm5, %xmm6 > > pxor %xmm8, %xmm8 > > > > PCMPEQ %xmm8, %xmm5 > > PCMPEQ %xmm4, %xmm8 > > por %xmm5, %xmm8 > > -#else > > +# else > > movaps %xmm5, %xmm6 > > PMINU %xmm4, %xmm5 > > -#endif > > +# endif > > > > movaps %xmm4, %xmm9 > > PCMPEQ %xmm0, %xmm4 > > PCMPEQ %xmm0, %xmm6 > > movaps %xmm6, %xmm7 > > por %xmm4, %xmm6 > > -#ifndef USE_AS_WCSRCHR > > +# ifndef USE_AS_WCSRCHR > > pxor %xmm8, %xmm8 > > PCMPEQ %xmm5, %xmm8 > > -#endif > > +# endif > > > > pmovmskb %xmm8, %ecx > > pmovmskb %xmm6, %eax > > @@ -312,9 +316,9 @@ L(second_loop_old_match): > > orl %ecx, %eax > > bsrl %eax, %eax > > addq %rsi, %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4 > > @@ -340,9 +344,9 @@ L(second_loop_new_match): > > jz L(second_loop_old_match) > > bsrl %eax, %eax > > addq %rdi, %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > ret > > > > .p2align 4,, 4 > > @@ -366,9 +370,10 @@ L(cross_page): > > jz L(ret1) > > bsrl %eax, %eax > > addq %rdi, %rax > > -#ifdef USE_AS_WCSRCHR > > +# ifdef USE_AS_WCSRCHR > > andq $-CHAR_SIZE, %rax > > -#endif > > +# endif > > L(ret1): > > ret > > END(STRRCHR) > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S > > index c6aa8f45a6..dc342a9f44 100644 > > --- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S > > +++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S > > @@ -17,6 +17,7 @@ > > <https://www.gnu.org/licenses/>. 
*/ > > > > #include <sysdep.h> > > +#include "../strchr-isa-default-impl.h" > > > > ENTRY(__strstr_sse2_unaligned) > > movzbl (%rsi), %eax > > @@ -75,7 +76,7 @@ L(next_pair_index): > > .p2align 4 > > L(strchr): > > movzbl %al, %esi > > - jmp __strchr_sse2 > > + jmp DEFAULT_STRCHR > > > > .p2align 4 > > L(pair_loop): > > diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S > > index 67726b6837..f404888a93 100644 > > --- a/sysdeps/x86_64/multiarch/wcschr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S > > @@ -1,3 +1,8 @@ > > -#define STRCHR __wcschr_avx2 > > +#ifndef WCSCHR > > +# define WCSCHR __wcschr_avx2 > > +#endif > > + > > +#define STRCHR WCSCHR > > #define USE_AS_WCSCHR 1 > > + > > #include "strchr-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S > > index 7cb8f1e41a..b5ccc59230 100644 > > --- a/sysdeps/x86_64/multiarch/wcschr-evex.S > > +++ b/sysdeps/x86_64/multiarch/wcschr-evex.S > > @@ -1,3 +1,8 @@ > > -#define STRCHR __wcschr_evex > > +#ifndef WCSCHR > > +# define WCSCHR __wcschr_evex > > +#endif > > + > > +#define STRCHR WCSCHR > > #define USE_AS_WCSCHR 1 > > + > > #include "strchr-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S > > index c872926ba9..1c83957cbc 100644 > > --- a/sysdeps/x86_64/multiarch/wcschr-sse2.S > > +++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S > > @@ -16,13 +16,17 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > + > > +#include <isa-level.h> > > + > > +/* ISA level >= 2 because there is no wcschr-sse4 implementations. 
*/ > > +#if ISA_SHOULD_BUILD (2) > > + > > # ifndef WCSCHR > > # define WCSCHR __wcschr_sse2 > > # endif > > -#endif > > > > -#include <sysdep.h> > > +# include <sysdep.h> > > > > .text > > ENTRY (WCSCHR) > > @@ -155,3 +159,4 @@ L(return_null): > > ret > > > > END (WCSCHR) > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S > > index d6ca2b8064..f17a8969cb 100644 > > --- a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S > > +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S > > @@ -1,4 +1,3 @@ > > -#define STRCMP __wcscmp_avx2_rtm > > #define USE_AS_WCSCMP 1 > > > > #include "strcmp-avx2-rtm.S" > > diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2.S b/sysdeps/x86_64/multiarch/wcscmp-avx2.S > > index e5da4da689..0a71f907f0 100644 > > --- a/sysdeps/x86_64/multiarch/wcscmp-avx2.S > > +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2.S > > @@ -1,4 +1,3 @@ > > -#define STRCMP __wcscmp_avx2 > > #define USE_AS_WCSCMP 1 > > > > #include "strcmp-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S > > index 42e73e51eb..b0337a8311 100644 > > --- a/sysdeps/x86_64/multiarch/wcscmp-evex.S > > +++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S > > @@ -1,4 +1,3 @@ > > -#define STRCMP __wcscmp_evex > > #define USE_AS_WCSCMP 1 > > > > #include "strcmp-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/wcscmp-sse2.S b/sysdeps/x86_64/multiarch/wcscmp-sse2.S > > index 6cb7d9faf9..3f32e8127d 100644 > > --- a/sysdeps/x86_64/multiarch/wcscmp-sse2.S > > +++ b/sysdeps/x86_64/multiarch/wcscmp-sse2.S > > @@ -16,11 +16,16 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#define USE_AS_WCSCMP > > -#define STRCMP_ISA _sse2 > > -#include "strcmp-naming.h" > > +#include <isa-level.h> > > > > -#include <sysdep.h> > > +/* ISA level >= 2 because there is no wcscmp-sse4 implementations. 
*/ > > +#if ISA_SHOULD_BUILD (2) > > +# include <sysdep.h> > > + > > +/* Needed to get right name. */ > > +# define USE_AS_WCSCMP > > +# define STRCMP_ISA _sse2 > > +# include "strcmp-naming.h" > > > > /* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. */ > > > > @@ -949,3 +954,4 @@ L(equal): > > ret > > > > END (STRCMP) > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2.S b/sysdeps/x86_64/multiarch/wcslen-avx2.S > > index c9224f1bc5..9784d8f780 100644 > > --- a/sysdeps/x86_64/multiarch/wcslen-avx2.S > > +++ b/sysdeps/x86_64/multiarch/wcslen-avx2.S > > @@ -1,4 +1,8 @@ > > -#define STRLEN __wcslen_avx2 > > +#ifndef WCSLEN > > +# define WCSLEN __wcslen_avx2 > > +#endif > > + > > +#define STRLEN WCSLEN > > #define USE_AS_WCSLEN 1 > > > > #include "strlen-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S > > index bdafa83bd5..df21bac63c 100644 > > --- a/sysdeps/x86_64/multiarch/wcslen-evex.S > > +++ b/sysdeps/x86_64/multiarch/wcslen-evex.S > > @@ -1,4 +1,8 @@ > > -#define STRLEN __wcslen_evex > > +#ifndef WCSLEN > > +# define WCSLEN __wcslen_evex > > +#endif > > + > > +#define STRLEN WCSLEN > > #define USE_AS_WCSLEN 1 > > > > #include "strlen-evex.S" > > diff --git a/sysdeps/x86_64/multiarch/wcslen-sse2.S b/sysdeps/x86_64/multiarch/wcslen-sse2.S > > index 944c3bd9c6..e9c518a932 100644 > > --- a/sysdeps/x86_64/multiarch/wcslen-sse2.S > > +++ b/sysdeps/x86_64/multiarch/wcslen-sse2.S > > @@ -16,13 +16,16 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > -# ifndef WCSLEN > > -# define WCSLEN __wcslen_sse2 > > -# endif > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (1) > > + > > +# include <sysdep.h> > > + > > +#ifndef WCSLEN > > +# define WCSLEN __wcslen_sse2 > > #endif > > > > -#include <sysdep.h> > > > > .text > > ENTRY (WCSLEN) > > @@ -235,3 +238,5 @@ L(exit_tail7): > > ret > > > > END (WCSLEN) > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S > > index c88e8342a1..126d183e75 100644 > > --- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S > > +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S > > @@ -1,5 +1,9 @@ > > -#define AS_WCSLEN > > -#define STRLEN __wcslen_sse4_1 > > -#define SECTION(p) p##.sse4.1 > > +#ifndef WCSLEN > > +# define WCSLEN __wcslen_sse4_1 > > +#endif > > + > > +#define AS_WCSLEN 1 > > +#define STRLEN WCSLEN > > +#define SECTION(p) p##.sse4.1 > > > > #include "strlen-sse2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S > > index f467582cbe..099a60c48e 100644 > > --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S > > +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S > > @@ -1,5 +1,3 @@ > > -#define STRCMP __wcsncmp_avx2_rtm > > #define USE_AS_STRNCMP 1 > > #define USE_AS_WCSCMP 1 > > -#define OVERFLOW_STRCMP __wcscmp_avx2_rtm > > #include "strcmp-avx2-rtm.S" > > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S > > index e9ede522b8..fc26b593d0 100644 > > --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S > > +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S > > @@ -1,5 +1,4 @@ > > -#define STRCMP __wcsncmp_avx2 > > #define USE_AS_STRNCMP 1 > > #define USE_AS_WCSCMP 1 > > -#define OVERFLOW_STRCMP __wcscmp_avx2 > > + > > #include "strcmp-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S > > index 8a8e310713..d3a92e2000 100644 > > --- 
a/sysdeps/x86_64/multiarch/wcsncmp-evex.S > > +++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S > > @@ -1,4 +1,3 @@ > > -#define STRCMP __wcsncmp_evex > > #define USE_AS_STRNCMP 1 > > #define USE_AS_WCSCMP 1 > > > > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-generic.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c > > index 658d541886..b0cf4e87d5 100644 > > --- a/sysdeps/x86_64/multiarch/wcsncmp-generic.c > > +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c > > @@ -16,5 +16,10 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#define WCSNCMP __wcsncmp_generic > > -#include <wcsmbs/wcsncmp.c> > > +#include <isa-level.h> > > +#if ISA_SHOULD_BUILD (2) > > + > > +# define WCSNCMP __wcsncmp_generic > > +# include <wcsmbs/wcsncmp.c> > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S > > index fac83546b5..12c3a0fd05 100644 > > --- a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S > > +++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S > > @@ -1,4 +1,8 @@ > > -#define STRLEN __wcsnlen_avx2 > > +#ifndef WCSNLEN > > +# define WCSNLEN __wcsnlen_avx2 > > +#endif > > + > > +#define STRLEN WCSNLEN > > #define USE_AS_WCSLEN 1 > > #define USE_AS_STRNLEN 1 > > > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S > > index 24773bb4e2..e2aad94c1e 100644 > > --- a/sysdeps/x86_64/multiarch/wcsnlen-evex.S > > +++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S > > @@ -1,4 +1,8 @@ > > -#define STRLEN __wcsnlen_evex > > +#ifndef WCSNLEN > > +# define WCSNLEN __wcsnlen_evex > > +#endif > > + > > +#define STRLEN WCSNLEN > > #define USE_AS_WCSLEN 1 > > #define USE_AS_STRNLEN 1 > > > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c > > index 2d75da7709..8b466aac2f 100644 > > --- a/sysdeps/x86_64/multiarch/wcsnlen-generic.c > > +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c > > @@ -16,13 +16,18 
@@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > +#include <isa-level.h> > > + > > +#if ISA_SHOULD_BUILD (1) > > > > -#if IS_IN (libc) > > # include <wchar.h> > > > > -# define WCSNLEN __wcsnlen_generic > > +# ifndef WCSNLEN > > +# define WCSNLEN __wcsnlen_generic > > +# endif > > > > extern __typeof (wcsnlen) __wcsnlen_generic; > > -#endif > > > > -#include "wcsmbs/wcsnlen.c" > > +# include "wcsmbs/wcsnlen.c" > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S > > index 17cdedc2a9..8f534102a2 100644 > > --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S > > +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S > > @@ -1,6 +1,11 @@ > > +#ifndef WCSNLEN > > +# define WCSNLEN __wcsnlen_sse4_1 > > +# define OVERFLOW_STRLEN __wcslen_sse4_1 > > +#endif > > + > > #define AS_WCSLEN > > #define AS_STRNLEN > > -#define STRLEN __wcsnlen_sse4_1 > > +#define STRLEN WCSNLEN > > #define SECTION(p) p##.sse4.1 > > > > #include "strlen-sse2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S > > index cf8a239ab2..6eaf5e090b 100644 > > --- a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S > > @@ -1,3 +1,8 @@ > > -#define STRRCHR __wcsrchr_avx2 > > +#ifndef WCSRCHR > > +# define WCSRCHR __wcsrchr_avx2 > > +#endif > > + > > +#define STRRCHR WCSRCHR > > #define USE_AS_WCSRCHR 1 > > + > > #include "strrchr-avx2.S" > > diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S > > index c64602f7dc..e5c5fe3bf2 100644 > > --- a/sysdeps/x86_64/multiarch/wcsrchr-evex.S > > +++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S > > @@ -1,3 +1,7 @@ > > -#define STRRCHR __wcsrchr_evex > > +#ifndef WCSRCHR > > +# define WCSRCHR __wcsrchr_evex > > +#endif > > + > > +#define STRRCHR WCSRCHR > > #define USE_AS_WCSRCHR 1 > > #include "strrchr-evex.S" > > diff --git 
a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S > > index d9259720f8..21388d900c 100644 > > --- a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S > > +++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S > > @@ -16,12 +16,11 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#if IS_IN (libc) > > -# ifndef STRRCHR > > -# define STRRCHR __wcsrchr_sse2 > > -# endif > > +#ifndef WCSRCHR > > +# define WCSRCHR __wcsrchr_sse2 > > #endif > > > > +#define STRRCHR WCSRCHR > > #define USE_AS_WCSRCHR 1 > > #define NO_PMINU 1 > > > > diff --git a/sysdeps/x86_64/strcasecmp_l.S b/sysdeps/x86_64/strcasecmp_l.S > > index 84fd7fdfd3..5afa7ea098 100644 > > --- a/sysdeps/x86_64/strcasecmp_l.S > > +++ b/sysdeps/x86_64/strcasecmp_l.S > > @@ -1,11 +1,35 @@ > > +/* strcasecmp_l dispatch for RTLD and non-multiarch build > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > /* Symbols = __strcasecmp_l and __strcasecmp. 
*/ > > > > -#include "multiarch/strcasecmp_l-sse2.S" > > +#define DEFAULT_IMPL_V1 "multiarch/strcasecmp_l-sse2.S" > > +/* This may cause regressions on some processors that heavily prefer > > + aligned loads or have a slow implementation of the `pcmpstri` > > + instruction. */ > > +#define DEFAULT_IMPL_V2 "multiarch/strcasecmp_l-sse4_2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strcasecmp_l-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strcasecmp_l-evex.S" > > > > -libc_hidden_builtin_def (__strcasecmp_l) > > +#include "isa-default-impl.h" > > > > +libc_hidden_def (__strcasecmp_l) > > weak_alias (__strcasecmp_l, strcasecmp_l) > > -libc_hidden_def (strcasecmp_l) > > > > -weak_alias (__strcasecmp, strcasecmp) > > libc_hidden_def (__strcasecmp) > > +weak_alias (__strcasecmp, strcasecmp) > > diff --git a/sysdeps/x86_64/strchr-isa-default-impl.h b/sysdeps/x86_64/strchr-isa-default-impl.h > > new file mode 100644 > > index 0000000000..0c8cbc6ffb > > --- /dev/null > > +++ b/sysdeps/x86_64/strchr-isa-default-impl.h > > @@ -0,0 +1,28 @@ > > +/* Set default strchr impl based on ISA level. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#include <isa-level.h> > > +#if MINIMUM_X86_ISA_LEVEL == 1 || MINIMUM_X86_ISA_LEVEL == 2 > > +# define DEFAULT_STRCHR __strchr_sse2 > > +#elif MINIMUM_X86_ISA_LEVEL == 3 > > +# define DEFAULT_STRCHR __strchr_avx2 > > +#elif MINIMUM_X86_ISA_LEVEL == 4 > > +# define DEFAULT_STRCHR __strchr_evex > > +#else > > +# error "Unknown default strchr implementation" > > +#endif > > diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S > > index 77c956c92c..1406c633e8 100644 > > --- a/sysdeps/x86_64/strchr.S > > +++ b/sysdeps/x86_64/strchr.S > > @@ -1,5 +1,4 @@ > > -/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. > > - For AMD x86-64. > > +/* strchr dispatch for RTLD and non-multiarch build > > Copyright (C) 2009-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -17,8 +16,13 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > +#define STRCHR strchr > > + > > +#define DEFAULT_IMPL_V1 "multiarch/strchr-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strchr-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strchr-evex.S" > > + > > +#include "isa-default-impl.h" > > > > -#define STRCHR strchr > > -#include "multiarch/strchr-sse2.S" > > weak_alias (strchr, index) > > libc_hidden_builtin_def (strchr) > > diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S > > index 508e42db26..f1ef907296 100644 > > --- a/sysdeps/x86_64/strchrnul.S > > +++ b/sysdeps/x86_64/strchrnul.S > > @@ -1,6 +1,4 @@ > > -/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR > > - or terminating NUL byte. > > - For AMD x86-64. > > +/* strchrnul dispatch for RTLD and non-multiarch build > > Copyright (C) 2009-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -18,7 +16,12 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#define STRCHR __strchrnul > > -#include "multiarch/strchrnul-sse2.S" > > +#define STRCHRNUL __strchrnul > > + > > +#define DEFAULT_IMPL_V1 "multiarch/strchrnul-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strchrnul-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strchrnul-evex.S" > > + > > +#include "isa-default-impl.h" > > > > weak_alias (__strchrnul, strchrnul) > > diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S > > index 19e54bd3a7..7c3cf87a42 100644 > > --- a/sysdeps/x86_64/strcmp.S > > +++ b/sysdeps/x86_64/strcmp.S > > @@ -1,4 +1,4 @@ > > -/* Highly optimized version for x86-64. > > +/* strcmp dispatch for RTLD and non-multiarch build > > Copyright (C) 1999-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -18,5 +18,14 @@ > > > > /* Symbol = strcmp. */ > > > > -#include "multiarch/strcmp-sse2.S" > > +#define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S" > > +/* strcmp-sse2-unaligned.S is often faster than strcmp-sse42.S and > > + doesn't have the drawback of using the `pcmpstri` instruction > > + which can be very slow on some CPUs. */ > > +#define DEFAULT_IMPL_V2 "multiarch/strcmp-sse2-unaligned.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > libc_hidden_builtin_def (strcmp) > > diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S > > index c2f5674f8d..e7cb4b1680 100644 > > --- a/sysdeps/x86_64/strlen.S > > +++ b/sysdeps/x86_64/strlen.S > > @@ -1,4 +1,4 @@ > > -/* SSE2 version of strlen. > > +/* strlen dispatch for RTLD and non-multiarch build > > Copyright (C) 2021-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -17,6 +17,11 @@ > > <https://www.gnu.org/licenses/>. 
*/ > > > > #define STRLEN strlen > > -#include "multiarch/strlen-sse2.S" > > + > > +#define DEFAULT_IMPL_V1 "multiarch/strlen-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strlen-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strlen-evex.S" > > + > > +#include "isa-default-impl.h" > > > > libc_hidden_builtin_def (strlen) > > diff --git a/sysdeps/x86_64/strncase_l.S b/sysdeps/x86_64/strncase_l.S > > index 3780fc50b1..de28ecf5d4 100644 > > --- a/sysdeps/x86_64/strncase_l.S > > +++ b/sysdeps/x86_64/strncase_l.S > > @@ -1,11 +1,35 @@ > > +/* strncasecmp_l dispatch for RTLD and non-multiarch build > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > /* Symbols = __strncasecmp_l and __strncasecmp. */ > > > > -#include "multiarch/strncase_l-sse2.S" > > +#define DEFAULT_IMPL_V1 "multiarch/strncase_l-sse2.S" > > +/* This may cause regressions on some processors that heavily prefer > > + aligned loads or have a slow implementation of the `pcmpstri` > > + instruction. 
*/ > > +#define DEFAULT_IMPL_V2 "multiarch/strncase_l-sse4_2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strncase_l-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strncase_l-evex.S" > > > > -libc_hidden_builtin_def (__strncasecmp_l) > > +#include "isa-default-impl.h" > > > > +libc_hidden_def (__strncasecmp_l) > > weak_alias (__strncasecmp_l, strncasecmp_l) > > -libc_hidden_def (strncasecmp_l) > > > > -weak_alias (__strncasecmp, strncasecmp) > > libc_hidden_def (__strncasecmp) > > +weak_alias (__strncasecmp, strncasecmp) > > diff --git a/sysdeps/x86_64/strncmp.S b/sysdeps/x86_64/strncmp.S > > index 13d9e82ee2..afb251d9fe 100644 > > --- a/sysdeps/x86_64/strncmp.S > > +++ b/sysdeps/x86_64/strncmp.S > > @@ -1,4 +1,31 @@ > > +/* strncmp dispatch for RTLD and non-multiarch build > > + Copyright (C) 1999-2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > /* Symbol = strncmp. */ > > > > -#include "multiarch/strncmp-sse2.S" > > +#define DEFAULT_IMPL_V1 "multiarch/strncmp-sse2.S" > > +/* This may cause regressions on some processors that heavily prefer > > + aligned loads or have a slow implementation of the `pcmpstri` > > + instruction. 
*/ > > +#define DEFAULT_IMPL_V2 "multiarch/strncmp-sse4_2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strncmp-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strncmp-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > libc_hidden_builtin_def (strncmp) > > diff --git a/sysdeps/x86_64/strnlen.S b/sysdeps/x86_64/strnlen.S > > index 174970d58f..b2c2149e07 100644 > > --- a/sysdeps/x86_64/strnlen.S > > +++ b/sysdeps/x86_64/strnlen.S > > @@ -1,6 +1,29 @@ > > -#define STRLEN __strnlen > > -#include "multiarch/strnlen-sse2.S" > > +/* strnlen dispatch for RTLD and non-multiarch build > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#define STRNLEN __strnlen > > + > > +#define DEFAULT_IMPL_V1 "multiarch/strnlen-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strnlen-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strnlen-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > +weak_alias (__strnlen, strnlen) > > libc_hidden_def (__strnlen) > > -weak_alias (__strnlen, strnlen); > > -libc_hidden_builtin_def (strnlen) > > +libc_hidden_def (strnlen) > > diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S > > index f39da60454..493d370a28 100644 > > --- a/sysdeps/x86_64/strrchr.S > > +++ b/sysdeps/x86_64/strrchr.S > > @@ -1,4 +1,4 @@ > > -/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. > > +/* strrchr dispatch for RTLD and non-multiarch build > > Copyright (C) 2013-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -17,6 +17,12 @@ > > <https://www.gnu.org/licenses/>. */ > > > > #define STRRCHR strrchr > > -#include "multiarch/strrchr-sse2.S" > > + > > +#define DEFAULT_IMPL_V1 "multiarch/strrchr-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/strrchr-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/strrchr-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > weak_alias (strrchr, rindex) > > libc_hidden_builtin_def (strrchr) > > diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S > > index 80b12c4286..01a432b899 100644 > > --- a/sysdeps/x86_64/wcschr.S > > +++ b/sysdeps/x86_64/wcschr.S > > @@ -1,4 +1,4 @@ > > -/* wcschr with SSSE3 > > +/* wcschr dispatch for RTLD and non-multiarch build > > Copyright (C) 2011-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -16,9 +16,14 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > - > > #define WCSCHR __wcschr > > -#include "multiarch/wcschr-sse2.S" > > -libc_hidden_def(__wcschr) > > + > > +#define DEFAULT_IMPL_V1 "multiarch/wcschr-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/wcschr-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/wcschr-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > +libc_hidden_def (__wcschr) > > weak_alias (__wcschr, wcschr) > > libc_hidden_weak (wcschr) > > diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S > > index e04cdbf5fe..5d30545fb6 100644 > > --- a/sysdeps/x86_64/wcscmp.S > > +++ b/sysdeps/x86_64/wcscmp.S > > @@ -1,4 +1,4 @@ > > -/* Optimized wcscmp for x86-64 with SSE2. > > +/* wcscmp dispatch for RTLD and non-multiarch build > > Copyright (C) 2011-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -18,6 +18,11 @@ > > > > /* Symbol = __wcscmp. */ > > > > -#include "multiarch/wcscmp-sse2.S" > > +#define DEFAULT_IMPL_V1 "multiarch/wcscmp-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/wcscmp-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/wcscmp-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > libc_hidden_def (__wcscmp) > > weak_alias (__wcscmp, wcscmp) > > diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S > > index 588a0fbe01..e4e25b5353 100644 > > --- a/sysdeps/x86_64/wcslen.S > > +++ b/sysdeps/x86_64/wcslen.S > > @@ -1,4 +1,4 @@ > > -/* Optimized wcslen for x86-64 with SSE2. > > +/* wcslen dispatch for RTLD and non-multiarch build > > Copyright (C) 2011-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -17,5 +17,18 @@ > > <https://www.gnu.org/licenses/>. 
*/ > > > > #define WCSLEN __wcslen > > -#include "multiarch/wcslen-sse2.S" > > -weak_alias(__wcslen, wcslen) > > + > > +#define DEFAULT_IMPL_V1 "multiarch/wcslen-sse2.S" > > +#define DEFAULT_IMPL_V2 "multiarch/wcslen-sse4_1.S" > > +#define DEFAULT_IMPL_V3 "multiarch/wcslen-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/wcslen-evex.S" > > + > > +#include "isa-default-impl.h" > > + > > +weak_alias (__wcslen, wcslen) > > + > > +#if MINIMUM_X86_ISA_LEVEL == 2 && !IS_IN (rtld) > > +/* Hidden def so it can be used as overflow fallback in > > + wcsnlen-sse4_1.S. */ > > +libc_hidden_def (__wcslen) > > +#endif > > diff --git a/sysdeps/x86_64/wcsncmp-generic.c b/sysdeps/x86_64/wcsncmp-generic.c > > new file mode 100644 > > index 0000000000..493a6f9b9b > > --- /dev/null > > +++ b/sysdeps/x86_64/wcsncmp-generic.c > > @@ -0,0 +1,29 @@ > > +/* wcsncmp dispatch for RTLD and non-multiarch .c ISA level <= 2 build. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +/* wcsncmp non-multiarch build is split into two files, > > + wcsncmp-generic.c and wcsncmp.S. The wcsncmp-generic.c build is for > > + ISA level <= 2 and just uses wcsmbs/wcsncmp.c. 
This must be split > > + into two files because we cannot include C code from assembly or > > + vice versa. */ > > + > > +#include <isa-level.h> > > + > > +#if MINIMUM_X86_ISA_LEVEL <= 2 > > +# include "wcsmbs/wcsncmp.c" > > +#endif > > diff --git a/sysdeps/x86_64/wcsncmp.S b/sysdeps/x86_64/wcsncmp.S > > new file mode 100644 > > index 0000000000..14f9a1341e > > --- /dev/null > > +++ b/sysdeps/x86_64/wcsncmp.S > > @@ -0,0 +1,40 @@ > > +/* wcsncmp dispatch for RTLD and non-multiarch .c files > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +/* wcsncmp non-multiarch build is split into two files, > > + wcsncmp-generic.c and wcsncmp.S. The wcsncmp.S build is for > > + ISA level >= 3 uses the optimized assembly implementations in > > + multiarch/wcsncmp*.S. This must be split into two files because > > + we cannot include C code from assembly or vice versa. */ > > + > > +#include <isa-level.h> > > + > > +#if MINIMUM_X86_ISA_LEVEL >= 3 > > + > > +/* Symbol = wcsncmp. 
*/ > > + > > +# define DEFAULT_IMPL_V3 "multiarch/wcsncmp-avx2.S" > > +# define DEFAULT_IMPL_V4 "multiarch/wcsncmp-evex.S" > > + > > +/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it > > + should never be used from here. */ > > +# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL" > > + > > +# include "isa-default-impl.h" > > + > > +#endif > > diff --git a/sysdeps/x86_64/wcsnlen-generic.c b/sysdeps/x86_64/wcsnlen-generic.c > > new file mode 100644 > > index 0000000000..ec66511589 > > --- /dev/null > > +++ b/sysdeps/x86_64/wcsnlen-generic.c > > @@ -0,0 +1,29 @@ > > +/* wcsnlen dispatch for RTLD and non-multiarch .c ISA level 1 build. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +/* wcsnlen non-multiarch build is split into two files, > > + wcsnlen-generic.c and wcsnlen.S. The wcsnlen-generic.c build is for > > + ISA level <= 1 and just uses wcsmbs/wcsnlen.c. This must be split > > + into two files because we cannot include C code from assembly or > > + vice versa. 
*/ > > + > > +#include <isa-level.h> > > + > > +#if MINIMUM_X86_ISA_LEVEL <= 1 > > +# include "wcsmbs/wcsnlen.c" > > +#endif > > diff --git a/sysdeps/x86_64/wcsnlen.S b/sysdeps/x86_64/wcsnlen.S > > new file mode 100644 > > index 0000000000..b30b3f0785 > > --- /dev/null > > +++ b/sysdeps/x86_64/wcsnlen.S > > @@ -0,0 +1,49 @@ > > +/* wcsnlen dispatch for RTLD and non-multiarch .c files > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +/* wcsnlen non-multiarch build is split into two files, > > + wcsnlen-generic.c and wcsnlen.S. The wcsnlen.S build is for > > + ISA level >= 2 uses the optimized assembly implementations in > > + multiarch/wcsnlen*.S. This must be split into two files because > > + we cannot include C code from assembly or vice versa. */ > > + > > +#include <isa-level.h> > > + > > +#if MINIMUM_X86_ISA_LEVEL >= 2 > > + > > +# define WCSNLEN __wcsnlen > > +/* This symbol must stay linked to the name in wcslen.S. 
*/ > > +#if IS_IN (rtld) > > +# define OVERFLOW_STRLEN __wcslen > > +#else > > +# define OVERFLOW_STRLEN HIDDEN_JUMPTARGET (__wcslen) > > +#endif > > + > > +# define DEFAULT_IMPL_V2 "multiarch/wcsnlen-sse4_1.S" > > +# define DEFAULT_IMPL_V3 "multiarch/wcsnlen-avx2.S" > > +# define DEFAULT_IMPL_V4 "multiarch/wcsnlen-evex.S" > > + > > +/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it > > + should never be used from here. */ > > +# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL" > > + > > +# include "isa-default-impl.h" > > + > > +weak_alias (__wcsnlen, wcsnlen) > > +libc_hidden_def (__wcsnlen) > > +#endif > > diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S > > index 1d4b1eb21c..abf828b458 100644 > > --- a/sysdeps/x86_64/wcsrchr.S > > +++ b/sysdeps/x86_64/wcsrchr.S > > @@ -1,4 +1,4 @@ > > -/* wcsrchr optimized with SSE2. > > +/* wcsrchr dispatch for RTLD and non-multiarch build > > Copyright (C) 2011-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > @@ -16,5 +16,10 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#define STRRCHR wcsrchr > > -#include "multiarch/wcsrchr-sse2.S" > > +#define WCSRCHR wcsrchr > > + > > +#define DEFAULT_IMPL_V1 "multiarch/wcsrchr-sse2.S" > > +#define DEFAULT_IMPL_V3 "multiarch/wcsrchr-avx2.S" > > +#define DEFAULT_IMPL_V4 "multiarch/wcsrchr-evex.S" > > + > > +#include "isa-default-impl.h" > > -- > > 2.34.1 > > > > LGTM. > > Thanks. > > -- > H.J.
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h index 77f9e2c0c3..3c4480aba7 100644 --- a/sysdeps/x86/isa-level.h +++ b/sysdeps/x86/isa-level.h @@ -84,6 +84,7 @@ /* ISA level >= 2 guaranteed includes. */ #define SSE4_2_X86_ISA_LEVEL 2 +#define SSE4_1_X86_ISA_LEVEL 2 #define SSSE3_X86_ISA_LEVEL 2 @@ -101,9 +102,18 @@ when ISA level < 3. */ #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3 +/* NB: This feature is disable when ISA level >= 3. All CPUs with + this feature don't run on glibc built with ISA level >= 3. */ +#define Slow_SSE42_X86_ISA_LEVEL 3 + /* Feature(s) enabled when ISA level >= 2. */ #define Fast_Unaligned_Load_X86_ISA_LEVEL 2 +/* NB: This feature is disable when ISA level >= 2, which was enabled + for the early Atom CPUs. */ +#define Slow_BSF_X86_ISA_LEVEL 2 + + /* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P macros are wrappers for the respective CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks. They differ in two ways. diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index e597a4855f..341ee69a35 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -197,6 +197,12 @@ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym endif ifeq ($(subdir),wcsmbs) + +sysdep_routines += \ + wcsncmp-generic \ + wcsnlen-generic \ +# sysdep_routines + tests += \ tst-rsi-wcslen endif diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S index 385e2c5668..d39b1aa0e2 100644 --- a/sysdeps/x86_64/memrchr.S +++ b/sysdeps/x86_64/memrchr.S @@ -1,4 +1,4 @@ -/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using +/* memrchr dispatch for RTLD and non-multiarch build Copyright (C) 2011-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -18,5 +18,11 @@ <https://www.gnu.org/licenses/>. 
*/ #define MEMRCHR __memrchr -#include "multiarch/memrchr-sse2.S" + +#define DEFAULT_IMPL_V1 "multiarch/memrchr-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/memrchr-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/memrchr-evex.S" + +#include "isa-default-impl.h" + weak_alias (__memrchr, memrchr) diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index d6b62af850..ba29a65716 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -144,11 +144,9 @@ sysdep_routines += \ wcslen-sse4_1 \ wcsncmp-avx2 \ wcsncmp-avx2-rtm \ - wcsncmp-generic \ wcsncmp-evex \ wcsnlen-avx2 \ wcsnlen-avx2-rtm \ - wcsnlen-generic \ wcsnlen-evex \ wcsnlen-evex512 \ wcsnlen-sse4_1 \ diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h index 1d9cdfcfec..a57a9952f3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h @@ -23,28 +23,32 @@ # define GENERIC sse2 #endif -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { - const struct cpu_features* cpu_features = __get_cpu_features (); + const struct cpu_features *cpu_features = __get_cpu_features (); - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + AVX_Fast_Unaligned_Load, )) { - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) - && 
CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) return OPTIMIZE (evex); if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) return OPTIMIZE (avx2_rtm); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + Prefer_No_VZEROUPPER, !)) return OPTIMIZE (avx2); } diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 2c96cb62d2..3b1df9b73c 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -205,19 +205,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/memrchr.c. */ IFUNC_IMPL (i, name, memrchr, - IFUNC_IMPL_ADD (array, i, memrchr, - CPU_FEATURE_USABLE (AVX2), - __memrchr_avx2) - IFUNC_IMPL_ADD (array, i, memrchr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __memrchr_avx2_rtm) - IFUNC_IMPL_ADD (array, i, memrchr, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __memrchr_evex) - - IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __memrchr_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr, + CPU_FEATURE_USABLE (AVX2), + __memrchr_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __memrchr_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, memrchr, + 1, + __memrchr_sse2)) #ifdef SHARED /* Support sysdeps/x86_64/multiarch/memset_chk.c. */ @@ -346,49 +349,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strlen.c. 
*/ IFUNC_IMPL (i, name, strlen, - IFUNC_IMPL_ADD (array, i, strlen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __strlen_avx2) - IFUNC_IMPL_ADD (array, i, strlen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __strlen_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strlen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strlen_evex) - IFUNC_IMPL_ADD (array, i, strlen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strlen_evex512) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strlen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strlen_evex) + X86_IFUNC_IMPL_ADD_V4 (array, i, strlen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strlen_evex512) + X86_IFUNC_IMPL_ADD_V3 (array, i, strlen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __strlen_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strlen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __strlen_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strlen, + 1, + __strlen_sse2)) /* Support sysdeps/x86_64/multiarch/strnlen.c. 
*/ IFUNC_IMPL (i, name, strnlen, - IFUNC_IMPL_ADD (array, i, strnlen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __strnlen_avx2) - IFUNC_IMPL_ADD (array, i, strnlen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __strnlen_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strnlen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strnlen_evex) - IFUNC_IMPL_ADD (array, i, strnlen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strnlen_evex512) - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strnlen_evex) + X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strnlen_evex512) + X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __strnlen_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __strnlen_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strnlen, + 1, + __strnlen_sse2)) /* Support sysdeps/x86_64/multiarch/stpncpy.c. */ IFUNC_IMPL (i, name, stpncpy, @@ -422,40 +433,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. 
*/ IFUNC_IMPL (i, name, strcasecmp, - IFUNC_IMPL_ADD (array, i, strcasecmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __strcasecmp_evex) - IFUNC_IMPL_ADD (array, i, strcasecmp, - CPU_FEATURE_USABLE (AVX2), - __strcasecmp_avx2) - IFUNC_IMPL_ADD (array, i, strcasecmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strcasecmp_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strcasecmp, - CPU_FEATURE_USABLE (SSE4_2), - __strcasecmp_sse42) - IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __strcasecmp_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, + CPU_FEATURE_USABLE (AVX2), + __strcasecmp_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strcasecmp_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp, + CPU_FEATURE_USABLE (SSE4_2), + __strcasecmp_sse42) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp, + 1, + __strcasecmp_sse2)) /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. 
*/ IFUNC_IMPL (i, name, strcasecmp_l, - IFUNC_IMPL_ADD (array, i, strcasecmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __strcasecmp_l_evex) - IFUNC_IMPL_ADD (array, i, strcasecmp, - CPU_FEATURE_USABLE (AVX2), - __strcasecmp_l_avx2) - IFUNC_IMPL_ADD (array, i, strcasecmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strcasecmp_l_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, - CPU_FEATURE_USABLE (SSE4_2), - __strcasecmp_l_sse42) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, - __strcasecmp_l_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __strcasecmp_l_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, + CPU_FEATURE_USABLE (AVX2), + __strcasecmp_l_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strcasecmp_l_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l, + CPU_FEATURE_USABLE (SSE4_2), + __strcasecmp_l_sse42) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l, + 1, + __strcasecmp_l_sse2)) /* Support sysdeps/x86_64/multiarch/strcat.c. */ IFUNC_IMPL (i, name, strcat, @@ -474,74 +492,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strchr.c. 
*/ IFUNC_IMPL (i, name, strchr, - IFUNC_IMPL_ADD (array, i, strchr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __strchr_avx2) - IFUNC_IMPL_ADD (array, i, strchr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __strchr_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strchr, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strchr_evex) - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strchr, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strchr_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strchr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __strchr_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strchr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __strchr_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strchr, + 1, + __strchr_sse2) + X86_IFUNC_IMPL_ADD_V1 (array, i, strchr, + 1, + __strchr_sse2_no_bsf)) /* Support sysdeps/x86_64/multiarch/strchrnul.c. 
*/ IFUNC_IMPL (i, name, strchrnul, - IFUNC_IMPL_ADD (array, i, strchrnul, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __strchrnul_avx2) - IFUNC_IMPL_ADD (array, i, strchrnul, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __strchrnul_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strchrnul, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strchrnul_evex) - IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strchrnul, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strchrnul_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __strchrnul_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __strchrnul_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strchrnul, + 1, + __strchrnul_sse2)) /* Support sysdeps/x86_64/multiarch/strrchr.c. 
*/ IFUNC_IMPL (i, name, strrchr, - IFUNC_IMPL_ADD (array, i, strrchr, - CPU_FEATURE_USABLE (AVX2), - __strrchr_avx2) - IFUNC_IMPL_ADD (array, i, strrchr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strrchr_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strrchr, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __strrchr_evex) - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __strrchr_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, + CPU_FEATURE_USABLE (AVX2), + __strrchr_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strrchr_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strrchr, + 1, + __strrchr_sse2)) /* Support sysdeps/x86_64/multiarch/strcmp.c. */ IFUNC_IMPL (i, name, strcmp, - IFUNC_IMPL_ADD (array, i, strcmp, - CPU_FEATURE_USABLE (AVX2), - __strcmp_avx2) - IFUNC_IMPL_ADD (array, i, strcmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strcmp_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strcmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __strcmp_evex) - IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2), - __strcmp_sse42) - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strcmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strcmp_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp, + CPU_FEATURE_USABLE (AVX2), + __strcmp_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strcmp_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, 
+ CPU_FEATURE_USABLE (SSE4_2), + __strcmp_sse42) + /* ISA V2 wrapper for SSE2 implementations because the SSE2 + implementations are also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, + 1, + __strcmp_sse2_unaligned) + X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp, + 1, + __strcmp_sse2)) /* Support sysdeps/x86_64/multiarch/strcpy.c. */ IFUNC_IMPL (i, name, strcpy, @@ -568,41 +607,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ IFUNC_IMPL (i, name, strncasecmp, - IFUNC_IMPL_ADD (array, i, strncasecmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __strncasecmp_evex) - IFUNC_IMPL_ADD (array, i, strncasecmp, - CPU_FEATURE_USABLE (AVX2), - __strncasecmp_avx2) - IFUNC_IMPL_ADD (array, i, strncasecmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strncasecmp_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strncasecmp, - CPU_FEATURE_USABLE (SSE4_2), - __strncasecmp_sse42) - IFUNC_IMPL_ADD (array, i, strncasecmp, 1, - __strncasecmp_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __strncasecmp_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, + CPU_FEATURE_USABLE (AVX2), + __strncasecmp_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strncasecmp_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp, + CPU_FEATURE_USABLE (SSE4_2), + __strncasecmp_sse42) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp, + 1, + __strncasecmp_sse2)) /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ IFUNC_IMPL (i, name, strncasecmp_l, - IFUNC_IMPL_ADD (array, i, strncasecmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __strncasecmp_l_evex) - IFUNC_IMPL_ADD (array, i, strncasecmp, - CPU_FEATURE_USABLE (AVX2), - __strncasecmp_l_avx2) - IFUNC_IMPL_ADD (array, i, strncasecmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strncasecmp_l_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - CPU_FEATURE_USABLE (SSE4_2), - __strncasecmp_l_sse42) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, - __strncasecmp_l_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __strncasecmp_l_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, + CPU_FEATURE_USABLE (AVX2), + __strncasecmp_l_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strncasecmp_l_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l, + CPU_FEATURE_USABLE (SSE4_2), + __strncasecmp_l_sse42) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l, + 1, + __strncasecmp_l_sse2)) /* Support sysdeps/x86_64/multiarch/strncat.c. */ IFUNC_IMPL (i, name, strncat, @@ -664,69 +709,85 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcschr.c. 
*/ IFUNC_IMPL (i, name, wcschr, - IFUNC_IMPL_ADD (array, i, wcschr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __wcschr_avx2) - IFUNC_IMPL_ADD (array, i, wcschr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __wcschr_avx2_rtm) - IFUNC_IMPL_ADD (array, i, wcschr, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcschr_evex) - IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcschr, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcschr_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __wcschr_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __wcschr_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, wcschr, + 1, + __wcschr_sse2)) /* Support sysdeps/x86_64/multiarch/wcsrchr.c. 
*/ IFUNC_IMPL (i, name, wcsrchr, - IFUNC_IMPL_ADD (array, i, wcsrchr, - CPU_FEATURE_USABLE (AVX2), - __wcsrchr_avx2) - IFUNC_IMPL_ADD (array, i, wcsrchr, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __wcsrchr_avx2_rtm) - IFUNC_IMPL_ADD (array, i, wcsrchr, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcsrchr_evex) - IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcsrchr_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, + CPU_FEATURE_USABLE (AVX2), + __wcsrchr_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __wcsrchr_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsrchr, + 1, + __wcsrchr_sse2)) /* Support sysdeps/x86_64/multiarch/wcscmp.c. */ IFUNC_IMPL (i, name, wcscmp, - IFUNC_IMPL_ADD (array, i, wcscmp, - CPU_FEATURE_USABLE (AVX2), - __wcscmp_avx2) - IFUNC_IMPL_ADD (array, i, wcscmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __wcscmp_avx2_rtm) - IFUNC_IMPL_ADD (array, i, wcscmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcscmp_evex) - IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcscmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcscmp_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp, + CPU_FEATURE_USABLE (AVX2), + __wcscmp_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __wcscmp_avx2_rtm) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. 
*/ + X86_IFUNC_IMPL_ADD_V2 (array, i, wcscmp, + 1, + __wcscmp_sse2)) /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */ IFUNC_IMPL (i, name, wcsncmp, - IFUNC_IMPL_ADD (array, i, wcsncmp, - CPU_FEATURE_USABLE (AVX2), - __wcsncmp_avx2) - IFUNC_IMPL_ADD (array, i, wcsncmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __wcsncmp_avx2_rtm) - IFUNC_IMPL_ADD (array, i, wcsncmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcsncmp_evex) - IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic)) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsncmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcsncmp_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp, + CPU_FEATURE_USABLE (AVX2), + __wcsncmp_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __wcsncmp_avx2_rtm) + /* ISA V2 wrapper for GENERIC implementation because the + GENERIC implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncmp, + 1, + __wcsncmp_generic)) /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ IFUNC_IMPL (i, name, wcscpy, @@ -736,55 +797,59 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcslen.c. 
*/ IFUNC_IMPL (i, name, wcslen, - IFUNC_IMPL_ADD (array, i, wcslen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __wcslen_avx2) - IFUNC_IMPL_ADD (array, i, wcslen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __wcslen_avx2_rtm) - IFUNC_IMPL_ADD (array, i, wcslen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcslen_evex) - IFUNC_IMPL_ADD (array, i, wcslen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcslen_evex512) - IFUNC_IMPL_ADD (array, i, wcslen, - CPU_FEATURE_USABLE (SSE4_1), - __wcslen_sse4_1) - IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcslen_evex) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcslen_evex512) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __wcslen_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __wcslen_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, wcslen, + CPU_FEATURE_USABLE (SSE4_1), + __wcslen_sse4_1) + X86_IFUNC_IMPL_ADD_V1 (array, i, wcslen, + 1, + __wcslen_sse2)) /* Support sysdeps/x86_64/multiarch/wcsnlen.c. 
*/ IFUNC_IMPL (i, name, wcsnlen, - IFUNC_IMPL_ADD (array, i, wcsnlen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2)), - __wcsnlen_avx2) - IFUNC_IMPL_ADD (array, i, wcsnlen, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (BMI2) - && CPU_FEATURE_USABLE (RTM)), - __wcsnlen_avx2_rtm) - IFUNC_IMPL_ADD (array, i, wcsnlen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcsnlen_evex) - IFUNC_IMPL_ADD (array, i, wcsnlen, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW) - && CPU_FEATURE_USABLE (BMI2)), - __wcsnlen_evex512) - IFUNC_IMPL_ADD (array, i, wcsnlen, - CPU_FEATURE_USABLE (SSE4_1), - __wcsnlen_sse4_1) - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcsnlen_evex) + X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcsnlen_evex512) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2)), + __wcsnlen_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (RTM)), + __wcsnlen_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsnlen, + CPU_FEATURE_USABLE (SSE4_1), + __wcsnlen_sse4_1) + X86_IFUNC_IMPL_ADD_V1 (array, i, wcsnlen, + 1, + __wcsnlen_generic)) /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ IFUNC_IMPL (i, name, wmemchr, @@ -1050,20 +1115,25 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strncmp.c. 
*/ IFUNC_IMPL (i, name, strncmp, - IFUNC_IMPL_ADD (array, i, strncmp, - CPU_FEATURE_USABLE (AVX2), - __strncmp_avx2) - IFUNC_IMPL_ADD (array, i, strncmp, - (CPU_FEATURE_USABLE (AVX2) - && CPU_FEATURE_USABLE (RTM)), - __strncmp_avx2_rtm) - IFUNC_IMPL_ADD (array, i, strncmp, - (CPU_FEATURE_USABLE (AVX512VL) - && CPU_FEATURE_USABLE (AVX512BW)), - __strncmp_evex) - IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2), - __strncmp_sse42) - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2)) + X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW)), + __strncmp_evex) + X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp, + CPU_FEATURE_USABLE (AVX2), + __strncmp_avx2) + X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp, + (CPU_FEATURE_USABLE (AVX2) + && CPU_FEATURE_USABLE (RTM)), + __strncmp_avx2_rtm) + X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp, + CPU_FEATURE_USABLE (SSE4_2), + __strncmp_sse42) + /* ISA V2 wrapper for SSE2 implementation because the SSE2 + implementation is also used at ISA level 2. */ + X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp, + 1, + __strncmp_sse2)) #ifdef SHARED /* Support sysdeps/x86_64/multiarch/wmemset_chk.c. 
*/ diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h index 296d32071b..68646ef199 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h @@ -19,32 +19,39 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { - const struct cpu_features* cpu_features = __get_cpu_features (); + const struct cpu_features *cpu_features = __get_cpu_features (); - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + AVX_Fast_Unaligned_Load, )) { - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) - return OPTIMIZE (evex); + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) + return OPTIMIZE (evex); if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) - return OPTIMIZE (avx2_rtm); + return OPTIMIZE (avx2_rtm); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx2); + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + Prefer_No_VZEROUPPER, !)) + return OPTIMIZE (avx2); } - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) + /* Keep this as a runtime check as it's not
guaranteed at ISA + level 2. */ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) return OPTIMIZE (sse42); diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h index 88c1c502af..064722c2bd 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -23,33 +23,38 @@ # define GENERIC sse2 #endif -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { - const struct cpu_features* cpu_features = __get_cpu_features (); + const struct cpu_features *cpu_features = __get_cpu_features (); - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + AVX_Fast_Unaligned_Load, )) { - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) return OPTIMIZE (evex); if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) return OPTIMIZE (avx2_rtm); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + Prefer_No_VZEROUPPER, !)) return OPTIMIZE (avx2); } - if (CPU_FEATURE_USABLE_P (cpu_features, 
SSE4_1)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) return OPTIMIZE (sse4_1); return OPTIMIZE (GENERIC); diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S index f300d7daf4..d1457ab60c 100644 --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (3) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S index 91329b18dc..ea3a0a0a60 100644 --- a/sysdeps/x86_64/multiarch/memrchr-evex.S +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) # include <sysdep.h> # include "evex256-vecs.h" diff --git a/sysdeps/x86_64/multiarch/memrchr-sse2.S b/sysdeps/x86_64/multiarch/memrchr-sse2.S index d92a4022dc..4cc8b9e3b0 100644 --- a/sysdeps/x86_64/multiarch/memrchr-sse2.S +++ b/sysdeps/x86_64/multiarch/memrchr-sse2.S @@ -16,22 +16,26 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation + so we need this to build for ISA V2 builds. */ +#if ISA_SHOULD_BUILD (2) + # ifndef MEMRCHR # define MEMRCHR __memrchr_sse2 # endif -#endif -#include <sysdep.h> -#define VEC_SIZE 16 -#define PAGE_SIZE 4096 +# include <sysdep.h> +# define VEC_SIZE 16 +# define PAGE_SIZE 4096 .text ENTRY_P2ALIGN(MEMRCHR, 6) -#ifdef __ILP32__ +# ifdef __ILP32__ /* Clear upper bits. */ mov %RDX_LP, %RDX_LP -#endif +# endif movd %esi, %xmm0 /* Get end pointer. */ @@ -352,3 +356,4 @@ L(zero_3): ret /* 2-bytes from next cache line. 
*/ END(MEMRCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S index 09957fc3c5..d408751f4c 100644 --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S @@ -1,15 +1,2 @@ -#ifndef STRCMP -# define STRCMP __strcasecmp_l_avx2_rtm -#endif - -#define _GLABEL(x) x ## _rtm -#define GLABEL(x) _GLABEL(x) - -#define ZERO_UPPER_VEC_REGISTERS_RETURN \ - ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST - -#define VZEROUPPER_RETURN jmp L(return_vzeroupper) - -#define SECTION(p) p##.avx.rtm - -#include "strcasecmp_l-avx2.S" +#define USE_AS_STRCASECMP_L +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S index e2762f2a22..167f866014 100644 --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S @@ -16,8 +16,5 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef STRCMP -# define STRCMP __strcasecmp_l_avx2 -#endif #define USE_AS_STRCASECMP_L #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S index 58642db748..012a084930 100644 --- a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S @@ -16,8 +16,5 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef STRCMP -# define STRCMP __strcasecmp_l_evex -#endif #define USE_AS_STRCASECMP_L #include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S index a2b5741399..6ffd09b513 100644 --- a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S +++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S @@ -17,4 +17,5 @@ <https://www.gnu.org/licenses/>. 
*/ #define USE_AS_STRCASECMP_L + #include "strcmp-sse2.S" diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S index 1a916cc951..425a40b8de 100644 --- a/sysdeps/x86_64/multiarch/strchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (3) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S index ec739fb8f9..a1c15c4419 100644 --- a/sysdeps/x86_64/multiarch/strchr-evex.S +++ b/sysdeps/x86_64/multiarch/strchr-evex.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S index 93e6f62d7f..bb092e3f61 100644 --- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S +++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S @@ -16,7 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +/* NB: atom builds with ISA level == 1 so no reason to hold onto this + at ISA level >= 2. */ +#if ISA_SHOULD_BUILD (1) # include <sysdep.h> # include "asm-syntax.h" diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S index f7767ca543..7a182f0c3b 100644 --- a/sysdeps/x86_64/multiarch/strchr-sse2.S +++ b/sysdeps/x86_64/multiarch/strchr-sse2.S @@ -16,7 +16,12 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) || defined STRCHR +#include <isa-level.h> + +/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation + so we need this to build for ISA V2 builds. 
*/ +#if ISA_SHOULD_BUILD (2) + # ifndef STRCHR # define STRCHR __strchr_sse2 # endif diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c index de737580eb..ce7441c532 100644 --- a/sysdeps/x86_64/multiarch/strchr.c +++ b/sysdeps/x86_64/multiarch/strchr.c @@ -26,36 +26,40 @@ # define SYMBOL_NAME strchr # include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { - const struct cpu_features* cpu_features = __get_cpu_features (); + const struct cpu_features *cpu_features = __get_cpu_features (); - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) - && CPU_FEATURE_USABLE_P (cpu_features, BMI2) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + AVX_Fast_Unaligned_Load, )) { - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) return OPTIMIZE (evex); if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) return OPTIMIZE (avx2_rtm); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + Prefer_No_VZEROUPPER, !)) return OPTIMIZE (avx2); } - if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) - return 
OPTIMIZE (sse2_no_bsf); + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF, !)) + return OPTIMIZE (sse2); - return OPTIMIZE (sse2); + return OPTIMIZE (sse2_no_bsf); } libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S index fa0cc09760..10ad5e6058 100644 --- a/sysdeps/x86_64/multiarch/strchrnul-avx2.S +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S @@ -1,3 +1,8 @@ -#define STRCHR __strchrnul_avx2 +#ifndef STRCHRNUL +# define STRCHRNUL __strchrnul_avx2 +#endif + +#define STRCHR STRCHRNUL #define USE_AS_STRCHRNUL 1 + #include "strchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S index 064fe7ca9e..0f216cb47f 100644 --- a/sysdeps/x86_64/multiarch/strchrnul-evex.S +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S @@ -1,3 +1,8 @@ -#define STRCHR __strchrnul_evex +#ifndef STRCHRNUL +# define STRCHRNUL __strchrnul_evex +#endif + +#define STRCHR STRCHRNUL #define USE_AS_STRCHRNUL 1 + #include "strchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S index 7238977a21..7ee81ae510 100644 --- a/sysdeps/x86_64/multiarch/strchrnul-sse2.S +++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S @@ -16,12 +16,10 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) -# ifndef STRCHR -# define STRCHR __strchrnul_sse2 -# endif +#ifndef STRCHRNUL +# define STRCHRNUL __strchrnul_sse2 #endif - #define AS_STRCHRNUL +#define STRCHR STRCHRNUL #include "strchr-sse2.S" diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S index aecd30d97f..74f1f996a9 100644 --- a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S @@ -1,12 +1,9 @@ -#ifndef STRCMP -# define STRCMP __strcmp_avx2_rtm -#endif - #define ZERO_UPPER_VEC_REGISTERS_RETURN \ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST #define VZEROUPPER_RETURN jmp L(return_vzeroupper) #define SECTION(p) p##.avx.rtm +#define STRCMP_ISA _avx2_rtm #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S index 3ab21e3a58..4c01d664e8 100644 --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S @@ -16,7 +16,15 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (3) + +# ifndef STRCMP_ISA +# define STRCMP_ISA _avx2 +# endif + +# include "strcmp-naming.h" # include <sysdep.h> @@ -86,15 +94,11 @@ # ifdef USE_AS_STRCASECMP_L # ifdef USE_AS_STRNCMP -# define STRCASECMP __strncasecmp_avx2 # define LOCALE_REG rcx # define LOCALE_REG_LP RCX_LP -# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii # else -# define STRCASECMP __strcasecmp_avx2 # define LOCALE_REG rdx # define LOCALE_REG_LP RDX_LP -# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii # endif # endif @@ -185,18 +189,14 @@ .type STRCMP, @function .globl STRCMP -# ifndef GLABEL -# define GLABEL(...) __VA_ARGS__ -# endif - # ifdef USE_AS_STRCASECMP_L -ENTRY (GLABEL(STRCASECMP)) +ENTRY (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip), %rax mov %fs:(%rax), %LOCALE_REG_LP /* Either 1 or 5 bytes (dependeing if CET is enabled). 
*/ .p2align 4 -END (GLABEL(STRCASECMP)) +END (STRCASECMP) /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ # endif diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S index afbf13a230..e482d0167f 100644 --- a/sysdeps/x86_64/multiarch/strcmp-evex.S +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -16,7 +16,12 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) + +# define STRCMP_ISA _evex +# include "strcmp-naming.h" # include <sysdep.h> # if defined USE_AS_STRCASECMP_L @@ -37,10 +42,6 @@ # define VMOVA vmovdqa64 # ifdef USE_AS_WCSCMP -# ifndef OVERFLOW_STRCMP -# define OVERFLOW_STRCMP __wcscmp_evex -# endif - # define TESTEQ subl $0xff, /* Compare packed dwords. */ # define VPCMP vpcmpd @@ -50,10 +51,6 @@ /* 1 dword char == 4 bytes. */ # define SIZE_OF_CHAR 4 # else -# ifndef OVERFLOW_STRCMP -# define OVERFLOW_STRCMP __strcmp_evex -# endif - # define TESTEQ incl /* Compare packed bytes. */ # define VPCMP vpcmpb @@ -120,15 +117,11 @@ # ifdef USE_AS_STRCASECMP_L # ifdef USE_AS_STRNCMP -# define STRCASECMP __strncasecmp_evex # define LOCALE_REG rcx # define LOCALE_REG_LP RCX_LP -# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii # else -# define STRCASECMP __strcasecmp_evex # define LOCALE_REG rdx # define LOCALE_REG_LP RDX_LP -# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii # endif # endif @@ -214,7 +207,6 @@ .align 16 .type STRCMP, @function .globl STRCMP - # ifdef USE_AS_STRCASECMP_L ENTRY (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip), %rax diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S index 0d691b78a8..33c18a28e8 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S @@ -16,11 +16,20 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) +#include <isa-level.h> -#include "sysdep.h" +/* Continue building as ISA level 2. We use this as ISA V2 default + because strcmp-sse42 uses pcmpstri (slow on some SSE4.2 + processors) and this implementation is potentially faster than + strcmp-sse42 (aside from the slower page cross case). */ +#if ISA_SHOULD_BUILD (2) -ENTRY ( __strcmp_sse2_unaligned) +# define STRCMP_ISA _sse2_unaligned +# include "strcmp-naming.h" + +# include "sysdep.h" + +ENTRY (STRCMP) movl %edi, %eax xorl %edx, %edx pxor %xmm7, %xmm7 @@ -208,6 +217,5 @@ L(cross_page): L(different): subl %ecx, %eax ret -END (__strcmp_sse2_unaligned) - +END (STRCMP) #endif diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S index b1220231ab..3c69fc1df1 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse2.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S @@ -16,7 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) || IS_IN (rtld) +#include <isa-level.h> + +/* Continue building at ISA level 2 as the strcmp-sse42 is not always + preferable for ISA level == 2 CPUs. */ +#if ISA_SHOULD_BUILD (2) # define STRCMP_ISA _sse2 # include "strcmp-naming.h" diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S index 963e208ccb..dc6fc90e14 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S @@ -16,7 +16,10 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (2) + # include <sysdep.h> # define STRCMP_ISA _sse42 @@ -1766,7 +1769,6 @@ LABEL(unaligned_table): .int LABEL(ashr_0) - LABEL(unaligned_table) # undef LABEL -# undef GLABEL # undef SECTION # undef movdqa # undef movdqu diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c index 9c1677724c..fdd5afe3af 100644 --- a/sysdeps/x86_64/multiarch/strcmp.c +++ b/sysdeps/x86_64/multiarch/strcmp.c @@ -26,37 +26,50 @@ # define SYMBOL_NAME strcmp # include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; + +extern __typeof (REDIRECT_NAME) + OPTIMIZE (sse2_unaligned) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + + static inline void * IFUNC_SELECTOR (void) { - const struct cpu_features* cpu_features = __get_cpu_features (); + const struct cpu_features *cpu_features = __get_cpu_features (); - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + AVX_Fast_Unaligned_Load, )) { - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) - && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) + && X86_ISA_CPU_FEATURE_USABLE_P 
(cpu_features, BMI2)) return OPTIMIZE (evex); if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) return OPTIMIZE (avx2_rtm); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + Prefer_No_VZEROUPPER, !)) return OPTIMIZE (avx2); } - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) + /* Keep this as runtime check. Some ISA level >= 2 CPUs such as + Tremont, Silvermont, and more check this. */ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) return OPTIMIZE (sse42); + /* Keep this as runtime check. The standard SSE2 version has + meaningful optimizations around keeping all loads aligned in the + main loop which can benefit some ISA level >= 2 CPUs. */ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S index 9e36290dd2..0593fb303b 100644 --- a/sysdeps/x86_64/multiarch/strlen-avx2.S +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (3) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strlen-evex-base.S b/sysdeps/x86_64/multiarch/strlen-evex-base.S index 278c899691..418e9f8411 100644 --- a/sysdeps/x86_64/multiarch/strlen-evex-base.S +++ b/sysdeps/x86_64/multiarch/strlen-evex-base.S @@ -16,7 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +/* UNUSED. Exists purely as reference implementation. 
*/ + +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S index 59ade77498..2109ec2f7a 100644 --- a/sysdeps/x86_64/multiarch/strlen-evex.S +++ b/sysdeps/x86_64/multiarch/strlen-evex.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S index 5be72267d5..a96ccbb2d5 100644 --- a/sysdeps/x86_64/multiarch/strlen-sse2.S +++ b/sysdeps/x86_64/multiarch/strlen-sse2.S @@ -16,15 +16,20 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) || defined STRLEN - -# ifndef STRLEN -# define STRLEN __strlen_sse2 -# endif +#include <isa-level.h> +/* ISA level >= 2 for both strlen and wcslen. wcslen uses `pminud` + which is SSE4.1. strlen doesn't have an ISA level == 2 + implementation so the SSE2 implementation must be built with ISA + level == 2. */ +# if ISA_SHOULD_BUILD (2) # include <sysdep.h> +# ifndef STRLEN +# define STRLEN __strlen_sse2 +# endif + # ifdef AS_WCSLEN # define PMINU pminud # define PCMPEQ pcmpeqd @@ -82,7 +87,7 @@ L(n_nonzero): suffice. 
*/ mov %RSI_LP, %R10_LP sar $62, %R10_LP - jnz __wcslen_sse4_1 + jnz OVERFLOW_STRLEN sal $2, %RSI_LP # endif diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S index 58c05dcfb8..c2596ab103 100644 --- a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S +++ b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S @@ -1,16 +1,4 @@ -#ifndef STRCMP -# define STRCMP __strncasecmp_l_avx2_rtm -#endif +#define USE_AS_STRCASECMP_L +#define USE_AS_STRNCMP -#define _GLABEL(x) x ## _rtm -#define GLABEL(x) _GLABEL(x) - -#define ZERO_UPPER_VEC_REGISTERS_RETURN \ - ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST - -#define VZEROUPPER_RETURN jmp L(return_vzeroupper) - -#define SECTION(p) p##.avx.rtm -#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm - -#include "strncase_l-avx2.S" +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2.S b/sysdeps/x86_64/multiarch/strncase_l-avx2.S index 48c0aa21f8..d00687aac5 100644 --- a/sysdeps/x86_64/multiarch/strncase_l-avx2.S +++ b/sysdeps/x86_64/multiarch/strncase_l-avx2.S @@ -16,12 +16,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef STRCMP -# define STRCMP __strncasecmp_l_avx2 -#endif #define USE_AS_STRCASECMP_L #define USE_AS_STRNCMP -#ifndef OVERFLOW_STRCMP -# define OVERFLOW_STRCMP __strcasecmp_l_avx2 -#endif + #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strncase_l-evex.S b/sysdeps/x86_64/multiarch/strncase_l-evex.S index 8a5af3695c..1a79758065 100644 --- a/sysdeps/x86_64/multiarch/strncase_l-evex.S +++ b/sysdeps/x86_64/multiarch/strncase_l-evex.S @@ -16,10 +16,6 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#ifndef STRCMP -# define STRCMP __strncasecmp_l_evex -#endif -#define OVERFLOW_STRCMP __strcasecmp_l_evex #define USE_AS_STRCASECMP_L #define USE_AS_STRNCMP #include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S index 68bad365ba..6bb6be8585 100644 --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S @@ -1,4 +1,2 @@ -#define STRCMP __strncmp_avx2_rtm #define USE_AS_STRNCMP 1 -#define OVERFLOW_STRCMP __strcmp_avx2_rtm #include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S index f138e9f1fd..def3509c4c 100644 --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S @@ -1,4 +1,3 @@ -#define STRCMP __strncmp_avx2 #define USE_AS_STRNCMP 1 -#define OVERFLOW_STRCMP __strcmp_avx2 + #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S index a1d53e8c9f..aa69c18928 100644 --- a/sysdeps/x86_64/multiarch/strncmp-evex.S +++ b/sysdeps/x86_64/multiarch/strncmp-evex.S @@ -1,3 +1,2 @@ -#define STRCMP __strncmp_evex #define USE_AS_STRNCMP 1 #include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c index 70ae6547c9..4ebe4bde30 100644 --- a/sysdeps/x86_64/multiarch/strncmp.c +++ b/sysdeps/x86_64/multiarch/strncmp.c @@ -26,33 +26,38 @@ # define SYMBOL_NAME strncmp # include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) 
attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { - const struct cpu_features* cpu_features = __get_cpu_features (); + const struct cpu_features *cpu_features = __get_cpu_features (); - if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + AVX_Fast_Unaligned_Load, )) { - if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) - && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)) return OPTIMIZE (evex); if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) return OPTIMIZE (avx2_rtm); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, + Prefer_No_VZEROUPPER, !)) return OPTIMIZE (avx2); } - if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) return OPTIMIZE (sse42); diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2.S b/sysdeps/x86_64/multiarch/strnlen-avx2.S index c4062b22f7..c4a12097f0 100644 --- a/sysdeps/x86_64/multiarch/strnlen-avx2.S +++ b/sysdeps/x86_64/multiarch/strnlen-avx2.S @@ -1,4 +1,8 @@ -#define STRLEN __strnlen_avx2 +#ifndef STRNLEN +# define STRNLEN __strnlen_avx2 +#endif + #define USE_AS_STRNLEN 1 +#define STRLEN STRNLEN #include "strlen-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S index 722022f303..64a9fc2606 100644 --- a/sysdeps/x86_64/multiarch/strnlen-evex.S +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S @@ -1,4 +1,8 @@ -#define STRLEN __strnlen_evex +#ifndef STRNLEN +# define 
STRNLEN __strnlen_evex +#endif + #define USE_AS_STRNLEN 1 +#define STRLEN STRNLEN #include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2.S b/sysdeps/x86_64/multiarch/strnlen-sse2.S index a50c7d6a28..8841ba9faf 100644 --- a/sysdeps/x86_64/multiarch/strnlen-sse2.S +++ b/sysdeps/x86_64/multiarch/strnlen-sse2.S @@ -16,11 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) -# ifndef STRLEN -# define STRLEN __strnlen_sse2 -# endif +#ifndef STRNLEN +# define STRNLEN __strnlen_sse2 #endif -#define AS_STRNLEN +#define AS_STRNLEN 1 +#define STRLEN STRNLEN + #include "strlen-sse2.S" diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S index eb128a2ae3..924171d8e4 100644 --- a/sysdeps/x86_64/multiarch/strrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (3) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S index 8014c285b3..992b45fb47 100644 --- a/sysdeps/x86_64/multiarch/strrchr-evex.S +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S @@ -16,7 +16,9 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (4) # include <sysdep.h> diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S index 6ee7a5e33a..892e861fa8 100644 --- a/sysdeps/x86_64/multiarch/strrchr-sse2.S +++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S @@ -16,36 +16,40 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) +#include <isa-level.h> + +/* ISA level >= 2 because there are no {wcs|str}rchr-sse4 + implementations. 
*/ +#if ISA_SHOULD_BUILD (2) + +# include <sysdep.h> + # ifndef STRRCHR # define STRRCHR __strrchr_sse2 # endif -#endif - -#include <sysdep.h> -#ifdef USE_AS_WCSRCHR -# define PCMPEQ pcmpeqd -# define CHAR_SIZE 4 -# define PMINU pminud -#else -# define PCMPEQ pcmpeqb -# define CHAR_SIZE 1 -# define PMINU pminub -#endif +# ifdef USE_AS_WCSRCHR +# define PCMPEQ pcmpeqd +# define CHAR_SIZE 4 +# define PMINU pminud +# else +# define PCMPEQ pcmpeqb +# define CHAR_SIZE 1 +# define PMINU pminub +# endif -#define PAGE_SIZE 4096 -#define VEC_SIZE 16 +# define PAGE_SIZE 4096 +# define VEC_SIZE 16 .text ENTRY(STRRCHR) movd %esi, %xmm0 movq %rdi, %rax andl $(PAGE_SIZE - 1), %eax -#ifndef USE_AS_WCSRCHR +# ifndef USE_AS_WCSRCHR punpcklbw %xmm0, %xmm0 punpcklwd %xmm0, %xmm0 -#endif +# endif pshufd $0, %xmm0, %xmm0 cmpl $(PAGE_SIZE - VEC_SIZE), %eax ja L(cross_page) @@ -69,9 +73,9 @@ L(cross_page_continue): /* We are off by 3 for wcsrchr if search CHAR is non-zero. If search CHAR is zero we are correct. Either way `andq -CHAR_SIZE, %rax` gets the correct result. 
*/ -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif L(ret0): ret @@ -85,9 +89,9 @@ L(first_vec_x0_test): jz L(ret0) bsrl %eax, %eax addq %r8, %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4 @@ -100,9 +104,9 @@ L(first_vec_x1): jz L(first_vec_x0_test) bsrl %eax, %eax leaq (VEC_SIZE)(%rdi, %rax), %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4 @@ -113,9 +117,9 @@ L(first_vec_x1_test): jz L(first_vec_x0_test) bsrl %eax, %eax leaq (VEC_SIZE)(%rdi, %rax), %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4 @@ -128,9 +132,9 @@ L(first_vec_x2): jz L(first_vec_x1_test) bsrl %eax, %eax leaq (VEC_SIZE * 2)(%rdi, %rax), %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4 @@ -165,27 +169,27 @@ L(first_loop): /* Since SSE2 no pminud so wcsrchr needs seperate logic for detecting zero. Note if this is found to be a bottleneck it may be worth adding an SSE4.1 wcsrchr implementation. 
*/ -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR movaps %xmm5, %xmm6 pxor %xmm8, %xmm8 PCMPEQ %xmm8, %xmm5 PCMPEQ %xmm4, %xmm8 por %xmm5, %xmm8 -#else +# else movaps %xmm5, %xmm6 PMINU %xmm4, %xmm5 -#endif +# endif movaps %xmm4, %xmm9 PCMPEQ %xmm0, %xmm4 PCMPEQ %xmm0, %xmm6 movaps %xmm6, %xmm7 por %xmm4, %xmm6 -#ifndef USE_AS_WCSRCHR +# ifndef USE_AS_WCSRCHR pxor %xmm8, %xmm8 PCMPEQ %xmm5, %xmm8 -#endif +# endif pmovmskb %xmm8, %ecx pmovmskb %xmm6, %eax @@ -219,9 +223,9 @@ L(first_loop_old_match): bsrl %eax, %eax addq %rsi, %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4 @@ -247,9 +251,9 @@ L(new_match): jz L(first_loop_old_match) bsrl %eax, %eax addq %rdi, %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret /* Save minimum state for getting most recent match. We can @@ -267,27 +271,27 @@ L(second_loop): /* Since SSE2 no pminud so wcsrchr needs seperate logic for detecting zero. Note if this is found to be a bottleneck it may be worth adding an SSE4.1 wcsrchr implementation. 
*/ -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR movaps %xmm5, %xmm6 pxor %xmm8, %xmm8 PCMPEQ %xmm8, %xmm5 PCMPEQ %xmm4, %xmm8 por %xmm5, %xmm8 -#else +# else movaps %xmm5, %xmm6 PMINU %xmm4, %xmm5 -#endif +# endif movaps %xmm4, %xmm9 PCMPEQ %xmm0, %xmm4 PCMPEQ %xmm0, %xmm6 movaps %xmm6, %xmm7 por %xmm4, %xmm6 -#ifndef USE_AS_WCSRCHR +# ifndef USE_AS_WCSRCHR pxor %xmm8, %xmm8 PCMPEQ %xmm5, %xmm8 -#endif +# endif pmovmskb %xmm8, %ecx pmovmskb %xmm6, %eax @@ -312,9 +316,9 @@ L(second_loop_old_match): orl %ecx, %eax bsrl %eax, %eax addq %rsi, %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4 @@ -340,9 +344,9 @@ L(second_loop_new_match): jz L(second_loop_old_match) bsrl %eax, %eax addq %rdi, %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif ret .p2align 4,, 4 @@ -366,9 +370,10 @@ L(cross_page): jz L(ret1) bsrl %eax, %eax addq %rdi, %rax -#ifdef USE_AS_WCSRCHR +# ifdef USE_AS_WCSRCHR andq $-CHAR_SIZE, %rax -#endif +# endif L(ret1): ret END(STRRCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S index c6aa8f45a6..dc342a9f44 100644 --- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> +#include "../strchr-isa-default-impl.h" ENTRY(__strstr_sse2_unaligned) movzbl (%rsi), %eax @@ -75,7 +76,7 @@ L(next_pair_index): .p2align 4 L(strchr): movzbl %al, %esi - jmp __strchr_sse2 + jmp DEFAULT_STRCHR .p2align 4 L(pair_loop): diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S index 67726b6837..f404888a93 100644 --- a/sysdeps/x86_64/multiarch/wcschr-avx2.S +++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S @@ -1,3 +1,8 @@ -#define STRCHR __wcschr_avx2 +#ifndef WCSCHR +# define WCSCHR __wcschr_avx2 +#endif + +#define STRCHR WCSCHR #define USE_AS_WCSCHR 1 + #include "strchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S index 7cb8f1e41a..b5ccc59230 100644 --- a/sysdeps/x86_64/multiarch/wcschr-evex.S +++ b/sysdeps/x86_64/multiarch/wcschr-evex.S @@ -1,3 +1,8 @@ -#define STRCHR __wcschr_evex +#ifndef WCSCHR +# define WCSCHR __wcschr_evex +#endif + +#define STRCHR WCSCHR #define USE_AS_WCSCHR 1 + #include "strchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S index c872926ba9..1c83957cbc 100644 --- a/sysdeps/x86_64/multiarch/wcschr-sse2.S +++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S @@ -16,13 +16,17 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) + +#include <isa-level.h> + +/* ISA level >= 2 because there are no wcschr-sse4 implementations. 
*/ +#if ISA_SHOULD_BUILD (2) + # ifndef WCSCHR # define WCSCHR __wcschr_sse2 # endif -#endif -#include <sysdep.h> +# include <sysdep.h> .text ENTRY (WCSCHR) @@ -155,3 +159,4 @@ L(return_null): ret END (WCSCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S index d6ca2b8064..f17a8969cb 100644 --- a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S @@ -1,4 +1,3 @@ -#define STRCMP __wcscmp_avx2_rtm #define USE_AS_WCSCMP 1 #include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2.S b/sysdeps/x86_64/multiarch/wcscmp-avx2.S index e5da4da689..0a71f907f0 100644 --- a/sysdeps/x86_64/multiarch/wcscmp-avx2.S +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2.S @@ -1,4 +1,3 @@ -#define STRCMP __wcscmp_avx2 #define USE_AS_WCSCMP 1 #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S index 42e73e51eb..b0337a8311 100644 --- a/sysdeps/x86_64/multiarch/wcscmp-evex.S +++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S @@ -1,4 +1,3 @@ -#define STRCMP __wcscmp_evex #define USE_AS_WCSCMP 1 #include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-sse2.S b/sysdeps/x86_64/multiarch/wcscmp-sse2.S index 6cb7d9faf9..3f32e8127d 100644 --- a/sysdeps/x86_64/multiarch/wcscmp-sse2.S +++ b/sysdeps/x86_64/multiarch/wcscmp-sse2.S @@ -16,11 +16,16 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#define USE_AS_WCSCMP -#define STRCMP_ISA _sse2 -#include "strcmp-naming.h" +#include <isa-level.h> -#include <sysdep.h> +/* ISA level >= 2 because there are no wcscmp-sse4 implementations. */ +#if ISA_SHOULD_BUILD (2) +# include <sysdep.h> + +/* Needed to get right name. */ +# define USE_AS_WCSCMP +# define STRCMP_ISA _sse2 +# include "strcmp-naming.h" /* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. 
*/ @@ -949,3 +954,4 @@ L(equal): ret END (STRCMP) +#endif diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2.S b/sysdeps/x86_64/multiarch/wcslen-avx2.S index c9224f1bc5..9784d8f780 100644 --- a/sysdeps/x86_64/multiarch/wcslen-avx2.S +++ b/sysdeps/x86_64/multiarch/wcslen-avx2.S @@ -1,4 +1,8 @@ -#define STRLEN __wcslen_avx2 +#ifndef WCSLEN +# define WCSLEN __wcslen_avx2 +#endif + +#define STRLEN WCSLEN #define USE_AS_WCSLEN 1 #include "strlen-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S index bdafa83bd5..df21bac63c 100644 --- a/sysdeps/x86_64/multiarch/wcslen-evex.S +++ b/sysdeps/x86_64/multiarch/wcslen-evex.S @@ -1,4 +1,8 @@ -#define STRLEN __wcslen_evex +#ifndef WCSLEN +# define WCSLEN __wcslen_evex +#endif + +#define STRLEN WCSLEN #define USE_AS_WCSLEN 1 #include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-sse2.S b/sysdeps/x86_64/multiarch/wcslen-sse2.S index 944c3bd9c6..e9c518a932 100644 --- a/sysdeps/x86_64/multiarch/wcslen-sse2.S +++ b/sysdeps/x86_64/multiarch/wcslen-sse2.S @@ -16,13 +16,16 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) -# ifndef WCSLEN -# define WCSLEN __wcslen_sse2 -# endif +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (1) + +# include <sysdep.h> + +#ifndef WCSLEN +# define WCSLEN __wcslen_sse2 #endif -#include <sysdep.h> .text ENTRY (WCSLEN) @@ -235,3 +238,5 @@ L(exit_tail7): ret END (WCSLEN) + +#endif diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S index c88e8342a1..126d183e75 100644 --- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S @@ -1,5 +1,9 @@ -#define AS_WCSLEN -#define STRLEN __wcslen_sse4_1 -#define SECTION(p) p##.sse4.1 +#ifndef WCSLEN +# define WCSLEN __wcslen_sse4_1 +#endif + +#define AS_WCSLEN 1 +#define STRLEN WCSLEN +#define SECTION(p) p##.sse4.1 #include "strlen-sse2.S" diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S index f467582cbe..099a60c48e 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S @@ -1,5 +1,3 @@ -#define STRCMP __wcsncmp_avx2_rtm #define USE_AS_STRNCMP 1 #define USE_AS_WCSCMP 1 -#define OVERFLOW_STRCMP __wcscmp_avx2_rtm #include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S index e9ede522b8..fc26b593d0 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S @@ -1,5 +1,4 @@ -#define STRCMP __wcsncmp_avx2 #define USE_AS_STRNCMP 1 #define USE_AS_WCSCMP 1 -#define OVERFLOW_STRCMP __wcscmp_avx2 + #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S index 8a8e310713..d3a92e2000 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-evex.S +++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S @@ -1,4 +1,3 @@ -#define STRCMP __wcsncmp_evex #define USE_AS_STRNCMP 1 #define USE_AS_WCSCMP 1 diff --git a/sysdeps/x86_64/multiarch/wcsncmp-generic.c 
b/sysdeps/x86_64/multiarch/wcsncmp-generic.c index 658d541886..b0cf4e87d5 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-generic.c +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c @@ -16,5 +16,10 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#define WCSNCMP __wcsncmp_generic -#include <wcsmbs/wcsncmp.c> +#include <isa-level.h> +#if ISA_SHOULD_BUILD (2) + +# define WCSNCMP __wcsncmp_generic +# include <wcsmbs/wcsncmp.c> + +#endif diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S index fac83546b5..12c3a0fd05 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S +++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S @@ -1,4 +1,8 @@ -#define STRLEN __wcsnlen_avx2 +#ifndef WCSNLEN +# define WCSNLEN __wcsnlen_avx2 +#endif + +#define STRLEN WCSNLEN #define USE_AS_WCSLEN 1 #define USE_AS_STRNLEN 1 diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S index 24773bb4e2..e2aad94c1e 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-evex.S +++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S @@ -1,4 +1,8 @@ -#define STRLEN __wcsnlen_evex +#ifndef WCSNLEN +# define WCSNLEN __wcsnlen_evex +#endif + +#define STRLEN WCSNLEN #define USE_AS_WCSLEN 1 #define USE_AS_STRNLEN 1 diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c index 2d75da7709..8b466aac2f 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-generic.c +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c @@ -16,13 +16,18 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ +#include <isa-level.h> + +#if ISA_SHOULD_BUILD (1) -#if IS_IN (libc) # include <wchar.h> -# define WCSNLEN __wcsnlen_generic +# ifndef WCSNLEN +# define WCSNLEN __wcsnlen_generic +# endif extern __typeof (wcsnlen) __wcsnlen_generic; -#endif -#include "wcsmbs/wcsnlen.c" +# include "wcsmbs/wcsnlen.c" + +#endif diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S index 17cdedc2a9..8f534102a2 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S @@ -1,6 +1,11 @@ +#ifndef WCSNLEN +# define WCSNLEN __wcsnlen_sse4_1 +# define OVERFLOW_STRLEN __wcslen_sse4_1 +#endif + #define AS_WCSLEN #define AS_STRNLEN -#define STRLEN __wcsnlen_sse4_1 +#define STRLEN WCSNLEN #define SECTION(p) p##.sse4.1 #include "strlen-sse2.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S index cf8a239ab2..6eaf5e090b 100644 --- a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S @@ -1,3 +1,8 @@ -#define STRRCHR __wcsrchr_avx2 +#ifndef WCSRCHR +# define WCSRCHR __wcsrchr_avx2 +#endif + +#define STRRCHR WCSRCHR #define USE_AS_WCSRCHR 1 + #include "strrchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S index c64602f7dc..e5c5fe3bf2 100644 --- a/sysdeps/x86_64/multiarch/wcsrchr-evex.S +++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S @@ -1,3 +1,7 @@ -#define STRRCHR __wcsrchr_evex +#ifndef WCSRCHR +# define WCSRCHR __wcsrchr_evex +#endif + +#define STRRCHR WCSRCHR #define USE_AS_WCSRCHR 1 #include "strrchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S index d9259720f8..21388d900c 100644 --- a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S +++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S @@ -16,12 +16,11 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) -# ifndef STRRCHR -# define STRRCHR __wcsrchr_sse2 -# endif +#ifndef WCSRCHR +# define WCSRCHR __wcsrchr_sse2 #endif +#define STRRCHR WCSRCHR #define USE_AS_WCSRCHR 1 #define NO_PMINU 1 diff --git a/sysdeps/x86_64/strcasecmp_l.S b/sysdeps/x86_64/strcasecmp_l.S index 84fd7fdfd3..5afa7ea098 100644 --- a/sysdeps/x86_64/strcasecmp_l.S +++ b/sysdeps/x86_64/strcasecmp_l.S @@ -1,11 +1,35 @@ +/* strcasecmp_l dispatch for RTLD and non-multiarch build + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + /* Symbols = __strcasecmp_l and __strcasecmp. */ -#include "multiarch/strcasecmp_l-sse2.S" +#define DEFAULT_IMPL_V1 "multiarch/strcasecmp_l-sse2.S" +/* This may cause regressions on some processors that heavily prefer + aligned loads or have a slow implementation of the `pcmpstri` + instruction. 
*/ +#define DEFAULT_IMPL_V2 "multiarch/strcasecmp_l-sse4_2.S" +#define DEFAULT_IMPL_V3 "multiarch/strcasecmp_l-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strcasecmp_l-evex.S" -libc_hidden_builtin_def (__strcasecmp_l) +#include "isa-default-impl.h" +libc_hidden_def (__strcasecmp_l) weak_alias (__strcasecmp_l, strcasecmp_l) -libc_hidden_def (strcasecmp_l) -weak_alias (__strcasecmp, strcasecmp) libc_hidden_def (__strcasecmp) +weak_alias (__strcasecmp, strcasecmp) diff --git a/sysdeps/x86_64/strchr-isa-default-impl.h b/sysdeps/x86_64/strchr-isa-default-impl.h new file mode 100644 index 0000000000..0c8cbc6ffb --- /dev/null +++ b/sysdeps/x86_64/strchr-isa-default-impl.h @@ -0,0 +1,28 @@ +/* Set default strchr impl based on ISA level. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <isa-level.h> +#if MINIMUM_X86_ISA_LEVEL == 1 || MINIMUM_X86_ISA_LEVEL == 2 +# define DEFAULT_STRCHR __strchr_sse2 +#elif MINIMUM_X86_ISA_LEVEL == 3 +# define DEFAULT_STRCHR __strchr_avx2 +#elif MINIMUM_X86_ISA_LEVEL == 4 +# define DEFAULT_STRCHR __strchr_evex +#else +# error "Unknown default strchr implementation" +#endif diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S index 77c956c92c..1406c633e8 100644 --- a/sysdeps/x86_64/strchr.S +++ b/sysdeps/x86_64/strchr.S @@ -1,5 +1,4 @@ -/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. - For AMD x86-64. +/* strchr dispatch for RTLD and non-multiarch build Copyright (C) 2009-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,8 +16,13 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#define STRCHR strchr + +#define DEFAULT_IMPL_V1 "multiarch/strchr-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/strchr-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strchr-evex.S" + +#include "isa-default-impl.h" -#define STRCHR strchr -#include "multiarch/strchr-sse2.S" weak_alias (strchr, index) libc_hidden_builtin_def (strchr) diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S index 508e42db26..f1ef907296 100644 --- a/sysdeps/x86_64/strchrnul.S +++ b/sysdeps/x86_64/strchrnul.S @@ -1,6 +1,4 @@ -/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR - or terminating NUL byte. - For AMD x86-64. +/* strchrnul dispatch for RTLD and non-multiarch build Copyright (C) 2009-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -18,7 +16,12 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define STRCHR __strchrnul -#include "multiarch/strchrnul-sse2.S" +#define STRCHRNUL __strchrnul + +#define DEFAULT_IMPL_V1 "multiarch/strchrnul-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/strchrnul-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strchrnul-evex.S" + +#include "isa-default-impl.h" weak_alias (__strchrnul, strchrnul) diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index 19e54bd3a7..7c3cf87a42 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -1,4 +1,4 @@ -/* Highly optimized version for x86-64. +/* strcmp dispatch for RTLD and non-multiarch build Copyright (C) 1999-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -18,5 +18,14 @@ /* Symbol = strcmp. */ -#include "multiarch/strcmp-sse2.S" +#define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S" +/* strcmp-sse2-unaligned.S is often faster than strcmp-sse42.S and + doesn't have the drawback of using the `pcmpstri` instruction + which can be very slow on some CPUs. */ +#define DEFAULT_IMPL_V2 "multiarch/strcmp-sse2-unaligned.S" +#define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S" + +#include "isa-default-impl.h" + libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S index c2f5674f8d..e7cb4b1680 100644 --- a/sysdeps/x86_64/strlen.S +++ b/sysdeps/x86_64/strlen.S @@ -1,4 +1,4 @@ -/* SSE2 version of strlen. +/* strlen dispatch for RTLD and non-multiarch build Copyright (C) 2021-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,6 +17,11 @@ <https://www.gnu.org/licenses/>. 
*/ #define STRLEN strlen -#include "multiarch/strlen-sse2.S" + +#define DEFAULT_IMPL_V1 "multiarch/strlen-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/strlen-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strlen-evex.S" + +#include "isa-default-impl.h" libc_hidden_builtin_def (strlen) diff --git a/sysdeps/x86_64/strncase_l.S b/sysdeps/x86_64/strncase_l.S index 3780fc50b1..de28ecf5d4 100644 --- a/sysdeps/x86_64/strncase_l.S +++ b/sysdeps/x86_64/strncase_l.S @@ -1,11 +1,35 @@ +/* strncasecmp_l dispatch for RTLD and non-multiarch build + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + /* Symbols = __strncasecmp_l and __strncasecmp. */ -#include "multiarch/strncase_l-sse2.S" +#define DEFAULT_IMPL_V1 "multiarch/strncase_l-sse2.S" +/* This may cause regressions on some processors that heavily prefer + aligned loads or have a slow implementation of the `pcmpstri` + instruction. 
*/ +#define DEFAULT_IMPL_V2 "multiarch/strncase_l-sse4_2.S" +#define DEFAULT_IMPL_V3 "multiarch/strncase_l-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strncase_l-evex.S" -libc_hidden_builtin_def (__strncasecmp_l) +#include "isa-default-impl.h" +libc_hidden_def (__strncasecmp_l) weak_alias (__strncasecmp_l, strncasecmp_l) -libc_hidden_def (strncasecmp_l) -weak_alias (__strncasecmp, strncasecmp) libc_hidden_def (__strncasecmp) +weak_alias (__strncasecmp, strncasecmp) diff --git a/sysdeps/x86_64/strncmp.S b/sysdeps/x86_64/strncmp.S index 13d9e82ee2..afb251d9fe 100644 --- a/sysdeps/x86_64/strncmp.S +++ b/sysdeps/x86_64/strncmp.S @@ -1,4 +1,31 @@ +/* strncmp dispatch for RTLD and non-multiarch build + Copyright (C) 1999-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + /* Symbol = strncmp. */ -#include "multiarch/strncmp-sse2.S" +#define DEFAULT_IMPL_V1 "multiarch/strncmp-sse2.S" +/* This may cause regressions on some processors that heavily prefer + aligned loads or have a slow implementation of the `pcmpstri` + instruction. 
*/ +#define DEFAULT_IMPL_V2 "multiarch/strncmp-sse4_2.S" +#define DEFAULT_IMPL_V3 "multiarch/strncmp-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strncmp-evex.S" + +#include "isa-default-impl.h" + libc_hidden_builtin_def (strncmp) diff --git a/sysdeps/x86_64/strnlen.S b/sysdeps/x86_64/strnlen.S index 174970d58f..b2c2149e07 100644 --- a/sysdeps/x86_64/strnlen.S +++ b/sysdeps/x86_64/strnlen.S @@ -1,6 +1,29 @@ -#define STRLEN __strnlen -#include "multiarch/strnlen-sse2.S" +/* strnlen dispatch for RTLD and non-multiarch build + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define STRNLEN __strnlen + +#define DEFAULT_IMPL_V1 "multiarch/strnlen-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/strnlen-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strnlen-evex.S" + +#include "isa-default-impl.h" + +weak_alias (__strnlen, strnlen) libc_hidden_def (__strnlen) -weak_alias (__strnlen, strnlen); -libc_hidden_builtin_def (strnlen) +libc_hidden_def (strnlen) diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S index f39da60454..493d370a28 100644 --- a/sysdeps/x86_64/strrchr.S +++ b/sysdeps/x86_64/strrchr.S @@ -1,4 +1,4 @@ -/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. 
+/* strrchr dispatch for RTLD and non-multiarch build Copyright (C) 2013-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,6 +17,12 @@ <https://www.gnu.org/licenses/>. */ #define STRRCHR strrchr -#include "multiarch/strrchr-sse2.S" + +#define DEFAULT_IMPL_V1 "multiarch/strrchr-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/strrchr-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/strrchr-evex.S" + +#include "isa-default-impl.h" + weak_alias (strrchr, rindex) libc_hidden_builtin_def (strrchr) diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S index 80b12c4286..01a432b899 100644 --- a/sysdeps/x86_64/wcschr.S +++ b/sysdeps/x86_64/wcschr.S @@ -1,4 +1,4 @@ -/* wcschr with SSSE3 +/* wcschr dispatch for RTLD and non-multiarch build Copyright (C) 2011-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,9 +16,14 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ - #define WCSCHR __wcschr -#include "multiarch/wcschr-sse2.S" -libc_hidden_def(__wcschr) + +#define DEFAULT_IMPL_V1 "multiarch/wcschr-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/wcschr-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/wcschr-evex.S" + +#include "isa-default-impl.h" + +libc_hidden_def (__wcschr) weak_alias (__wcschr, wcschr) libc_hidden_weak (wcschr) diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S index e04cdbf5fe..5d30545fb6 100644 --- a/sysdeps/x86_64/wcscmp.S +++ b/sysdeps/x86_64/wcscmp.S @@ -1,4 +1,4 @@ -/* Optimized wcscmp for x86-64 with SSE2. +/* wcscmp dispatch for RTLD and non-multiarch build Copyright (C) 2011-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -18,6 +18,11 @@ /* Symbol = __wcscmp. 
*/ -#include "multiarch/wcscmp-sse2.S" +#define DEFAULT_IMPL_V1 "multiarch/wcscmp-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/wcscmp-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/wcscmp-evex.S" + +#include "isa-default-impl.h" + libc_hidden_def (__wcscmp) weak_alias (__wcscmp, wcscmp) diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S index 588a0fbe01..e4e25b5353 100644 --- a/sysdeps/x86_64/wcslen.S +++ b/sysdeps/x86_64/wcslen.S @@ -1,4 +1,4 @@ -/* Optimized wcslen for x86-64 with SSE2. +/* wcslen dispatch for RTLD and non-multiarch build Copyright (C) 2011-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,5 +17,18 @@ <https://www.gnu.org/licenses/>. */ #define WCSLEN __wcslen -#include "multiarch/wcslen-sse2.S" -weak_alias(__wcslen, wcslen) + +#define DEFAULT_IMPL_V1 "multiarch/wcslen-sse2.S" +#define DEFAULT_IMPL_V2 "multiarch/wcslen-sse4_1.S" +#define DEFAULT_IMPL_V3 "multiarch/wcslen-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/wcslen-evex.S" + +#include "isa-default-impl.h" + +weak_alias (__wcslen, wcslen) + +#if MINIMUM_X86_ISA_LEVEL == 2 && !IS_IN (rtld) +/* Hidden def so it can be used as overflow fallback in + wcsnlen-sse4_1.S. */ +libc_hidden_def (__wcslen) +#endif diff --git a/sysdeps/x86_64/wcsncmp-generic.c b/sysdeps/x86_64/wcsncmp-generic.c new file mode 100644 index 0000000000..493a6f9b9b --- /dev/null +++ b/sysdeps/x86_64/wcsncmp-generic.c @@ -0,0 +1,29 @@ +/* wcsncmp dispatch for RTLD and non-multiarch .c ISA level 1 build. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* wcsncmp non-multiarch build is split into two files, + wcsncmp-generic.c and wcsncmp.S. The wcsncmp-generic.c build is for + ISA level <= 2 and just uses wcsmbs/wcsncmp.c. This must be split + into two files because we cannot include C code from assembly or + vice versa. */ + +#include <isa-level.h> + +#if MINIMUM_X86_ISA_LEVEL <= 2 +# include "wcsmbs/wcsncmp.c" +#endif diff --git a/sysdeps/x86_64/wcsncmp.S b/sysdeps/x86_64/wcsncmp.S new file mode 100644 index 0000000000..14f9a1341e --- /dev/null +++ b/sysdeps/x86_64/wcsncmp.S @@ -0,0 +1,40 @@ +/* wcsncmp dispatch for RTLD and non-multiarch .c files + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* wcsncmp non-multiarch build is split into two files, + wcsncmp-generic.c and wcsncmp.S. 
The wcsncmp.S build is for + ISA level >= 3 and uses the optimized assembly implementations in + multiarch/wcsncmp*.S. This must be split into two files because + we cannot include C code from assembly or vice versa. */ + +#include <isa-level.h> + +#if MINIMUM_X86_ISA_LEVEL >= 3 + +/* Symbol = wcsncmp. */ + +# define DEFAULT_IMPL_V3 "multiarch/wcsncmp-avx2.S" +# define DEFAULT_IMPL_V4 "multiarch/wcsncmp-evex.S" + +/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it + should never be used from here. */ +# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL" + +# include "isa-default-impl.h" + +#endif diff --git a/sysdeps/x86_64/wcsnlen-generic.c b/sysdeps/x86_64/wcsnlen-generic.c new file mode 100644 index 0000000000..ec66511589 --- /dev/null +++ b/sysdeps/x86_64/wcsnlen-generic.c @@ -0,0 +1,29 @@ +/* wcsnlen dispatch for RTLD and non-multiarch .c ISA level 1 build. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* wcsnlen non-multiarch build is split into two files, + wcsnlen-generic.c and wcsnlen.S. The wcsnlen-generic.c build is for + ISA level <= 1 and just uses wcsmbs/wcsnlen.c. This must be split + into two files because we cannot include C code from assembly or + vice versa. 
*/ + +#include <isa-level.h> + +#if MINIMUM_X86_ISA_LEVEL <= 1 +# include "wcsmbs/wcsnlen.c" +#endif diff --git a/sysdeps/x86_64/wcsnlen.S b/sysdeps/x86_64/wcsnlen.S new file mode 100644 index 0000000000..b30b3f0785 --- /dev/null +++ b/sysdeps/x86_64/wcsnlen.S @@ -0,0 +1,49 @@ +/* wcsnlen dispatch for RTLD and non-multiarch .c files + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* wcsnlen non-multiarch build is split into two files, + wcsnlen-generic.c and wcsnlen.S. The wcsnlen.S build is for + ISA level >= 2 and uses the optimized assembly implementations in + multiarch/wcsnlen*.S. This must be split into two files because + we cannot include C code from assembly or vice versa. */ + +#include <isa-level.h> + +#if MINIMUM_X86_ISA_LEVEL >= 2 + +# define WCSNLEN __wcsnlen +/* This symbol must stay linked to the name in wcslen.S. */ +#if IS_IN (rtld) +# define OVERFLOW_STRLEN __wcslen +#else +# define OVERFLOW_STRLEN HIDDEN_JUMPTARGET (__wcslen) +#endif + +# define DEFAULT_IMPL_V2 "multiarch/wcsnlen-sse4_1.S" +# define DEFAULT_IMPL_V3 "multiarch/wcsnlen-avx2.S" +# define DEFAULT_IMPL_V4 "multiarch/wcsnlen-evex.S" + +/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it + should never be used from here. 
*/ +# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL" + +# include "isa-default-impl.h" + +weak_alias (__wcsnlen, wcsnlen) +libc_hidden_def (__wcsnlen) +#endif diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S index 1d4b1eb21c..abf828b458 100644 --- a/sysdeps/x86_64/wcsrchr.S +++ b/sysdeps/x86_64/wcsrchr.S @@ -1,4 +1,4 @@ -/* wcsrchr optimized with SSE2. +/* wcsrchr dispatch for RTLD and non-multiarch build Copyright (C) 2011-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,5 +16,10 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#define STRRCHR wcsrchr -#include "multiarch/wcsrchr-sse2.S" +#define WCSRCHR wcsrchr + +#define DEFAULT_IMPL_V1 "multiarch/wcsrchr-sse2.S" +#define DEFAULT_IMPL_V3 "multiarch/wcsrchr-avx2.S" +#define DEFAULT_IMPL_V4 "multiarch/wcsrchr-evex.S" + +#include "isa-default-impl.h"