@@ -84,6 +84,7 @@
/* ISA level >= 2 guaranteed includes. */
#define SSE4_2_X86_ISA_LEVEL 2
+#define SSE4_1_X86_ISA_LEVEL 2
#define SSSE3_X86_ISA_LEVEL 2
@@ -101,9 +102,18 @@
when ISA level < 3. */
#define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
+/* NB: This feature is disabled when ISA level >= 3. All CPUs with
+ this feature don't run on glibc built with ISA level >= 3. */
+#define Slow_SSE4_2_X86_ISA_LEVEL 3
+
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
+/* NB: This feature is disabled when ISA level >= 2. It was enabled
+ for the early Atom CPUs. */
+#define Slow_BSF_X86_ISA_LEVEL 2
+
+
/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
macros are wrappers for the respective CPU_FEATURE{S}_{USABLE|ARCH}_P
runtime checks. They differ in two ways.
@@ -197,6 +197,12 @@ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
endif
ifeq ($(subdir),wcsmbs)
+
+sysdep_routines += \
+ wcsncmp-generic \
+ wcsnlen-generic \
+# sysdep_routines
+
tests += \
tst-rsi-wcslen
endif
@@ -1,4 +1,4 @@
-/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using
+/* memrchr dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,5 +18,11 @@
<https://www.gnu.org/licenses/>. */
#define MEMRCHR __memrchr
-#include "multiarch/memrchr-sse2.S"
+
+#define DEFAULT_IMPL_V1 "multiarch/memrchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/memrchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/memrchr-evex.S"
+
+#include "isa-default-impl.h"
+
weak_alias (__memrchr, memrchr)
@@ -144,11 +144,9 @@ sysdep_routines += \
wcslen-sse4_1 \
wcsncmp-avx2 \
wcsncmp-avx2-rtm \
- wcsncmp-generic \
wcsncmp-evex \
wcsnlen-avx2 \
wcsnlen-avx2-rtm \
- wcsnlen-generic \
wcsnlen-evex \
wcsnlen-evex512 \
wcsnlen-sse4_1 \
@@ -23,28 +23,32 @@
# define GENERIC sse2
#endif
-extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
@@ -205,19 +205,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/memrchr.c. */
IFUNC_IMPL (i, name, memrchr,
- IFUNC_IMPL_ADD (array, i, memrchr,
- CPU_FEATURE_USABLE (AVX2),
- __memrchr_avx2)
- IFUNC_IMPL_ADD (array, i, memrchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memrchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, memrchr,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __memrchr_evex)
-
- IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memrchr_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __memrchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memrchr_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memrchr,
+ 1,
+ __memrchr_sse2))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memset_chk.c. */
@@ -346,49 +349,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strlen.c. */
IFUNC_IMPL (i, name, strlen,
- IFUNC_IMPL_ADD (array, i, strlen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __strlen_avx2)
- IFUNC_IMPL_ADD (array, i, strlen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __strlen_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strlen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strlen_evex)
- IFUNC_IMPL_ADD (array, i, strlen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strlen_evex512)
- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strlen_evex)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strlen_evex512)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strlen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strlen_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strlen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strlen_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strlen,
+ 1,
+ __strlen_sse2))
/* Support sysdeps/x86_64/multiarch/strnlen.c. */
IFUNC_IMPL (i, name, strnlen,
- IFUNC_IMPL_ADD (array, i, strnlen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __strnlen_avx2)
- IFUNC_IMPL_ADD (array, i, strnlen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __strnlen_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strnlen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strnlen_evex)
- IFUNC_IMPL_ADD (array, i, strnlen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strnlen_evex512)
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strnlen_evex)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strnlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strnlen_evex512)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strnlen_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strnlen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strnlen_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strnlen,
+ 1,
+ __strnlen_sse2))
/* Support sysdeps/x86_64/multiarch/stpncpy.c. */
IFUNC_IMPL (i, name, stpncpy,
@@ -422,40 +433,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp,
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __strcasecmp_evex)
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- CPU_FEATURE_USABLE (AVX2),
- __strcasecmp_avx2)
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strcasecmp_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- CPU_FEATURE_USABLE (SSE4_2),
- __strcasecmp_sse42)
- IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strcasecmp_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strcasecmp_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcasecmp_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strcasecmp_sse42)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
+ 1,
+ __strcasecmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp_l,
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __strcasecmp_l_evex)
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- CPU_FEATURE_USABLE (AVX2),
- __strcasecmp_l_avx2)
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strcasecmp_l_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strcasecmp_l,
- CPU_FEATURE_USABLE (SSE4_2),
- __strcasecmp_l_sse42)
- IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
- __strcasecmp_l_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp_l,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strcasecmp_l_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp_l,
+ CPU_FEATURE_USABLE (AVX2),
+ __strcasecmp_l_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp_l,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcasecmp_l_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strcasecmp_l_sse42)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
+ 1,
+ __strcasecmp_l_sse2))
/* Support sysdeps/x86_64/multiarch/strcat.c. */
IFUNC_IMPL (i, name, strcat,
@@ -474,74 +492,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strchr.c. */
IFUNC_IMPL (i, name, strchr,
- IFUNC_IMPL_ADD (array, i, strchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __strchr_avx2)
- IFUNC_IMPL_ADD (array, i, strchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __strchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strchr,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strchr_evex)
- IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
- IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strchr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strchr_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strchr_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strchr,
+ 1,
+ __strchr_sse2)
+ X86_IFUNC_IMPL_ADD_V1 (array, i, strchr,
+ 1,
+ __strchr_sse2_no_bsf))
/* Support sysdeps/x86_64/multiarch/strchrnul.c. */
IFUNC_IMPL (i, name, strchrnul,
- IFUNC_IMPL_ADD (array, i, strchrnul,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __strchrnul_avx2)
- IFUNC_IMPL_ADD (array, i, strchrnul,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __strchrnul_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strchrnul,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strchrnul_evex)
- IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strchrnul,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strchrnul_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strchrnul_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strchrnul,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strchrnul_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strchrnul,
+ 1,
+ __strchrnul_sse2))
/* Support sysdeps/x86_64/multiarch/strrchr.c. */
IFUNC_IMPL (i, name, strrchr,
- IFUNC_IMPL_ADD (array, i, strrchr,
- CPU_FEATURE_USABLE (AVX2),
- __strrchr_avx2)
- IFUNC_IMPL_ADD (array, i, strrchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strrchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strrchr,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __strrchr_evex)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strrchr_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __strrchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strrchr_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strrchr,
+ 1,
+ __strrchr_sse2))
/* Support sysdeps/x86_64/multiarch/strcmp.c. */
IFUNC_IMPL (i, name, strcmp,
- IFUNC_IMPL_ADD (array, i, strcmp,
- CPU_FEATURE_USABLE (AVX2),
- __strcmp_avx2)
- IFUNC_IMPL_ADD (array, i, strcmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strcmp_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strcmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __strcmp_evex)
- IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2),
- __strcmp_sse42)
- IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strcmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strcmp_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strcmp_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcmp_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strcmp_sse42)
+ /* ISA V2 wrapper for SSE2 implementations because the SSE2
+ implementations are also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
+ 1,
+ __strcmp_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
+ 1,
+ __strcmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcpy.c. */
IFUNC_IMPL (i, name, strcpy,
@@ -568,41 +607,47 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __strncasecmp_evex)
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX2),
- __strncasecmp_avx2)
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strncasecmp_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- CPU_FEATURE_USABLE (SSE4_2),
- __strncasecmp_sse42)
- IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
- __strncasecmp_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strncasecmp_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strncasecmp_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncasecmp_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strncasecmp_sse42)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
+ 1,
+ __strncasecmp_sse2))
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp_l,
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __strncasecmp_l_evex)
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX2),
- __strncasecmp_l_avx2)
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strncasecmp_l_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strncasecmp_l,
- CPU_FEATURE_USABLE (SSE4_2),
- __strncasecmp_l_sse42)
- IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
- __strncasecmp_l_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp_l,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strncasecmp_l_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp_l,
+ CPU_FEATURE_USABLE (AVX2),
+ __strncasecmp_l_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp_l,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncasecmp_l_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strncasecmp_l_sse42)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
+ 1,
+ __strncasecmp_l_sse2))
/* Support sysdeps/x86_64/multiarch/strncat.c. */
IFUNC_IMPL (i, name, strncat,
@@ -664,69 +709,85 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcschr.c. */
IFUNC_IMPL (i, name, wcschr,
- IFUNC_IMPL_ADD (array, i, wcschr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcschr_avx2)
- IFUNC_IMPL_ADD (array, i, wcschr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __wcschr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wcschr,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcschr_evex)
- IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcschr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcschr_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcschr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcschr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wcschr_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wcschr,
+ 1,
+ __wcschr_sse2))
/* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
IFUNC_IMPL (i, name, wcsrchr,
- IFUNC_IMPL_ADD (array, i, wcsrchr,
- CPU_FEATURE_USABLE (AVX2),
- __wcsrchr_avx2)
- IFUNC_IMPL_ADD (array, i, wcsrchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wcsrchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wcsrchr,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcsrchr_evex)
- IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsrchr_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcsrchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wcsrchr_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wcsrchr,
+ 1,
+ __wcsrchr_sse2))
/* Support sysdeps/x86_64/multiarch/wcscmp.c. */
IFUNC_IMPL (i, name, wcscmp,
- IFUNC_IMPL_ADD (array, i, wcscmp,
- CPU_FEATURE_USABLE (AVX2),
- __wcscmp_avx2)
- IFUNC_IMPL_ADD (array, i, wcscmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wcscmp_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wcscmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcscmp_evex)
- IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcscmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcscmp_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcscmp_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wcscmp_avx2_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wcscmp,
+ 1,
+ __wcscmp_sse2))
/* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
IFUNC_IMPL (i, name, wcsncmp,
- IFUNC_IMPL_ADD (array, i, wcsncmp,
- CPU_FEATURE_USABLE (AVX2),
- __wcsncmp_avx2)
- IFUNC_IMPL_ADD (array, i, wcsncmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wcsncmp_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wcsncmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcsncmp_evex)
- IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcsncmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsncmp_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcsncmp_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wcsncmp_avx2_rtm)
+ /* ISA V2 wrapper for GENERIC implementation because the
+ GENERIC implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncmp,
+ 1,
+ __wcsncmp_generic))
/* Support sysdeps/x86_64/multiarch/wcscpy.c. */
IFUNC_IMPL (i, name, wcscpy,
@@ -736,55 +797,59 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
- IFUNC_IMPL_ADD (array, i, wcslen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcslen_avx2)
- IFUNC_IMPL_ADD (array, i, wcslen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __wcslen_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wcslen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcslen_evex)
- IFUNC_IMPL_ADD (array, i, wcslen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcslen_evex512)
- IFUNC_IMPL_ADD (array, i, wcslen,
- CPU_FEATURE_USABLE (SSE4_1),
- __wcslen_sse4_1)
- IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcslen_evex)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcslen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcslen_evex512)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcslen_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcslen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wcslen_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wcslen,
+ CPU_FEATURE_USABLE (SSE4_1),
+ __wcslen_sse4_1)
+ X86_IFUNC_IMPL_ADD_V1 (array, i, wcslen,
+ 1,
+ __wcslen_sse2))
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
IFUNC_IMPL (i, name, wcsnlen,
- IFUNC_IMPL_ADD (array, i, wcsnlen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcsnlen_avx2)
- IFUNC_IMPL_ADD (array, i, wcsnlen,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (BMI2)
- && CPU_FEATURE_USABLE (RTM)),
- __wcsnlen_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wcsnlen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcsnlen_evex)
- IFUNC_IMPL_ADD (array, i, wcsnlen,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wcsnlen_evex512)
- IFUNC_IMPL_ADD (array, i, wcsnlen,
- CPU_FEATURE_USABLE (SSE4_1),
- __wcsnlen_sse4_1)
- IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsnlen_evex)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wcsnlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsnlen_evex512)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsnlen_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wcsnlen,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wcsnlen_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wcsnlen,
+ CPU_FEATURE_USABLE (SSE4_1),
+ __wcsnlen_sse4_1)
+ X86_IFUNC_IMPL_ADD_V1 (array, i, wcsnlen,
+ 1,
+ __wcsnlen_generic))
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
@@ -1050,20 +1115,25 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strncmp.c. */
IFUNC_IMPL (i, name, strncmp,
- IFUNC_IMPL_ADD (array, i, strncmp,
- CPU_FEATURE_USABLE (AVX2),
- __strncmp_avx2)
- IFUNC_IMPL_ADD (array, i, strncmp,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __strncmp_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, strncmp,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
- __strncmp_evex)
- IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
- __strncmp_sse42)
- IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strncmp_evex)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strncmp_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncmp_avx2_rtm)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strncmp_sse42)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
+ 1,
+ __strncmp_sse2))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */
@@ -19,32 +19,39 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
- return OPTIMIZE (evex);
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
- return OPTIMIZE (avx2_rtm);
+ return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
- return OPTIMIZE (avx2);
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
+ return OPTIMIZE (avx2);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ /* Keep this as a runtime check as it's not guaranteed at ISA
+ level 2. */
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
return OPTIMIZE (sse42);
@@ -23,33 +23,38 @@
# define GENERIC sse2
#endif
-extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
return OPTIMIZE (GENERIC);
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>
# include "evex256-vecs.h"
@@ -16,22 +16,26 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
+ so we need this to build for ISA V2 builds. */
+#if ISA_SHOULD_BUILD (2)
+
# ifndef MEMRCHR
# define MEMRCHR __memrchr_sse2
# endif
-#endif
-#include <sysdep.h>
-#define VEC_SIZE 16
-#define PAGE_SIZE 4096
+# include <sysdep.h>
+# define VEC_SIZE 16
+# define PAGE_SIZE 4096
.text
ENTRY_P2ALIGN(MEMRCHR, 6)
-#ifdef __ILP32__
+# ifdef __ILP32__
/* Clear upper bits. */
mov %RDX_LP, %RDX_LP
-#endif
+# endif
movd %esi, %xmm0
/* Get end pointer. */
@@ -352,3 +356,4 @@ L(zero_3):
ret
/* 2-bytes from next cache line. */
END(MEMRCHR)
+#endif
@@ -1,15 +1,2 @@
-#ifndef STRCMP
-# define STRCMP __strcasecmp_l_avx2_rtm
-#endif
-
-#define _GLABEL(x) x ## _rtm
-#define GLABEL(x) _GLABEL(x)
-
-#define ZERO_UPPER_VEC_REGISTERS_RETURN \
- ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
-
-#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
-
-#define SECTION(p) p##.avx.rtm
-
-#include "strcasecmp_l-avx2.S"
+#define USE_AS_STRCASECMP_L
+#include "strcmp-avx2-rtm.S"
@@ -16,8 +16,5 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#ifndef STRCMP
-# define STRCMP __strcasecmp_l_avx2
-#endif
#define USE_AS_STRCASECMP_L
#include "strcmp-avx2.S"
@@ -16,8 +16,5 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#ifndef STRCMP
-# define STRCMP __strcasecmp_l_evex
-#endif
#define USE_AS_STRCASECMP_L
#include "strcmp-evex.S"
@@ -17,4 +17,5 @@
<https://www.gnu.org/licenses/>. */
#define USE_AS_STRCASECMP_L
+
#include "strcmp-sse2.S"
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>
@@ -16,7 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+/* NB: atom builds with ISA level == 1 so no reason to hold onto this
+ at ISA level >= 2. */
+#if ISA_SHOULD_BUILD (1)
# include <sysdep.h>
# include "asm-syntax.h"
@@ -16,7 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc) || defined STRCHR
+#include <isa-level.h>
+
+/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
+ so we need this to build for ISA V2 builds. */
+#if ISA_SHOULD_BUILD (2)
+
# ifndef STRCHR
# define STRCHR __strchr_sse2
# endif
@@ -26,36 +26,40 @@
# define SYMBOL_NAME strchr
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
- if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
- return OPTIMIZE (sse2_no_bsf);
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF, !))
+ return OPTIMIZE (sse2);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (sse2_no_bsf);
}
libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
@@ -1,3 +1,8 @@
-#define STRCHR __strchrnul_avx2
+#ifndef STRCHRNUL
+# define STRCHRNUL __strchrnul_avx2
+#endif
+
+#define STRCHR STRCHRNUL
#define USE_AS_STRCHRNUL 1
+
#include "strchr-avx2.S"
@@ -1,3 +1,8 @@
-#define STRCHR __strchrnul_evex
+#ifndef STRCHRNUL
+# define STRCHRNUL __strchrnul_evex
+#endif
+
+#define STRCHR STRCHRNUL
#define USE_AS_STRCHRNUL 1
+
#include "strchr-evex.S"
@@ -16,12 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# ifndef STRCHR
-# define STRCHR __strchrnul_sse2
-# endif
+#ifndef STRCHRNUL
+# define STRCHRNUL __strchrnul_sse2
#endif
-
#define AS_STRCHRNUL
+#define STRCHR STRCHRNUL
#include "strchr-sse2.S"
@@ -1,12 +1,9 @@
-#ifndef STRCMP
-# define STRCMP __strcmp_avx2_rtm
-#endif
-
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
#define SECTION(p) p##.avx.rtm
+#define STRCMP_ISA _avx2_rtm
#include "strcmp-avx2.S"
@@ -16,7 +16,15 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
+
+# ifndef STRCMP_ISA
+# define STRCMP_ISA _avx2
+# endif
+
+# include "strcmp-naming.h"
# include <sysdep.h>
@@ -86,15 +94,11 @@
# ifdef USE_AS_STRCASECMP_L
# ifdef USE_AS_STRNCMP
-# define STRCASECMP __strncasecmp_avx2
# define LOCALE_REG rcx
# define LOCALE_REG_LP RCX_LP
-# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii
# else
-# define STRCASECMP __strcasecmp_avx2
# define LOCALE_REG rdx
# define LOCALE_REG_LP RDX_LP
-# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii
# endif
# endif
@@ -185,18 +189,14 @@
.type STRCMP, @function
.globl STRCMP
-# ifndef GLABEL
-# define GLABEL(...) __VA_ARGS__
-# endif
-
# ifdef USE_AS_STRCASECMP_L
-ENTRY (GLABEL(STRCASECMP))
+ENTRY (STRCASECMP)
movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
mov %fs:(%rax), %LOCALE_REG_LP
/* Either 1 or 5 bytes (dependeing if CET is enabled). */
.p2align 4
-END (GLABEL(STRCASECMP))
+END (STRCASECMP)
/* FALLTHROUGH to strcasecmp/strncasecmp_l. */
# endif
@@ -16,7 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
+# define STRCMP_ISA _evex
+# include "strcmp-naming.h"
# include <sysdep.h>
# if defined USE_AS_STRCASECMP_L
@@ -37,10 +42,6 @@
# define VMOVA vmovdqa64
# ifdef USE_AS_WCSCMP
-# ifndef OVERFLOW_STRCMP
-# define OVERFLOW_STRCMP __wcscmp_evex
-# endif
-
# define TESTEQ subl $0xff,
/* Compare packed dwords. */
# define VPCMP vpcmpd
@@ -50,10 +51,6 @@
/* 1 dword char == 4 bytes. */
# define SIZE_OF_CHAR 4
# else
-# ifndef OVERFLOW_STRCMP
-# define OVERFLOW_STRCMP __strcmp_evex
-# endif
-
# define TESTEQ incl
/* Compare packed bytes. */
# define VPCMP vpcmpb
@@ -120,15 +117,11 @@
# ifdef USE_AS_STRCASECMP_L
# ifdef USE_AS_STRNCMP
-# define STRCASECMP __strncasecmp_evex
# define LOCALE_REG rcx
# define LOCALE_REG_LP RCX_LP
-# define STRCASECMP_L_NONASCII __strncasecmp_l_nonascii
# else
-# define STRCASECMP __strcasecmp_evex
# define LOCALE_REG rdx
# define LOCALE_REG_LP RDX_LP
-# define STRCASECMP_L_NONASCII __strcasecmp_l_nonascii
# endif
# endif
@@ -214,7 +207,6 @@
.align 16
.type STRCMP, @function
.globl STRCMP
-
# ifdef USE_AS_STRCASECMP_L
ENTRY (STRCASECMP)
movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
@@ -16,11 +16,20 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
-#include "sysdep.h"
+/* Continue building as ISA level 2. We use this as ISA V2 default
+ because strcmp-sse42 uses pcmpstri (slow on some SSE4.2
+ processors) and this implementation is potentially faster than
+ strcmp-sse42 (aside from the slower page cross case). */
+#if ISA_SHOULD_BUILD (2)
-ENTRY ( __strcmp_sse2_unaligned)
+# define STRCMP_ISA _sse2_unaligned
+# include "strcmp-naming.h"
+
+# include "sysdep.h"
+
+ENTRY (STRCMP)
movl %edi, %eax
xorl %edx, %edx
pxor %xmm7, %xmm7
@@ -208,6 +217,5 @@ L(cross_page):
L(different):
subl %ecx, %eax
ret
-END (__strcmp_sse2_unaligned)
-
+END (STRCMP)
#endif
@@ -16,7 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc) || IS_IN (rtld)
+#include <isa-level.h>
+
+/* Continue building at ISA level 2 as the strcmp-sse42 is not always
+ preferable for ISA level == 2 CPUs. */
+#if ISA_SHOULD_BUILD (2)
# define STRCMP_ISA _sse2
# include "strcmp-naming.h"
@@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (2)
+
# include <sysdep.h>
# define STRCMP_ISA _sse42
@@ -1766,7 +1769,6 @@ LABEL(unaligned_table):
.int LABEL(ashr_0) - LABEL(unaligned_table)
# undef LABEL
-# undef GLABEL
# undef SECTION
# undef movdqa
# undef movdqu
@@ -26,37 +26,50 @@
# define SYMBOL_NAME strcmp
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME)
+ OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+
+
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ /* Keep this as runtime check. Some ISA level >= 2 CPUs such as
+ Tremont, Silvermont, and more set Slow_SSE4_2. */
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
return OPTIMIZE (sse42);
+ /* Keep this as runtime check. The standard SSE2 version has
+ meaningful optimizations around keeping all loads aligned in the
+ main loop which can benefit some ISA level >= 2 CPUs. */
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>
@@ -16,7 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+/* UNUSED. Exists purely as reference implementation. */
+
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>
@@ -16,15 +16,20 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc) || defined STRLEN
-
-# ifndef STRLEN
-# define STRLEN __strlen_sse2
-# endif
+#include <isa-level.h>
+/* ISA level >= 2 for both strlen and wcslen. wcslen uses `pminud`
+ which is SSE4.1. strlen doesn't have an ISA level == 2
+ implementation so the SSE2 implementation must be built with ISA
+ level == 2. */
+# if ISA_SHOULD_BUILD (2)
# include <sysdep.h>
+# ifndef STRLEN
+# define STRLEN __strlen_sse2
+# endif
+
# ifdef AS_WCSLEN
# define PMINU pminud
# define PCMPEQ pcmpeqd
@@ -82,7 +87,7 @@ L(n_nonzero):
suffice. */
mov %RSI_LP, %R10_LP
sar $62, %R10_LP
- jnz __wcslen_sse4_1
+ jnz OVERFLOW_STRLEN
sal $2, %RSI_LP
# endif
@@ -1,16 +1,4 @@
-#ifndef STRCMP
-# define STRCMP __strncasecmp_l_avx2_rtm
-#endif
+#define USE_AS_STRCASECMP_L
+#define USE_AS_STRNCMP
-#define _GLABEL(x) x ## _rtm
-#define GLABEL(x) _GLABEL(x)
-
-#define ZERO_UPPER_VEC_REGISTERS_RETURN \
- ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
-
-#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
-
-#define SECTION(p) p##.avx.rtm
-#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm
-
-#include "strncase_l-avx2.S"
+#include "strcmp-avx2-rtm.S"
@@ -16,12 +16,7 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#ifndef STRCMP
-# define STRCMP __strncasecmp_l_avx2
-#endif
#define USE_AS_STRCASECMP_L
#define USE_AS_STRNCMP
-#ifndef OVERFLOW_STRCMP
-# define OVERFLOW_STRCMP __strcasecmp_l_avx2
-#endif
+
#include "strcmp-avx2.S"
@@ -16,10 +16,6 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#ifndef STRCMP
-# define STRCMP __strncasecmp_l_evex
-#endif
-#define OVERFLOW_STRCMP __strcasecmp_l_evex
#define USE_AS_STRCASECMP_L
#define USE_AS_STRNCMP
#include "strcmp-evex.S"
@@ -1,4 +1,2 @@
-#define STRCMP __strncmp_avx2_rtm
#define USE_AS_STRNCMP 1
-#define OVERFLOW_STRCMP __strcmp_avx2_rtm
#include "strcmp-avx2-rtm.S"
@@ -1,4 +1,3 @@
-#define STRCMP __strncmp_avx2
#define USE_AS_STRNCMP 1
-#define OVERFLOW_STRCMP __strcmp_avx2
+
#include "strcmp-avx2.S"
@@ -1,3 +1,2 @@
-#define STRCMP __strncmp_evex
#define USE_AS_STRNCMP 1
#include "strcmp-evex.S"
@@ -26,33 +26,38 @@
# define SYMBOL_NAME strncmp
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, ))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
return OPTIMIZE (sse42);
@@ -1,4 +1,8 @@
-#define STRLEN __strnlen_avx2
+#ifndef STRNLEN
+# define STRNLEN __strnlen_avx2
+#endif
+
#define USE_AS_STRNLEN 1
+#define STRLEN STRNLEN
#include "strlen-avx2.S"
@@ -1,4 +1,8 @@
-#define STRLEN __strnlen_evex
+#ifndef STRNLEN
+# define STRNLEN __strnlen_evex
+#endif
+
#define USE_AS_STRNLEN 1
+#define STRLEN STRNLEN
#include "strlen-evex.S"
@@ -16,11 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# ifndef STRLEN
-# define STRLEN __strnlen_sse2
-# endif
+#ifndef STRNLEN
+# define STRNLEN __strnlen_sse2
#endif
-#define AS_STRNLEN
+#define AS_STRNLEN 1
+#define STRLEN STRNLEN
+
#include "strlen-sse2.S"
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
# include <sysdep.h>
@@ -16,36 +16,40 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+/* ISA level >= 2 because there are no {wcs|str}rchr-sse4
+ implementations. */
+#if ISA_SHOULD_BUILD (2)
+
+# include <sysdep.h>
+
# ifndef STRRCHR
# define STRRCHR __strrchr_sse2
# endif
-#endif
-
-#include <sysdep.h>
-#ifdef USE_AS_WCSRCHR
-# define PCMPEQ pcmpeqd
-# define CHAR_SIZE 4
-# define PMINU pminud
-#else
-# define PCMPEQ pcmpeqb
-# define CHAR_SIZE 1
-# define PMINU pminub
-#endif
+# ifdef USE_AS_WCSRCHR
+# define PCMPEQ pcmpeqd
+# define CHAR_SIZE 4
+# define PMINU pminud
+# else
+# define PCMPEQ pcmpeqb
+# define CHAR_SIZE 1
+# define PMINU pminub
+# endif
-#define PAGE_SIZE 4096
-#define VEC_SIZE 16
+# define PAGE_SIZE 4096
+# define VEC_SIZE 16
.text
ENTRY(STRRCHR)
movd %esi, %xmm0
movq %rdi, %rax
andl $(PAGE_SIZE - 1), %eax
-#ifndef USE_AS_WCSRCHR
+# ifndef USE_AS_WCSRCHR
punpcklbw %xmm0, %xmm0
punpcklwd %xmm0, %xmm0
-#endif
+# endif
pshufd $0, %xmm0, %xmm0
cmpl $(PAGE_SIZE - VEC_SIZE), %eax
ja L(cross_page)
@@ -69,9 +73,9 @@ L(cross_page_continue):
/* We are off by 3 for wcsrchr if search CHAR is non-zero. If
search CHAR is zero we are correct. Either way `andq
-CHAR_SIZE, %rax` gets the correct result. */
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
L(ret0):
ret
@@ -85,9 +89,9 @@ L(first_vec_x0_test):
jz L(ret0)
bsrl %eax, %eax
addq %r8, %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4
@@ -100,9 +104,9 @@ L(first_vec_x1):
jz L(first_vec_x0_test)
bsrl %eax, %eax
leaq (VEC_SIZE)(%rdi, %rax), %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4
@@ -113,9 +117,9 @@ L(first_vec_x1_test):
jz L(first_vec_x0_test)
bsrl %eax, %eax
leaq (VEC_SIZE)(%rdi, %rax), %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4
@@ -128,9 +132,9 @@ L(first_vec_x2):
jz L(first_vec_x1_test)
bsrl %eax, %eax
leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4
@@ -165,27 +169,27 @@ L(first_loop):
/* Since SSE2 no pminud so wcsrchr needs seperate logic for
detecting zero. Note if this is found to be a bottleneck it
may be worth adding an SSE4.1 wcsrchr implementation. */
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
movaps %xmm5, %xmm6
pxor %xmm8, %xmm8
PCMPEQ %xmm8, %xmm5
PCMPEQ %xmm4, %xmm8
por %xmm5, %xmm8
-#else
+# else
movaps %xmm5, %xmm6
PMINU %xmm4, %xmm5
-#endif
+# endif
movaps %xmm4, %xmm9
PCMPEQ %xmm0, %xmm4
PCMPEQ %xmm0, %xmm6
movaps %xmm6, %xmm7
por %xmm4, %xmm6
-#ifndef USE_AS_WCSRCHR
+# ifndef USE_AS_WCSRCHR
pxor %xmm8, %xmm8
PCMPEQ %xmm5, %xmm8
-#endif
+# endif
pmovmskb %xmm8, %ecx
pmovmskb %xmm6, %eax
@@ -219,9 +223,9 @@ L(first_loop_old_match):
bsrl %eax, %eax
addq %rsi, %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4
@@ -247,9 +251,9 @@ L(new_match):
jz L(first_loop_old_match)
bsrl %eax, %eax
addq %rdi, %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
/* Save minimum state for getting most recent match. We can
@@ -267,27 +271,27 @@ L(second_loop):
/* Since SSE2 no pminud so wcsrchr needs seperate logic for
detecting zero. Note if this is found to be a bottleneck it
may be worth adding an SSE4.1 wcsrchr implementation. */
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
movaps %xmm5, %xmm6
pxor %xmm8, %xmm8
PCMPEQ %xmm8, %xmm5
PCMPEQ %xmm4, %xmm8
por %xmm5, %xmm8
-#else
+# else
movaps %xmm5, %xmm6
PMINU %xmm4, %xmm5
-#endif
+# endif
movaps %xmm4, %xmm9
PCMPEQ %xmm0, %xmm4
PCMPEQ %xmm0, %xmm6
movaps %xmm6, %xmm7
por %xmm4, %xmm6
-#ifndef USE_AS_WCSRCHR
+# ifndef USE_AS_WCSRCHR
pxor %xmm8, %xmm8
PCMPEQ %xmm5, %xmm8
-#endif
+# endif
pmovmskb %xmm8, %ecx
pmovmskb %xmm6, %eax
@@ -312,9 +316,9 @@ L(second_loop_old_match):
orl %ecx, %eax
bsrl %eax, %eax
addq %rsi, %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4
@@ -340,9 +344,9 @@ L(second_loop_new_match):
jz L(second_loop_old_match)
bsrl %eax, %eax
addq %rdi, %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
ret
.p2align 4,, 4
@@ -366,9 +370,10 @@ L(cross_page):
jz L(ret1)
bsrl %eax, %eax
addq %rdi, %rax
-#ifdef USE_AS_WCSRCHR
+# ifdef USE_AS_WCSRCHR
andq $-CHAR_SIZE, %rax
-#endif
+# endif
L(ret1):
ret
END(STRRCHR)
+#endif
@@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include "../strchr-isa-default-impl.h"
ENTRY(__strstr_sse2_unaligned)
movzbl (%rsi), %eax
@@ -75,7 +76,7 @@ L(next_pair_index):
.p2align 4
L(strchr):
movzbl %al, %esi
- jmp __strchr_sse2
+ jmp DEFAULT_STRCHR
.p2align 4
L(pair_loop):
@@ -1,3 +1,8 @@
-#define STRCHR __wcschr_avx2
+#ifndef WCSCHR
+# define WCSCHR __wcschr_avx2
+#endif
+
+#define STRCHR WCSCHR
#define USE_AS_WCSCHR 1
+
#include "strchr-avx2.S"
@@ -1,3 +1,8 @@
-#define STRCHR __wcschr_evex
+#ifndef WCSCHR
+# define WCSCHR __wcschr_evex
+#endif
+
+#define STRCHR WCSCHR
#define USE_AS_WCSCHR 1
+
#include "strchr-evex.S"
@@ -16,13 +16,17 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+
+#include <isa-level.h>
+
+/* ISA level >= 2 because there is no wcschr-sse4 implementation. */
+#if ISA_SHOULD_BUILD (2)
+
# ifndef WCSCHR
# define WCSCHR __wcschr_sse2
# endif
-#endif
-#include <sysdep.h>
+# include <sysdep.h>
.text
ENTRY (WCSCHR)
@@ -155,3 +159,4 @@ L(return_null):
ret
END (WCSCHR)
+#endif
@@ -1,4 +1,3 @@
-#define STRCMP __wcscmp_avx2_rtm
#define USE_AS_WCSCMP 1
#include "strcmp-avx2-rtm.S"
@@ -1,4 +1,3 @@
-#define STRCMP __wcscmp_avx2
#define USE_AS_WCSCMP 1
#include "strcmp-avx2.S"
@@ -1,4 +1,3 @@
-#define STRCMP __wcscmp_evex
#define USE_AS_WCSCMP 1
#include "strcmp-evex.S"
@@ -16,11 +16,16 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define USE_AS_WCSCMP
-#define STRCMP_ISA _sse2
-#include "strcmp-naming.h"
+#include <isa-level.h>
-#include <sysdep.h>
+/* ISA level >= 2 because there is no wcscmp-sse4 implementation. */
+#if ISA_SHOULD_BUILD (2)
+# include <sysdep.h>
+
+/* Needed to get right name. */
+# define USE_AS_WCSCMP
+# define STRCMP_ISA _sse2
+# include "strcmp-naming.h"
/* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. */
@@ -949,3 +954,4 @@ L(equal):
ret
END (STRCMP)
+#endif
@@ -1,4 +1,8 @@
-#define STRLEN __wcslen_avx2
+#ifndef WCSLEN
+# define WCSLEN __wcslen_avx2
+#endif
+
+#define STRLEN WCSLEN
#define USE_AS_WCSLEN 1
#include "strlen-avx2.S"
@@ -1,4 +1,8 @@
-#define STRLEN __wcslen_evex
+#ifndef WCSLEN
+# define WCSLEN __wcslen_evex
+#endif
+
+#define STRLEN WCSLEN
#define USE_AS_WCSLEN 1
#include "strlen-evex.S"
@@ -16,13 +16,16 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# ifndef WCSLEN
-# define WCSLEN __wcslen_sse2
-# endif
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (1)
+
+# include <sysdep.h>
+
+#ifndef WCSLEN
+# define WCSLEN __wcslen_sse2
#endif
-#include <sysdep.h>
.text
ENTRY (WCSLEN)
@@ -235,3 +238,5 @@ L(exit_tail7):
ret
END (WCSLEN)
+
+#endif
@@ -1,5 +1,9 @@
-#define AS_WCSLEN
-#define STRLEN __wcslen_sse4_1
-#define SECTION(p) p##.sse4.1
+#ifndef WCSLEN
+# define WCSLEN __wcslen_sse4_1
+#endif
+
+#define AS_WCSLEN 1
+#define STRLEN WCSLEN
+#define SECTION(p) p##.sse4.1
#include "strlen-sse2.S"
@@ -1,5 +1,3 @@
-#define STRCMP __wcsncmp_avx2_rtm
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
-#define OVERFLOW_STRCMP __wcscmp_avx2_rtm
#include "strcmp-avx2-rtm.S"
@@ -1,5 +1,4 @@
-#define STRCMP __wcsncmp_avx2
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
-#define OVERFLOW_STRCMP __wcscmp_avx2
+
#include "strcmp-avx2.S"
@@ -1,4 +1,3 @@
-#define STRCMP __wcsncmp_evex
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
@@ -16,5 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define WCSNCMP __wcsncmp_generic
-#include <wcsmbs/wcsncmp.c>
+#include <isa-level.h>
+#if ISA_SHOULD_BUILD (2)
+
+# define WCSNCMP __wcsncmp_generic
+# include <wcsmbs/wcsncmp.c>
+
+#endif
@@ -1,4 +1,8 @@
-#define STRLEN __wcsnlen_avx2
+#ifndef WCSNLEN
+# define WCSNLEN __wcsnlen_avx2
+#endif
+
+#define STRLEN WCSNLEN
#define USE_AS_WCSLEN 1
#define USE_AS_STRNLEN 1
@@ -1,4 +1,8 @@
-#define STRLEN __wcsnlen_evex
+#ifndef WCSNLEN
+# define WCSNLEN __wcsnlen_evex
+#endif
+
+#define STRLEN WCSNLEN
#define USE_AS_WCSLEN 1
#define USE_AS_STRNLEN 1
@@ -16,13 +16,18 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (1)
-#if IS_IN (libc)
# include <wchar.h>
-# define WCSNLEN __wcsnlen_generic
+# ifndef WCSNLEN
+# define WCSNLEN __wcsnlen_generic
+# endif
extern __typeof (wcsnlen) __wcsnlen_generic;
-#endif
-#include "wcsmbs/wcsnlen.c"
+# include "wcsmbs/wcsnlen.c"
+
+#endif
@@ -1,6 +1,11 @@
+#ifndef WCSNLEN
+# define WCSNLEN __wcsnlen_sse4_1
+# define OVERFLOW_STRLEN __wcslen_sse4_1
+#endif
+
#define AS_WCSLEN
#define AS_STRNLEN
-#define STRLEN __wcsnlen_sse4_1
+#define STRLEN WCSNLEN
#define SECTION(p) p##.sse4.1
#include "strlen-sse2.S"
@@ -1,3 +1,8 @@
-#define STRRCHR __wcsrchr_avx2
+#ifndef WCSRCHR
+# define WCSRCHR __wcsrchr_avx2
+#endif
+
+#define STRRCHR WCSRCHR
#define USE_AS_WCSRCHR 1
+
#include "strrchr-avx2.S"
@@ -1,3 +1,7 @@
-#define STRRCHR __wcsrchr_evex
+#ifndef WCSRCHR
+# define WCSRCHR __wcsrchr_evex
+#endif
+
+#define STRRCHR WCSRCHR
#define USE_AS_WCSRCHR 1
#include "strrchr-evex.S"
@@ -16,12 +16,11 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# ifndef STRRCHR
-# define STRRCHR __wcsrchr_sse2
-# endif
+#ifndef WCSRCHR
+# define WCSRCHR __wcsrchr_sse2
#endif
+#define STRRCHR WCSRCHR
#define USE_AS_WCSRCHR 1
#define NO_PMINU 1
@@ -1,11 +1,35 @@
+/* strcasecmp_l dispatch for RTLD and non-multiarch build
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
/* Symbols = __strcasecmp_l and __strcasecmp. */
-#include "multiarch/strcasecmp_l-sse2.S"
+#define DEFAULT_IMPL_V1 "multiarch/strcasecmp_l-sse2.S"
+/* This may cause regressions on some processors that heavily prefer
+ aligned loads or have a slow implementation of the `pcmpstri`
+ instruction. */
+#define DEFAULT_IMPL_V2 "multiarch/strcasecmp_l-sse4_2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strcasecmp_l-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strcasecmp_l-evex.S"
-libc_hidden_builtin_def (__strcasecmp_l)
+#include "isa-default-impl.h"
+libc_hidden_def (__strcasecmp_l)
weak_alias (__strcasecmp_l, strcasecmp_l)
-libc_hidden_def (strcasecmp_l)
-weak_alias (__strcasecmp, strcasecmp)
libc_hidden_def (__strcasecmp)
+weak_alias (__strcasecmp, strcasecmp)
new file mode 100644
@@ -0,0 +1,28 @@
+/* Set default strchr impl based on ISA level.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == 1 || MINIMUM_X86_ISA_LEVEL == 2
+# define DEFAULT_STRCHR __strchr_sse2
+#elif MINIMUM_X86_ISA_LEVEL == 3
+# define DEFAULT_STRCHR __strchr_avx2
+#elif MINIMUM_X86_ISA_LEVEL == 4
+# define DEFAULT_STRCHR __strchr_evex
+#else
+# error "Unknown default strchr implementation"
+#endif
@@ -1,5 +1,4 @@
-/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
- For AMD x86-64.
+/* strchr dispatch for RTLD and non-multiarch build
Copyright (C) 2009-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -17,8 +16,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#define STRCHR strchr
+
+#define DEFAULT_IMPL_V1 "multiarch/strchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strchr-evex.S"
+
+#include "isa-default-impl.h"
-#define STRCHR strchr
-#include "multiarch/strchr-sse2.S"
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
@@ -1,6 +1,4 @@
-/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR
- or terminating NUL byte.
- For AMD x86-64.
+/* strchrnul dispatch for RTLD and non-multiarch build
Copyright (C) 2009-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,7 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define STRCHR __strchrnul
-#include "multiarch/strchrnul-sse2.S"
+#define STRCHRNUL __strchrnul
+
+#define DEFAULT_IMPL_V1 "multiarch/strchrnul-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strchrnul-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strchrnul-evex.S"
+
+#include "isa-default-impl.h"
weak_alias (__strchrnul, strchrnul)
@@ -1,4 +1,4 @@
-/* Highly optimized version for x86-64.
+/* strcmp dispatch for RTLD and non-multiarch build
Copyright (C) 1999-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,5 +18,14 @@
/* Symbol = strcmp. */
-#include "multiarch/strcmp-sse2.S"
+#define DEFAULT_IMPL_V1 "multiarch/strcmp-sse2.S"
+/* strcmp-sse2-unaligned.S is often faster than strcmp-sse42.S and
+ doesn't have the drawback of using the `pcmpstri` instruction
+ which can be very slow on some CPUs. */
+#define DEFAULT_IMPL_V2 "multiarch/strcmp-sse2-unaligned.S"
+#define DEFAULT_IMPL_V3 "multiarch/strcmp-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strcmp-evex.S"
+
+#include "isa-default-impl.h"
+
libc_hidden_builtin_def (strcmp)
@@ -1,4 +1,4 @@
-/* SSE2 version of strlen.
+/* strlen dispatch for RTLD and non-multiarch build
Copyright (C) 2021-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -17,6 +17,11 @@
<https://www.gnu.org/licenses/>. */
#define STRLEN strlen
-#include "multiarch/strlen-sse2.S"
+
+#define DEFAULT_IMPL_V1 "multiarch/strlen-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strlen-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strlen-evex.S"
+
+#include "isa-default-impl.h"
libc_hidden_builtin_def (strlen)
@@ -1,11 +1,35 @@
+/* strncasecmp_l dispatch for RTLD and non-multiarch build
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
/* Symbols = __strncasecmp_l and __strncasecmp. */
-#include "multiarch/strncase_l-sse2.S"
+#define DEFAULT_IMPL_V1 "multiarch/strncase_l-sse2.S"
+/* This may cause regressions on some processors that heavily prefer
+ aligned loads or have a slow implementation of the `pcmpstri`
+ instruction. */
+#define DEFAULT_IMPL_V2 "multiarch/strncase_l-sse4_2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strncase_l-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strncase_l-evex.S"
-libc_hidden_builtin_def (__strncasecmp_l)
+#include "isa-default-impl.h"
+libc_hidden_def (__strncasecmp_l)
weak_alias (__strncasecmp_l, strncasecmp_l)
-libc_hidden_def (strncasecmp_l)
-weak_alias (__strncasecmp, strncasecmp)
libc_hidden_def (__strncasecmp)
+weak_alias (__strncasecmp, strncasecmp)
@@ -1,4 +1,31 @@
+/* strncmp dispatch for RTLD and non-multiarch build
+ Copyright (C) 1999-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
/* Symbol = strncmp. */
-#include "multiarch/strncmp-sse2.S"
+#define DEFAULT_IMPL_V1 "multiarch/strncmp-sse2.S"
+/* This may cause regressions on some processors that heavily prefer
+ aligned loads or have a slow implementation of the `pcmpstri`
+ instruction. */
+#define DEFAULT_IMPL_V2 "multiarch/strncmp-sse4_2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strncmp-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strncmp-evex.S"
+
+#include "isa-default-impl.h"
+
libc_hidden_builtin_def (strncmp)
@@ -1,6 +1,29 @@
-#define STRLEN __strnlen
-#include "multiarch/strnlen-sse2.S"
+/* strnlen dispatch for RTLD and non-multiarch build
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRNLEN __strnlen
+
+#define DEFAULT_IMPL_V1 "multiarch/strnlen-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strnlen-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strnlen-evex.S"
+
+#include "isa-default-impl.h"
+
+weak_alias (__strnlen, strnlen)
libc_hidden_def (__strnlen)
-weak_alias (__strnlen, strnlen);
-libc_hidden_builtin_def (strnlen)
+libc_hidden_def (strnlen)
@@ -1,4 +1,4 @@
-/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
+/* strrchr dispatch for RTLD and non-multiarch build
Copyright (C) 2013-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -17,6 +17,12 @@
<https://www.gnu.org/licenses/>. */
#define STRRCHR strrchr
-#include "multiarch/strrchr-sse2.S"
+
+#define DEFAULT_IMPL_V1 "multiarch/strrchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/strrchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/strrchr-evex.S"
+
+#include "isa-default-impl.h"
+
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)
@@ -1,4 +1,4 @@
-/* wcschr with SSSE3
+/* wcschr dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,9 +16,14 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-
#define WCSCHR __wcschr
-#include "multiarch/wcschr-sse2.S"
-libc_hidden_def(__wcschr)
+
+#define DEFAULT_IMPL_V1 "multiarch/wcschr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/wcschr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/wcschr-evex.S"
+
+#include "isa-default-impl.h"
+
+libc_hidden_def (__wcschr)
weak_alias (__wcschr, wcschr)
libc_hidden_weak (wcschr)
@@ -1,4 +1,4 @@
-/* Optimized wcscmp for x86-64 with SSE2.
+/* wcscmp dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,6 +18,11 @@
/* Symbol = __wcscmp. */
-#include "multiarch/wcscmp-sse2.S"
+#define DEFAULT_IMPL_V1 "multiarch/wcscmp-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/wcscmp-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/wcscmp-evex.S"
+
+#include "isa-default-impl.h"
+
libc_hidden_def (__wcscmp)
weak_alias (__wcscmp, wcscmp)
@@ -1,4 +1,4 @@
-/* Optimized wcslen for x86-64 with SSE2.
+/* wcslen dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -17,5 +17,18 @@
<https://www.gnu.org/licenses/>. */
#define WCSLEN __wcslen
-#include "multiarch/wcslen-sse2.S"
-weak_alias(__wcslen, wcslen)
+
+#define DEFAULT_IMPL_V1 "multiarch/wcslen-sse2.S"
+#define DEFAULT_IMPL_V2 "multiarch/wcslen-sse4_1.S"
+#define DEFAULT_IMPL_V3 "multiarch/wcslen-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/wcslen-evex.S"
+
+#include "isa-default-impl.h"
+
+weak_alias (__wcslen, wcslen)
+
+#if MINIMUM_X86_ISA_LEVEL == 2 && !IS_IN (rtld)
+/* Hidden def so it can be used as overflow fallback in
+ wcsnlen-sse4_1.S. */
+libc_hidden_def (__wcslen)
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* wcsncmp dispatch for RTLD and non-multiarch .c ISA level <= 2 build.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* wcsncmp non-multiarch build is split into two files,
+ wcsncmp-generic.c and wcsncmp.S. The wcsncmp-generic.c build is for
+ ISA level <= 1 and just uses wcsmbs/wcsncmp.c. This must be split
+ into two files because we cannot include C code from assembly or
+ vice versa. */
+
+#include <isa-level.h>
+
+#if MINIMUM_X86_ISA_LEVEL <= 2
+# include "wcsmbs/wcsncmp.c"
+#endif
new file mode 100644
@@ -0,0 +1,40 @@
+/* wcsncmp dispatch for RTLD and non-multiarch .S ISA level >= 3 build.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* wcsncmp non-multiarch build is split into two files,
+ wcsncmp-generic.c and wcsncmp.S. The wcsncmp.S build is for
+ ISA level >= 3 and uses the optimized assembly implementations in
+ multiarch/wcsncmp*.S. This must be split into two files because
+ we cannot include C code from assembly or vice versa. */
+
+#include <isa-level.h>
+
+#if MINIMUM_X86_ISA_LEVEL >= 3
+
+/* Symbol = wcsncmp. */
+
+# define DEFAULT_IMPL_V3 "multiarch/wcsncmp-avx2.S"
+# define DEFAULT_IMPL_V4 "multiarch/wcsncmp-evex.S"
+
+/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
+ should never be used from here. */
+# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
+
+# include "isa-default-impl.h"
+
+#endif
new file mode 100644
@@ -0,0 +1,29 @@
+/* wcsnlen dispatch for RTLD and non-multiarch .c ISA level 1 build.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* wcsnlen non-multiarch build is split into two files,
+ wcsnlen-generic.c and wcsnlen.S. The wcsnlen-generic.c build is for
+ ISA level <= 1 and just uses wcsmbs/wcsnlen.c. This must be split
+ into two files because we cannot include C code from assembly or
+ vice versa. */
+
+#include <isa-level.h>
+
+#if MINIMUM_X86_ISA_LEVEL <= 1
+# include "wcsmbs/wcsnlen.c"
+#endif
new file mode 100644
@@ -0,0 +1,49 @@
+/* wcsnlen dispatch for RTLD and non-multiarch .S ISA level >= 2 build.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* wcsnlen non-multiarch build is split into two files,
+ wcsnlen-generic.c and wcsnlen.S. The wcsnlen.S build is for
+ ISA level >= 2 and uses the optimized assembly implementations in
+ multiarch/wcsnlen*.S. This must be split into two files because
+ we cannot include C code from assembly or vice versa. */
+
+#include <isa-level.h>
+
+#if MINIMUM_X86_ISA_LEVEL >= 2
+
+# define WCSNLEN __wcsnlen
+/* Overflow fallback symbol; must stay linked to the name in wcslen.S. */
+# if IS_IN (rtld)
+#  define OVERFLOW_STRLEN __wcslen
+# else
+#  define OVERFLOW_STRLEN HIDDEN_JUMPTARGET (__wcslen)
+# endif
+
+# define DEFAULT_IMPL_V2 "multiarch/wcsnlen-sse4_1.S"
+# define DEFAULT_IMPL_V3 "multiarch/wcsnlen-avx2.S"
+# define DEFAULT_IMPL_V4 "multiarch/wcsnlen-evex.S"
+
+/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
+ should never be used from here. */
+# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
+
+# include "isa-default-impl.h"
+
+weak_alias (__wcsnlen, wcsnlen)
+libc_hidden_def (__wcsnlen)
+#endif
@@ -1,4 +1,4 @@
-/* wcsrchr optimized with SSE2.
+/* wcsrchr dispatch for RTLD and non-multiarch build
Copyright (C) 2011-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,5 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define STRRCHR wcsrchr
-#include "multiarch/wcsrchr-sse2.S"
+#define WCSRCHR wcsrchr
+
+#define DEFAULT_IMPL_V1 "multiarch/wcsrchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/wcsrchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/wcsrchr-evex.S"
+
+#include "isa-default-impl.h"