@@ -15,358 +15,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#define MEMCHR memchr
-#ifdef USE_AS_WMEMCHR
-# define MEMCHR wmemchr
-# define PCMPEQ pcmpeqd
-# define CHAR_PER_VEC 4
-#else
-# define MEMCHR memchr
-# define PCMPEQ pcmpeqb
-# define CHAR_PER_VEC 16
-#endif
+#define DEFAULT_IMPL_V1 "multiarch/memchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/memchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/memchr-evex.S"
-/* fast SSE2 version with using pmaxub and 64 byte loop */
+#include "isa-default-impl.h"
- .text
-ENTRY(MEMCHR)
- movd %esi, %xmm1
- mov %edi, %ecx
-
-#ifdef __ILP32__
- /* Clear the upper 32 bits. */
- movl %edx, %edx
-#endif
-#ifdef USE_AS_WMEMCHR
- test %RDX_LP, %RDX_LP
- jz L(return_null)
-#else
- punpcklbw %xmm1, %xmm1
- test %RDX_LP, %RDX_LP
- jz L(return_null)
- punpcklbw %xmm1, %xmm1
-#endif
-
- and $63, %ecx
- pshufd $0, %xmm1, %xmm1
-
- cmp $48, %ecx
- ja L(crosscache)
-
- movdqu (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
-
- jnz L(matches_1)
- sub $CHAR_PER_VEC, %rdx
- jbe L(return_null)
- add $16, %rdi
- and $15, %ecx
- and $-16, %rdi
-#ifdef USE_AS_WMEMCHR
- shr $2, %ecx
-#endif
- add %rcx, %rdx
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
- jmp L(loop_prolog)
-
- .p2align 4
-L(crosscache):
- and $15, %ecx
- and $-16, %rdi
- movdqa (%rdi), %xmm0
-
- PCMPEQ %xmm1, %xmm0
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
- /* Remove the leading bytes. */
- sar %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
- /* Check which byte is a match. */
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- add %rdi, %rax
- add %rcx, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
- "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
- possible addition overflow. */
- neg %rcx
- add $16, %rcx
-#ifdef USE_AS_WMEMCHR
- shr $2, %ecx
-#endif
- sub %rcx, %rdx
- jbe L(return_null)
- add $16, %rdi
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
-
- .p2align 4
-L(loop_prolog):
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- PCMPEQ %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm4
- PCMPEQ %xmm1, %xmm4
- add $64, %rdi
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- test $0x3f, %rdi
- jz L(align64_loop)
-
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
-
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- PCMPEQ %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
-
- add $64, %rdi
- test %eax, %eax
- jnz L(matches0)
-
- mov %rdi, %rcx
- and $-64, %rdi
- and $63, %ecx
-#ifdef USE_AS_WMEMCHR
- shr $2, %ecx
-#endif
- add %rcx, %rdx
-
- .p2align 4
-L(align64_loop):
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm2
- movdqa 32(%rdi), %xmm3
- movdqa 48(%rdi), %xmm4
-
- PCMPEQ %xmm1, %xmm0
- PCMPEQ %xmm1, %xmm2
- PCMPEQ %xmm1, %xmm3
- PCMPEQ %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
- pmovmskb %xmm4, %eax
-
- add $64, %rdi
-
- test %eax, %eax
- jz L(align64_loop)
-
- sub $64, %rdi
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
-
- PCMPEQ 48(%rdi), %xmm1
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- pmovmskb %xmm1, %eax
- bsf %eax, %eax
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(exit_loop):
- add $(CHAR_PER_VEC * 2), %edx
- jle L(exit_loop_32)
-
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- PCMPEQ %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32_1)
- sub $CHAR_PER_VEC, %edx
- jle L(return_null)
-
- PCMPEQ 48(%rdi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches48_1)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(exit_loop_32):
- add $(CHAR_PER_VEC * 2), %edx
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches_1)
- sub $CHAR_PER_VEC, %edx
- jbe L(return_null)
-
- PCMPEQ 16(%rdi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches16_1)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(matches0):
- bsf %eax, %eax
- lea -16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches):
- bsf %eax, %eax
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16):
- bsf %eax, %eax
- lea 16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches32):
- bsf %eax, %eax
- lea 32(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- lea 16(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches32_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- lea 32(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches48_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(return_null):
- xor %eax, %eax
- ret
-END(MEMCHR)
-
-#ifndef USE_AS_WMEMCHR
strong_alias (memchr, __memchr)
libc_hidden_builtin_def(memchr)
-#endif
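
The rewritten top-level memchr.S above now only names a per-ISA-level default and defers to isa-default-impl.h. A minimal sketch of what that header presumably does is below; this is an illustration, not the verbatim glibc header, and it only uses the DEFAULT_IMPL_Vn / MINIMUM_X86_ISA_LEVEL / ISA_DEFAULT_IMPL names that the guards elsewhere in this patch already rely on:

    /* Sketch only.  Pick the highest DEFAULT_IMPL_Vn allowed by the
       configured minimum ISA level and include it; the included file
       then sees ISA_DEFAULT_IMPL defined, which is exactly what the
       "|| defined ISA_DEFAULT_IMPL" guards later in this patch test.  */
    #if MINIMUM_X86_ISA_LEVEL >= 4 && defined DEFAULT_IMPL_V4
    # define ISA_DEFAULT_IMPL DEFAULT_IMPL_V4
    #elif MINIMUM_X86_ISA_LEVEL >= 3 && defined DEFAULT_IMPL_V3
    # define ISA_DEFAULT_IMPL DEFAULT_IMPL_V3
    #else
    # define ISA_DEFAULT_IMPL DEFAULT_IMPL_V1
    #endif

    #include ISA_DEFAULT_IMPL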
@@ -19,37 +19,48 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_rtm) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+
+/* TODO: Look into using the ISA build level to remove some/all of the
+ feature checks. */
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
{
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
- return OPTIMIZE (evex_rtm);
+ return_X86_OPTIMIZE_V4 (evex_rtm);
- return OPTIMIZE (evex);
+ return_X86_OPTIMIZE_V4 (evex);
}
+ X86_ERROR_IF_REACHABLE_V4 ();
+
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
- return OPTIMIZE (avx2_rtm);
+ return_X86_OPTIMIZE_V3 (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
- return OPTIMIZE (avx2);
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER))
+ return_X86_OPTIMIZE_V3 (avx2);
}
- return OPTIMIZE (sse2);
+ X86_ERROR_IF_REACHABLE_V3 ();
+
+ /* This is unreachable (compile-time checked) if ISA level >= 3,
+    so no need for a robust fallback here.  */
+ return_X86_OPTIMIZE_V2 (sse2);
}
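
The X86_ISA_* wrappers used in the selector above are what make the TODO and "unreachable" comments work: when the unit is compiled with a minimum ISA level that already implies a feature, the runtime query folds to a compile-time constant and the lower-level branches become dead code. A rough sketch of the idea, assuming a per-feature FEATURE_X86_ISA_LEVEL constant; the real definitions live in isa-level.h and may differ in detail:

    /* Sketch, not the actual glibc macro.  If the build's minimum ISA
       level already guarantees FEATURE, the expression is the constant 1,
       so the compiler can drop the fallback paths (e.g. the SSE2 return
       when building at ISA level >= 3).  */
    #define X86_ISA_CPU_FEATURE_USABLE_P(cpu, FEATURE)		\
      (MINIMUM_X86_ISA_LEVEL >= FEATURE##_X86_ISA_LEVEL		\
       || CPU_FEATURE_USABLE_P (cpu, FEATURE))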
@@ -25,7 +25,8 @@
/* Fill ARRAY of MAX elements with IFUNC implementations for function
NAME supported on target machine and return the number of valid
- entries. */
+ entries. Each set of implementations for a given function is sorted in
+ descending order by ISA level. */
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -53,24 +54,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/memchr.c. */
IFUNC_IMPL (i, name, memchr,
- IFUNC_IMPL_ADD (array, i, memchr,
- CPU_FEATURE_USABLE (AVX2),
- __memchr_avx2)
- IFUNC_IMPL_ADD (array, i, memchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, memchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex)
- IFUNC_IMPL_ADD (array, i, memchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex_rtm)
- IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __memchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memchr_avx2_rtm)
+ /* Can be lowered to V1 if a V2 implementation is added. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memchr,
+ 1,
+ __memchr_sse2))
/* Support sysdeps/x86_64/multiarch/memcmp.c. */
IFUNC_IMPL (i, name, memcmp,
@@ -288,24 +292,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/rawmemchr.c. */
IFUNC_IMPL (i, name, rawmemchr,
- IFUNC_IMPL_ADD (array, i, rawmemchr,
- CPU_FEATURE_USABLE (AVX2),
- __rawmemchr_avx2)
- IFUNC_IMPL_ADD (array, i, rawmemchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __rawmemchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, rawmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex)
- IFUNC_IMPL_ADD (array, i, rawmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex_rtm)
- IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
+ X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __rawmemchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __rawmemchr_avx2_rtm)
+ /* Can be lowered to V1 if a V2 implementation is added. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, rawmemchr,
+ 1,
+ __rawmemchr_sse2))
/* Support sysdeps/x86_64/multiarch/strlen.c. */
IFUNC_IMPL (i, name, strlen,
@@ -748,24 +755,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
- IFUNC_IMPL_ADD (array, i, wmemchr,
- CPU_FEATURE_USABLE (AVX2),
- __wmemchr_avx2)
- IFUNC_IMPL_ADD (array, i, wmemchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wmemchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex)
- IFUNC_IMPL_ADD (array, i, wmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex_rtm)
- IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemchr_avx2_rtm)
+ /* Can be lowered to V1 if a V2 implementation is added. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wmemchr,
+ 1,
+ __wmemchr_sse2))
/* Support sysdeps/x86_64/multiarch/wmemcmp.c. */
IFUNC_IMPL (i, name, wmemcmp,
@@ -16,7 +16,15 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if defined IS_MULTIARCH && defined ISA_DEFAULT_IMPL
+# error "Multiarch build should never default include!"
+#endif
+
+#if (MINIMUM_X86_ISA_LEVEL <= 3 && IS_IN (libc)) \
+ || defined ISA_DEFAULT_IMPL
+
# include <sysdep.h>
@@ -16,7 +16,15 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if defined IS_MULTIARCH && defined ISA_DEFAULT_IMPL
+# error "Multiarch build should never default include!"
+#endif
+
+#if (MINIMUM_X86_ISA_LEVEL <= 4 && IS_IN (libc)) \
+ || defined ISA_DEFAULT_IMPL
+
# include <sysdep.h>
@@ -16,13 +16,367 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# define memchr __memchr_sse2
+#include <isa-level.h>
-# undef strong_alias
-# define strong_alias(memchr, __memchr)
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(memchr)
+#if defined IS_MULTIARCH && defined ISA_DEFAULT_IMPL
+# error "Multiarch build should never default include!"
#endif
-#include "../memchr.S"
+/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation,
+   so we need this file to build for ISA V2 builds.  */
+#if (MINIMUM_X86_ISA_LEVEL <= 2 && IS_IN (libc)) \
+ || defined ISA_DEFAULT_IMPL
+
+
+# include <sysdep.h>
+
+# ifndef MEMCHR
+# define MEMCHR __memchr_sse2
+# endif
+# ifdef USE_AS_WMEMCHR
+# define PCMPEQ pcmpeqd
+# define CHAR_PER_VEC 4
+# else
+# define PCMPEQ pcmpeqb
+# define CHAR_PER_VEC 16
+# endif
+
+/* Fast SSE2 version using pmaxub and a 64-byte loop.  */
+
+ .text
+ENTRY(MEMCHR)
+ movd %esi, %xmm1
+ mov %edi, %ecx
+
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
+# ifdef USE_AS_WMEMCHR
+ test %RDX_LP, %RDX_LP
+ jz L(return_null)
+# else
+ punpcklbw %xmm1, %xmm1
+ test %RDX_LP, %RDX_LP
+ jz L(return_null)
+ punpcklbw %xmm1, %xmm1
+# endif
+
+ and $63, %ecx
+ pshufd $0, %xmm1, %xmm1
+
+ cmp $48, %ecx
+ ja L(crosscache)
+
+ movdqu (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+
+ jnz L(matches_1)
+ sub $CHAR_PER_VEC, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+ and $15, %ecx
+ and $-16, %rdi
+# ifdef USE_AS_WMEMCHR
+ shr $2, %ecx
+# endif
+ add %rcx, %rdx
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ jmp L(loop_prolog)
+
+ .p2align 4
+L(crosscache):
+ and $15, %ecx
+ and $-16, %rdi
+ movdqa (%rdi), %xmm0
+
+ PCMPEQ %xmm1, %xmm0
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ /* Remove the leading bytes. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+ /* Check which byte is a match. */
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ add %rdi, %rax
+ add %rcx, %rax
+ ret
+
+ .p2align 4
+L(unaligned_no_match):
+ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
+ possible addition overflow. */
+ neg %rcx
+ add $16, %rcx
+# ifdef USE_AS_WMEMCHR
+ shr $2, %ecx
+# endif
+ sub %rcx, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ .p2align 4
+L(loop_prolog):
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm4
+ PCMPEQ %xmm1, %xmm4
+ add $64, %rdi
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ test $0x3f, %rdi
+ jz L(align64_loop)
+
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+
+ add $64, %rdi
+ test %eax, %eax
+ jnz L(matches0)
+
+ mov %rdi, %rcx
+ and $-64, %rdi
+ and $63, %ecx
+# ifdef USE_AS_WMEMCHR
+ shr $2, %ecx
+# endif
+ add %rcx, %rdx
+
+ .p2align 4
+L(align64_loop):
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ movdqa (%rdi), %xmm0
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ movdqa 48(%rdi), %xmm4
+
+ PCMPEQ %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm4
+
+ pmaxub %xmm0, %xmm3
+ pmaxub %xmm2, %xmm4
+ pmaxub %xmm3, %xmm4
+ pmovmskb %xmm4, %eax
+
+ add $64, %rdi
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+ sub $64, %rdi
+
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+
+ PCMPEQ 48(%rdi), %xmm1
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ pmovmskb %xmm1, %eax
+ bsf %eax, %eax
+ lea 48(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(exit_loop):
+ add $(CHAR_PER_VEC * 2), %edx
+ jle L(exit_loop_32)
+
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32_1)
+ sub $CHAR_PER_VEC, %edx
+ jle L(return_null)
+
+ PCMPEQ 48(%rdi), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches48_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(exit_loop_32):
+ add $(CHAR_PER_VEC * 2), %edx
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches_1)
+ sub $CHAR_PER_VEC, %edx
+ jbe L(return_null)
+
+ PCMPEQ 16(%rdi), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches16_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(matches0):
+ bsf %eax, %eax
+ lea -16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches):
+ bsf %eax, %eax
+ add %rdi, %rax
+ ret
+
+ .p2align 4
+L(matches16):
+ bsf %eax, %eax
+ lea 16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches32):
+ bsf %eax, %eax
+ lea 32(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches_1):
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ add %rdi, %rax
+ ret
+
+ .p2align 4
+L(matches16_1):
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ lea 16(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(matches32_1):
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ lea 32(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(matches48_1):
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ lea 48(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(return_null):
+ xor %eax, %eax
+ ret
+END(MEMCHR)
+#endif
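
The "pmaxub and 64-byte loop" trick in L(align64_loop) above works because pcmpeqb leaves 0xff in every matching byte lane, and pmaxub is an unsigned byte maximum, so folding the four compare results preserves any 0xff that appeared; a single pmovmskb/test then covers all 64 bytes per iteration. A rough C equivalent of one iteration, for the byte (non-wmemchr) case only; the helper name is made up for illustration and the assembly above remains the real implementation:

    #include <emmintrin.h>

    /* Returns nonzero if any of the 64 bytes at P (at least 16-byte
       aligned) equals the byte broadcast in NEEDLE.  */
    static int
    any_match_64 (const unsigned char *p, __m128i needle)
    {
      __m128i c0 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) p), needle);
      __m128i c1 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) (p + 16)), needle);
      __m128i c2 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) (p + 32)), needle);
      __m128i c3 = _mm_cmpeq_epi8 (_mm_load_si128 ((const __m128i *) (p + 48)), needle);
      /* pmaxub folds: any 0xff (matching lane) survives.  */
      __m128i m = _mm_max_epu8 (_mm_max_epu8 (c0, c2), _mm_max_epu8 (c1, c3));
      return _mm_movemask_epi8 (m) != 0;
    }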
@@ -1,4 +1,7 @@
-#define MEMCHR __rawmemchr_avx2_rtm
-#define USE_AS_RAWMEMCHR 1
+#ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_avx2_rtm
+#endif
+#define USE_AS_RAWMEMCHR 1
+#define MEMCHR RAWMEMCHR
#include "memchr-avx2-rtm.S"
@@ -1,4 +1,7 @@
-#define MEMCHR __rawmemchr_avx2
-#define USE_AS_RAWMEMCHR 1
+#ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_avx2
+#endif
+#define USE_AS_RAWMEMCHR 1
+#define MEMCHR RAWMEMCHR
#include "memchr-avx2.S"
@@ -1,3 +1,7 @@
-#define MEMCHR __rawmemchr_evex_rtm
-#define USE_AS_RAWMEMCHR 1
+#ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_evex_rtm
+#endif
+#define USE_AS_RAWMEMCHR 1
+#define MEMCHR RAWMEMCHR
+
#include "memchr-evex-rtm.S"
@@ -1,4 +1,7 @@
-#define MEMCHR __rawmemchr_evex
-#define USE_AS_RAWMEMCHR 1
+#ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_evex
+#endif
+#define USE_AS_RAWMEMCHR 1
+#define MEMCHR RAWMEMCHR
#include "memchr-evex.S"
@@ -16,14 +16,199 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
-# define __rawmemchr __rawmemchr_sse2
-
-# undef weak_alias
-# define weak_alias(__rawmemchr, rawmemchr)
-# undef libc_hidden_def
-# define libc_hidden_def(__rawmemchr)
+#include <isa-level.h>
+
+#if defined IS_MULTIARCH && defined ISA_DEFAULT_IMPL
+# error "Multiarch build should never default include!"
#endif
-#include "../rawmemchr.S"
+/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation,
+   so we need this file to build for ISA V2 builds.  */
+#if (MINIMUM_X86_ISA_LEVEL <= 2 && IS_IN (libc)) \
+ || defined ISA_DEFAULT_IMPL
+
+
+# include <sysdep.h>
+
+# ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_sse2
+# endif
+
+ .text
+ENTRY (RAWMEMCHR)
+ movd %rsi, %xmm1
+ mov %rdi, %rcx
+
+ punpcklbw %xmm1, %xmm1
+ punpcklbw %xmm1, %xmm1
+
+ and $63, %rcx
+ pshufd $0, %xmm1, %xmm1
+
+ cmp $48, %rcx
+ ja L(crosscache)
+
+ movdqu (%rdi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+
+ jnz L(matches)
+ add $16, %rdi
+ and $-16, %rdi
+ jmp L(loop_prolog)
+
+ .p2align 4
+L(crosscache):
+ and $15, %rcx
+ and $-16, %rdi
+ movdqa (%rdi), %xmm0
+
+ pcmpeqb %xmm1, %xmm0
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+/* Remove the leading bytes. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+/* Check which byte is a match. */
+ bsf %eax, %eax
+
+ add %rdi, %rax
+ add %rcx, %rax
+ ret
+
+ .p2align 4
+L(unaligned_no_match):
+ add $16, %rdi
+
+ .p2align 4
+L(loop_prolog):
+ movdqa (%rdi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm4
+ pcmpeqb %xmm1, %xmm4
+ add $64, %rdi
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ test $0x3f, %rdi
+ jz L(align64_loop)
+
+ movdqa (%rdi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+
+ add $64, %rdi
+ test %eax, %eax
+ jnz L(matches0)
+
+ and $-64, %rdi
+
+ .p2align 4
+L(align64_loop):
+ movdqa (%rdi), %xmm0
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ movdqa 48(%rdi), %xmm4
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm1, %xmm4
+
+ pmaxub %xmm0, %xmm3
+ pmaxub %xmm2, %xmm4
+ pmaxub %xmm3, %xmm4
+ pmovmskb %xmm4, %eax
+
+ add $64, %rdi
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+ sub $64, %rdi
+
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+
+ pcmpeqb 48(%rdi), %xmm1
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ pmovmskb %xmm1, %eax
+ bsf %eax, %eax
+ lea 48(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(matches0):
+ bsf %eax, %eax
+ lea -16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches):
+ bsf %eax, %eax
+ add %rdi, %rax
+ ret
+
+ .p2align 4
+L(matches16):
+ bsf %eax, %eax
+ lea 16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches32):
+ bsf %eax, %eax
+ lea 32(%rax, %rdi), %rax
+ ret
+
+END (RAWMEMCHR)
+#endif
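
Compared with the memchr-sse2.S code earlier in the patch, this rawmemchr version drops all of the length bookkeeping (no exit_loop, no return_null): rawmemchr's contract is that the byte is known to occur in the buffer, so the scan can run unconditionally. For reference, the plain-C semantics being implemented (illustrative helper name, not glibc code):

    /* Byte-at-a-time reference for rawmemchr: C is assumed to occur in S,
       so there is no length argument and no failure path.  */
    static void *
    rawmemchr_ref (const void *s, int c)
    {
      const unsigned char *p = s;
      while (*p != (unsigned char) c)
        ++p;
      return (void *) p;
    }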
new file mode 100644
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "../memchr.S"
new file mode 100644
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "../rawmemchr.S"
@@ -1,4 +1,7 @@
-#define MEMCHR __wmemchr_avx2_rtm
-#define USE_AS_WMEMCHR 1
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_avx2_rtm
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
#include "memchr-avx2-rtm.S"
@@ -1,4 +1,7 @@
-#define MEMCHR __wmemchr_avx2
-#define USE_AS_WMEMCHR 1
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_avx2
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
#include "memchr-avx2.S"
@@ -1,3 +1,7 @@
-#define MEMCHR __wmemchr_evex_rtm
-#define USE_AS_WMEMCHR 1
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_evex_rtm
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
+
#include "memchr-evex-rtm.S"
@@ -1,4 +1,7 @@
-#define MEMCHR __wmemchr_evex
-#define USE_AS_WMEMCHR 1
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_evex
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
#include "memchr-evex.S"
@@ -1,4 +1,25 @@
-#define USE_AS_WMEMCHR 1
-#define wmemchr __wmemchr_sse2
+/* wmemchr optimized with SSE2
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-#include "../memchr.S"
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_sse2
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
+
+#include "memchr-sse2.S"
@@ -17,185 +17,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#define RAWMEMCHR rawmemchr
- .text
-ENTRY (__rawmemchr)
- movd %rsi, %xmm1
- mov %rdi, %rcx
+#define DEFAULT_IMPL_V1 "multiarch/rawmemchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/rawmemchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/rawmemchr-evex.S"
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
+#include "isa-default-impl.h"
- and $63, %rcx
- pshufd $0, %xmm1, %xmm1
-
- cmp $48, %rcx
- ja L(crosscache)
-
- movdqu (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
- test %eax, %eax
-
- jnz L(matches)
- add $16, %rdi
- and $-16, %rdi
- jmp L(loop_prolog)
-
- .p2align 4
-L(crosscache):
- and $15, %rcx
- and $-16, %rdi
- movdqa (%rdi), %xmm0
-
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
-/* Remove the leading bytes. */
- sar %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
-/* Check which byte is a match. */
- bsf %eax, %eax
-
- add %rdi, %rax
- add %rcx, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- add $16, %rdi
-
- .p2align 4
-L(loop_prolog):
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm4
- pcmpeqb %xmm1, %xmm4
- add $64, %rdi
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- test $0x3f, %rdi
- jz L(align64_loop)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
-
- add $64, %rdi
- test %eax, %eax
- jnz L(matches0)
-
- and $-64, %rdi
-
- .p2align 4
-L(align64_loop):
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm2
- movdqa 32(%rdi), %xmm3
- movdqa 48(%rdi), %xmm4
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
- pmovmskb %xmm4, %eax
-
- add $64, %rdi
-
- test %eax, %eax
- jz L(align64_loop)
-
- sub $64, %rdi
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
-
- pcmpeqb 48(%rdi), %xmm1
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- pmovmskb %xmm1, %eax
- bsf %eax, %eax
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches0):
- bsf %eax, %eax
- lea -16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches):
- bsf %eax, %eax
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16):
- bsf %eax, %eax
- lea 16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches32):
- bsf %eax, %eax
- lea 32(%rax, %rdi), %rax
- ret
-
-END (__rawmemchr)
-
-weak_alias (__rawmemchr, rawmemchr)
-libc_hidden_builtin_def (__rawmemchr)
+strong_alias (rawmemchr, __rawmemchr)
+libc_hidden_builtin_def (rawmemchr)
new file mode 100644
@@ -0,0 +1,24 @@
+/* Copyright (C) 2011-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WMEMCHR wmemchr
+
+#define DEFAULT_IMPL_V1 "multiarch/wmemchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/wmemchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/wmemchr-evex.S"
+
+#include "isa-default-impl.h"