@@ -7,7 +7,7 @@ endif
ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
- strcmp-sse2-unaligned strncmp-ssse3 \
+ strcmp-sse2-unaligned strncmp-sse2-unaligned strncmp-ssse3 \
memcpy-ssse3 \
memcpy-sse2-unaligned mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
@@ -30,7 +30,7 @@ CFLAGS-strspn-c.c += -msse4
endif
ifeq (yes,$(config-cflags-avx2))
-sysdep_routines += memset-avx2 strcpy-avx2 stpcpy-avx2 memcmp-avx2 strcmp-avx2
+sysdep_routines += memset-avx2 strcpy-avx2 stpcpy-avx2 memcmp-avx2 strcmp-avx2 strncmp-avx2
endif
endif
@@ -257,8 +257,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
- IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSE4_2,
- __strncmp_sse42)
+ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2_unaligned)
+ /* strncmp-avx2 is only built when the toolchain supports AVX2, so
+    only reference its symbol under the same condition.  */
+#ifdef HAVE_AVX2_SUPPORT
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_AVX2, __strncmp_avx2)
+#endif
+
IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSSE3,
__strncmp_ssse3)
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
@@ -19,6 +19,14 @@
#include "sysdep.h"
ENTRY ( __strcmp_sse2_unaligned)
+#ifdef AS_STRNCMP
+ lea -1(%rdx), %r10
+ test %rdx, %rdx
+ je L(ret_zero)
+L(back_to_start):
+ xor %rdx, %rdx
+#endif
+
pxor %xmm7, %xmm7
movl %esi, %eax
andl $4095, %eax
@@ -29,20 +37,35 @@ ENTRY ( __strcmp_sse2_unaligned)
andl $4095, %eax
cmpl $4032, %eax
jg L(cross_page)
+#ifdef AS_STRNCMP
+ cmp $64, %r10
+ jae L(dont_set_mask)
+ bts %r10, %rdx
+L(dont_set_mask):
+#endif
+
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm0
pcmpeqb %xmm1, %xmm0
pminub %xmm1, %xmm0
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %eax
+#ifdef AS_STRNCMP
+ or %dx, %ax
+#else
test %eax, %eax
+#endif
je L(next_48_bytes)
bsf %eax, %edx
movzbl (%rdi, %rdx), %eax
movzbl (%rsi, %rdx), %edx
subl %edx, %eax
ret
-
+#ifdef AS_STRNCMP
+ L(ret_zero):
+ xor %eax, %eax
+ ret
+#endif
.p2align 4
L(next_48_bytes):
movdqu 16(%rdi), %xmm6
@@ -54,16 +77,19 @@ L(next_48_bytes):
pcmpeqb %xmm7, %xmm3
movdqu 48(%rdi), %xmm4
pcmpeqb %xmm5, %xmm2
- pmovmskb %xmm3, %edx
movdqu 48(%rsi), %xmm0
pminub %xmm5, %xmm2
pcmpeqb %xmm7, %xmm2
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm2, %eax
+ salq $32, %rax
+#ifdef AS_STRNCMP
+ or %rdx, %rax
+#endif
+ pmovmskb %xmm3, %edx
sal $16, %edx
pminub %xmm4, %xmm0
pcmpeqb %xmm7, %xmm0
- salq $32, %rax
orq %rdx, %rax
pmovmskb %xmm0, %ecx
salq $48, %rcx
@@ -82,6 +108,10 @@ L(main_loop_header):
#endif
leaq 64(%rdi), %rdx
andq $-64, %rdx
+# ifdef AS_STRNCMP
+ addq %rdi, %r10
+ subq %rdx, %r10
+# endif
subq %rdi, %rdx
leaq (%rdi, %rdx), %rax
addq %rsi, %rdx
@@ -90,6 +120,15 @@ L(main_loop_header):
andl $4095, %ecx
sub %ecx, %esi
shr $6, %esi
+#ifdef AS_STRNCMP
+ mov %r10, %r9
+ addq %rdx, %r10
+ shr $6, %r9
+ cmp %r9, %rsi
+ jb L(dont_set_page_bound)
+ mov %r9, %rsi
+L(dont_set_page_bound):
+#endif
.p2align 4
L(loop):
@@ -111,7 +150,7 @@ L(back_to_loop):
addq $64, %rdx
vpmovmskb %ymm2, %edi
test %edi, %edi
- je .Lloop
+ je L(loop)
shl $32, %rdi
vpcmpeqb %ymm7, %ymm0, %ymm0
vpmovmskb %ymm0, %ecx
@@ -164,6 +203,14 @@ L(back_to_loop):
.p2align 4
L(loop_cross_page):
+#ifdef AS_STRNCMP
+ mov %r10, %r9
+ sub %rdx, %r9
+ cmp $64, %r9
+ jb L(prepare_back_to_start)
+#endif
+
+
mov %edx, %ecx
and $63, %ecx
neg %rcx
@@ -219,6 +266,14 @@ L(loop_cross_page):
#endif
mov %edx, %ecx
mov $63, %esi
+#ifdef AS_STRNCMP
+ shr $6, %r9
+ sub $1, %r9
+ cmp %r9, %rsi
+ jb L(dont_set_bound2)
+ mov %r9, %rsi
+L(dont_set_bound2):
+#endif
shrq %cl, %rdi
test %rdi, %rdi
je L(back_to_loop)
@@ -231,6 +286,18 @@ L(loop_cross_page):
subl %edx, %eax
ret
+#ifdef AS_STRNCMP
+L(prepare_back_to_start):
+# ifdef USE_AVX2
+ vzeroupper
+# endif
+ mov %r9, %r10
+ mov %rdx, %rsi
+ mov %rax, %rdi
+ jmp L(back_to_start)
+#endif
+
+
L(cross_page):
xorl %edx, %edx
jmp L(cross_page_loop_start)
@@ -244,6 +311,14 @@ L(cross_page_loop_start):
movzbl (%rsi, %rdx), %ecx
subl %ecx, %eax
jne L(different)
+#ifdef AS_STRNCMP
+ /* Stop once the byte index reaches the limit kept in %r10.  %r10 is
+    only set up when AS_STRNCMP is defined; in the plain strcmp build
+    it is never written, so this check must not be assembled there.  */
+ cmp %rdx, %r10
+ je L(different)
+#endif
+
test %ecx, %ecx
jne L(cross_page_loop)
L(different):
@@ -31,8 +31,8 @@
test %r9, %r9; \
je LABEL(strcmp_exitz); \
mov %r9, %r11
-
-# define STRCMP_SSE42 __strncmp_sse42
+# define STRCMP_AVX2 __strncmp_avx2
+# define STRCMP_SSE2_UNALIGNED __strncmp_sse2_unaligned
# define STRCMP_SSSE3 __strncmp_ssse3
# define STRCMP_SSE2 __strncmp_sse2
# define __GI_STRCMP __GI_strncmp
@@ -69,8 +69,9 @@
# define USE_AS_STRCMP
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
+# define STRCMP_AVX2 __strcmp_avx2
+# define STRCMP_SSE2_UNALIGNED __strcmp_sse2_unaligned
# define STRCMP strcmp
-# define STRCMP_SSE42 __strcmp_sse42
# define STRCMP_SSSE3 __strcmp_ssse3
# define STRCMP_SSE2 __strcmp_sse2
# define __GI_STRCMP __GI_strcmp
@@ -89,23 +90,23 @@ ENTRY(STRCMP)
jne 1f
call __init_cpu_features
1:
-#ifdef USE_AS_STRCMP
-# ifdef HAVE_AVX2_SUPPORT
+# if defined (USE_AS_STRCMP) || defined (USE_AS_STRNCMP)
+# ifdef HAVE_AVX2_SUPPORT
- leaq __strcmp_avx2(%rip), %rax
+ leaq STRCMP_AVX2(%rip), %rax
testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
jnz 3f
-# endif
- leaq __strcmp_sse2_unaligned(%rip), %rax
+# endif
+ leaq STRCMP_SSE2_UNALIGNED(%rip), %rax
testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
jnz 3f
-#else
+# else
testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
jnz 2f
leaq STRCMP_SSE42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
jnz 3f
-#endif
+# endif
2: leaq STRCMP_SSSE3(%rip), %rax
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
jnz 3f
@@ -166,15 +167,13 @@ END(__strncasecmp)
weak_alias (__strncasecmp, strncasecmp)
# endif
-# undef LABEL
-# define LABEL(l) .L##l##_sse42
-# define GLABEL(l) l##_sse42
-# define SECTION sse4.2
-# include "strcmp-sse42.S"
-
-
-# ifdef HAVE_AVX_SUPPORT
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# undef LABEL
+# define LABEL(l) .L##l##_sse42
+# define GLABEL(l) l##_sse42
+# define SECTION sse4.2
+# include "strcmp-sse42.S"
+# ifdef HAVE_AVX_SUPPORT
# define LABEL(l) .L##l##_avx
# define GLABEL(l) l##_avx
# define USE_AVX 1
new file mode 100644
@@ -0,0 +1,4 @@
+#define USE_AVX2
+#define AS_STRNCMP
+#define __strcmp_sse2_unaligned __strncmp_avx2
+#include "strcmp-sse2-unaligned.S"
new file mode 100644
@@ -0,0 +1,3 @@
+#define AS_STRNCMP
+#define __strcmp_sse2_unaligned __strncmp_sse2_unaligned
+#include "strcmp-sse2-unaligned.S"