Message ID: 20220323215734.3927131-23-goldstein.w.n@gmail.com
State: New
Series: [v1,01/23] benchtests: Use json-lib in bench-strchr.c
On Wed, Mar 23, 2022 at 3:03 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > The rational is: > > 1. SSE42 has nearly identical logic so any benefit is minimal (3.4% > regression on Tigerlake using SSE42 versus AVX across the > benchtest suite). > 2. AVX2 version covers the majority of targets that previously > prefered it. > 3. The targets where AVX would still be best (SnB and IVB) are > becoming outdated. > > All in all the saving the code size is worth it. > > All string/memory tests pass. > --- > Geomtric Mean N=40 runs; All functions page aligned > length, align1, align2, max_char, AVX Time / SSE42 Time > 1, 1, 1, 127, 0.928 > 2, 2, 2, 127, 0.934 > 3, 3, 3, 127, 0.975 > 4, 4, 4, 127, 0.96 > 5, 5, 5, 127, 0.935 > 6, 6, 6, 127, 0.929 > 7, 7, 7, 127, 0.959 > 8, 0, 0, 127, 0.955 > 9, 1, 1, 127, 0.944 > 10, 2, 2, 127, 0.975 > 11, 3, 3, 127, 0.935 > 12, 4, 4, 127, 0.931 > 13, 5, 5, 127, 0.926 > 14, 6, 6, 127, 0.901 > 15, 7, 7, 127, 0.951 > 4, 0, 0, 127, 0.958 > 4, 0, 0, 254, 0.956 > 8, 0, 0, 254, 0.977 > 16, 0, 0, 127, 0.955 > 16, 0, 0, 254, 0.953 > 32, 0, 0, 127, 0.943 > 32, 0, 0, 254, 0.941 > 64, 0, 0, 127, 0.941 > 64, 0, 0, 254, 0.955 > 128, 0, 0, 127, 0.972 > 128, 0, 0, 254, 0.975 > 256, 0, 0, 127, 0.996 > 256, 0, 0, 254, 0.993 > 512, 0, 0, 127, 0.992 > 512, 0, 0, 254, 0.986 > 1024, 0, 0, 127, 0.994 > 1024, 0, 0, 254, 0.993 > 16, 1, 2, 127, 0.933 > 16, 2, 1, 254, 0.953 > 32, 2, 4, 127, 0.927 > 32, 4, 2, 254, 0.986 > 64, 3, 6, 127, 0.991 > 64, 6, 3, 254, 1.014 > 128, 4, 0, 127, 1.001 > 128, 0, 4, 254, 0.991 > 256, 5, 2, 127, 1.011 > 256, 2, 5, 254, 1.013 > 512, 6, 4, 127, 1.056 > 512, 4, 6, 254, 0.916 > 1024, 7, 6, 127, 1.059 > 1024, 6, 7, 254, 1.043 > > sysdeps/x86_64/multiarch/Makefile | 2 - > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 - > sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 4 - > sysdeps/x86_64/multiarch/strcasecmp_l-avx.S | 22 -- > sysdeps/x86_64/multiarch/strcmp-sse42.S | 240 +++++++++----------- > sysdeps/x86_64/multiarch/strncase_l-avx.S | 22 -- > 6 files changed, 105 insertions(+), 197 deletions(-) > delete mode 100644 sysdeps/x86_64/multiarch/strcasecmp_l-avx.S > delete mode 100644 sysdeps/x86_64/multiarch/strncase_l-avx.S > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 35d80dc2ff..6507d1b7fa 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -54,7 +54,6 @@ sysdep_routines += \ > stpncpy-evex \ > stpncpy-sse2-unaligned \ > stpncpy-ssse3 \ > - strcasecmp_l-avx \ > strcasecmp_l-avx2 \ > strcasecmp_l-avx2-rtm \ > strcasecmp_l-evex \ > @@ -95,7 +94,6 @@ sysdep_routines += \ > strlen-avx2-rtm \ > strlen-evex \ > strlen-sse2 \ > - strncase_l-avx \ > strncase_l-avx2 \ > strncase_l-avx2-rtm \ > strncase_l-evex \ > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index f1a4d3dac2..40cc6cc49e 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > @@ -447,9 +447,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > (CPU_FEATURE_USABLE (AVX2) > && CPU_FEATURE_USABLE (RTM)), > __strcasecmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strcasecmp, > - CPU_FEATURE_USABLE (AVX), > - __strcasecmp_avx) > IFUNC_IMPL_ADD (array, i, strcasecmp, > CPU_FEATURE_USABLE (SSE4_2), > __strcasecmp_sse42) > @@ -471,9 +468,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > (CPU_FEATURE_USABLE (AVX2) > && CPU_FEATURE_USABLE (RTM)), > 
__strcasecmp_l_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strcasecmp_l, > - CPU_FEATURE_USABLE (AVX), > - __strcasecmp_l_avx) > IFUNC_IMPL_ADD (array, i, strcasecmp_l, > CPU_FEATURE_USABLE (SSE4_2), > __strcasecmp_l_sse42) > @@ -609,9 +603,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > (CPU_FEATURE_USABLE (AVX2) > && CPU_FEATURE_USABLE (RTM)), > __strncasecmp_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strncasecmp, > - CPU_FEATURE_USABLE (AVX), > - __strncasecmp_avx) > IFUNC_IMPL_ADD (array, i, strncasecmp, > CPU_FEATURE_USABLE (SSE4_2), > __strncasecmp_sse42) > @@ -634,9 +625,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > (CPU_FEATURE_USABLE (AVX2) > && CPU_FEATURE_USABLE (RTM)), > __strncasecmp_l_avx2_rtm) > - IFUNC_IMPL_ADD (array, i, strncasecmp_l, > - CPU_FEATURE_USABLE (AVX), > - __strncasecmp_l_avx) > IFUNC_IMPL_ADD (array, i, strncasecmp_l, > CPU_FEATURE_USABLE (SSE4_2), > __strncasecmp_l_sse42) > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > index bf0d146e7f..766539c241 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > @@ -22,7 +22,6 @@ > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > @@ -46,9 +45,6 @@ IFUNC_SELECTOR (void) > return OPTIMIZE (avx2); > } > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) > - return OPTIMIZE (avx); > - > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > return OPTIMIZE (sse42); > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S > deleted file mode 100644 > index 7ec7c21b5a..0000000000 > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S > +++ /dev/null > @@ -1,22 +0,0 @@ > -/* strcasecmp_l optimized with AVX. > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. 
*/ > - > -#define STRCMP_SSE42 __strcasecmp_l_avx > -#define USE_AVX 1 > -#define USE_AS_STRCASECMP_L > -#include "strcmp-sse42.S" > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S > index 7805ae9d41..a9178ad25c 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S > +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S > @@ -41,13 +41,8 @@ > # define UPDATE_STRNCMP_COUNTER > #endif > > -#ifdef USE_AVX > -# define SECTION avx > -# define GLABEL(l) l##_avx > -#else > -# define SECTION sse4.2 > -# define GLABEL(l) l##_sse42 > -#endif > +#define SECTION sse4.2 > +#define GLABEL(l) l##_sse42 > > #define LABEL(l) .L##l > > @@ -105,21 +100,7 @@ END (GLABEL(__strncasecmp)) > #endif > > > -#ifdef USE_AVX > -# define movdqa vmovdqa > -# define movdqu vmovdqu > -# define pmovmskb vpmovmskb > -# define pcmpistri vpcmpistri > -# define psubb vpsubb > -# define pcmpeqb vpcmpeqb > -# define psrldq vpsrldq > -# define pslldq vpslldq > -# define palignr vpalignr > -# define pxor vpxor > -# define D(arg) arg, arg > -#else > -# define D(arg) arg > -#endif > +#define arg arg > > STRCMP_SSE42: > cfi_startproc > @@ -191,18 +172,7 @@ LABEL(case_add): > movdqu (%rdi), %xmm1 > movdqu (%rsi), %xmm2 > #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L > -# ifdef USE_AVX > -# define TOLOWER(reg1, reg2) \ > - vpaddb LCASE_MIN_reg, reg1, %xmm7; \ > - vpaddb LCASE_MIN_reg, reg2, %xmm8; \ > - vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \ > - vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \ > - vpandn CASE_ADD_reg, %xmm7, %xmm7; \ > - vpandn CASE_ADD_reg, %xmm8, %xmm8; \ > - vpaddb %xmm7, reg1, reg1; \ > - vpaddb %xmm8, reg2, reg2 > -# else > -# define TOLOWER(reg1, reg2) \ > +# define TOLOWER(reg1, reg2) \ > movdqa LCASE_MIN_reg, %xmm7; \ > movdqa LCASE_MIN_reg, %xmm8; \ > paddb reg1, %xmm7; \ > @@ -213,15 +183,15 @@ LABEL(case_add): > pandn CASE_ADD_reg, %xmm8; \ > paddb %xmm7, reg1; \ > paddb %xmm8, reg2 > -# endif > + > TOLOWER (%xmm1, %xmm2) > #else > # define TOLOWER(reg1, reg2) > #endif > - pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */ > - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ > - pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */ > - psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/ > + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ > + pcmpeqb %xmm1, %xmm0 /* Any null chars? */ > + pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ > + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ > pmovmskb %xmm1, %edx > sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ > jnz LABEL(less16bytes)/* If not, find different value or null char */ > @@ -245,7 +215,7 @@ LABEL(crosscache): > xor %r8d, %r8d > and $0xf, %ecx /* offset of rsi */ > and $0xf, %eax /* offset of rdi */ > - pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */ > + pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ > cmp %eax, %ecx > je LABEL(ashr_0) /* rsi and rdi relative offset same */ > ja LABEL(bigger) > @@ -259,7 +229,7 @@ LABEL(bigger): > sub %rcx, %r9 > lea LABEL(unaligned_table)(%rip), %r10 > movslq (%r10, %r9,4), %r9 > - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ > + pcmpeqb %xmm1, %xmm0 /* Any null chars? */ > lea (%r10, %r9), %r10 > _CET_NOTRACK jmp *%r10 /* jump to corresponding case */ > > @@ -272,15 +242,15 @@ LABEL(bigger): > LABEL(ashr_0): > > movdqa (%rsi), %xmm1 > - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ > + pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > - pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */ > + pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ > #else > movdqa (%rdi), %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */ > + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ > #endif > - psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/ > + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ > pmovmskb %xmm1, %r9d > shr %cl, %edx /* adjust 0xffff for offset */ > shr %cl, %r9d /* adjust for 16-byte offset */ > @@ -360,10 +330,10 @@ LABEL(ashr_0_exit_use): > */ > .p2align 4 > LABEL(ashr_1): > - pslldq $15, D(%xmm2) /* shift first string to align with second */ > + pslldq $15, %xmm2 /* shift first string to align with second */ > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */ > - psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/ > + pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ > + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ > pmovmskb %xmm2, %r9d > shr %cl, %edx /* adjust 0xffff for offset */ > shr %cl, %r9d /* adjust for 16-byte offset */ > @@ -391,7 +361,7 @@ LABEL(loop_ashr_1_use): > > LABEL(nibble_ashr_1_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $1, -16(%rdi, %rdx), D(%xmm0) > + palignr $1, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -410,7 +380,7 @@ LABEL(nibble_ashr_1_restart_use): > jg LABEL(nibble_ashr_1_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $1, -16(%rdi, %rdx), D(%xmm0) > + palignr $1, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -430,7 +400,7 @@ LABEL(nibble_ashr_1_restart_use): > LABEL(nibble_ashr_1_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $1, D(%xmm0) > + psrldq $1, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -448,10 +418,10 @@ LABEL(nibble_ashr_1_use): > */ > .p2align 4 > LABEL(ashr_2): > - pslldq $14, D(%xmm2) > + pslldq $14, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -479,7 +449,7 @@ LABEL(loop_ashr_2_use): > > LABEL(nibble_ashr_2_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $2, -16(%rdi, %rdx), D(%xmm0) > + palignr $2, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -498,7 +468,7 @@ LABEL(nibble_ashr_2_restart_use): > jg LABEL(nibble_ashr_2_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $2, -16(%rdi, %rdx), D(%xmm0) > + palignr $2, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -518,7 +488,7 @@ LABEL(nibble_ashr_2_restart_use): > LABEL(nibble_ashr_2_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $2, D(%xmm0) > + psrldq $2, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -536,10 +506,10 @@ LABEL(nibble_ashr_2_use): > */ > .p2align 4 > LABEL(ashr_3): > - pslldq $13, D(%xmm2) > + pslldq $13, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, 
D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -567,7 +537,7 @@ LABEL(loop_ashr_3_use): > > LABEL(nibble_ashr_3_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $3, -16(%rdi, %rdx), D(%xmm0) > + palignr $3, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -586,7 +556,7 @@ LABEL(nibble_ashr_3_restart_use): > jg LABEL(nibble_ashr_3_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $3, -16(%rdi, %rdx), D(%xmm0) > + palignr $3, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -606,7 +576,7 @@ LABEL(nibble_ashr_3_restart_use): > LABEL(nibble_ashr_3_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $3, D(%xmm0) > + psrldq $3, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -624,10 +594,10 @@ LABEL(nibble_ashr_3_use): > */ > .p2align 4 > LABEL(ashr_4): > - pslldq $12, D(%xmm2) > + pslldq $12, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -656,7 +626,7 @@ LABEL(loop_ashr_4_use): > > LABEL(nibble_ashr_4_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $4, -16(%rdi, %rdx), D(%xmm0) > + palignr $4, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -675,7 +645,7 @@ LABEL(nibble_ashr_4_restart_use): > jg LABEL(nibble_ashr_4_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $4, -16(%rdi, %rdx), D(%xmm0) > + palignr $4, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -695,7 +665,7 @@ LABEL(nibble_ashr_4_restart_use): > LABEL(nibble_ashr_4_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $4, D(%xmm0) > + psrldq $4, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -713,10 +683,10 @@ LABEL(nibble_ashr_4_use): > */ > .p2align 4 > LABEL(ashr_5): > - pslldq $11, D(%xmm2) > + pslldq $11, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -745,7 +715,7 @@ LABEL(loop_ashr_5_use): > > LABEL(nibble_ashr_5_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $5, -16(%rdi, %rdx), D(%xmm0) > + palignr $5, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -765,7 +735,7 @@ LABEL(nibble_ashr_5_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $5, -16(%rdi, %rdx), D(%xmm0) > + palignr $5, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -785,7 +755,7 @@ LABEL(nibble_ashr_5_restart_use): > LABEL(nibble_ashr_5_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $5, D(%xmm0) > + psrldq $5, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -803,10 +773,10 @@ LABEL(nibble_ashr_5_use): > */ > .p2align 4 > LABEL(ashr_6): > - 
pslldq $10, D(%xmm2) > + pslldq $10, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -835,7 +805,7 @@ LABEL(loop_ashr_6_use): > > LABEL(nibble_ashr_6_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $6, -16(%rdi, %rdx), D(%xmm0) > + palignr $6, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -854,7 +824,7 @@ LABEL(nibble_ashr_6_restart_use): > jg LABEL(nibble_ashr_6_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $6, -16(%rdi, %rdx), D(%xmm0) > + palignr $6, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -874,7 +844,7 @@ LABEL(nibble_ashr_6_restart_use): > LABEL(nibble_ashr_6_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $6, D(%xmm0) > + psrldq $6, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -892,10 +862,10 @@ LABEL(nibble_ashr_6_use): > */ > .p2align 4 > LABEL(ashr_7): > - pslldq $9, D(%xmm2) > + pslldq $9, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -924,7 +894,7 @@ LABEL(loop_ashr_7_use): > > LABEL(nibble_ashr_7_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $7, -16(%rdi, %rdx), D(%xmm0) > + palignr $7, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -943,7 +913,7 @@ LABEL(nibble_ashr_7_restart_use): > jg LABEL(nibble_ashr_7_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $7, -16(%rdi, %rdx), D(%xmm0) > + palignr $7, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > #else > @@ -963,7 +933,7 @@ LABEL(nibble_ashr_7_restart_use): > LABEL(nibble_ashr_7_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $7, D(%xmm0) > + psrldq $7, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -981,10 +951,10 @@ LABEL(nibble_ashr_7_use): > */ > .p2align 4 > LABEL(ashr_8): > - pslldq $8, D(%xmm2) > + pslldq $8, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1013,7 +983,7 @@ LABEL(loop_ashr_8_use): > > LABEL(nibble_ashr_8_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $8, -16(%rdi, %rdx), D(%xmm0) > + palignr $8, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1032,7 +1002,7 @@ LABEL(nibble_ashr_8_restart_use): > jg LABEL(nibble_ashr_8_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $8, -16(%rdi, %rdx), D(%xmm0) > + palignr $8, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1052,7 +1022,7 @@ LABEL(nibble_ashr_8_restart_use): > LABEL(nibble_ashr_8_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $8, D(%xmm0) > + psrldq $8, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1070,10 +1040,10 @@ LABEL(nibble_ashr_8_use): > */ > .p2align 4 > LABEL(ashr_9): > - pslldq $7, D(%xmm2) > + pslldq $7, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1103,7 +1073,7 @@ LABEL(loop_ashr_9_use): > LABEL(nibble_ashr_9_restart_use): > movdqa (%rdi, %rdx), %xmm0 > > - palignr $9, -16(%rdi, %rdx), D(%xmm0) > + palignr $9, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1122,7 +1092,7 @@ LABEL(nibble_ashr_9_restart_use): > jg LABEL(nibble_ashr_9_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $9, -16(%rdi, %rdx), D(%xmm0) > + palignr $9, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1142,7 +1112,7 @@ LABEL(nibble_ashr_9_restart_use): > LABEL(nibble_ashr_9_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $9, D(%xmm0) > + psrldq $9, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1160,10 +1130,10 @@ LABEL(nibble_ashr_9_use): > */ > .p2align 4 > LABEL(ashr_10): > - pslldq $6, D(%xmm2) > + pslldq $6, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1192,7 +1162,7 @@ LABEL(loop_ashr_10_use): > > LABEL(nibble_ashr_10_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $10, -16(%rdi, %rdx), D(%xmm0) > + palignr $10, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1211,7 +1181,7 @@ LABEL(nibble_ashr_10_restart_use): > jg LABEL(nibble_ashr_10_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $10, -16(%rdi, %rdx), D(%xmm0) > + palignr $10, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1231,7 +1201,7 @@ LABEL(nibble_ashr_10_restart_use): > LABEL(nibble_ashr_10_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $10, D(%xmm0) > + psrldq $10, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1249,10 +1219,10 @@ LABEL(nibble_ashr_10_use): > */ > .p2align 4 > LABEL(ashr_11): > - pslldq $5, D(%xmm2) > + pslldq $5, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1281,7 +1251,7 @@ LABEL(loop_ashr_11_use): > > LABEL(nibble_ashr_11_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $11, -16(%rdi, %rdx), D(%xmm0) > + palignr $11, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1300,7 +1270,7 @@ LABEL(nibble_ashr_11_restart_use): > jg LABEL(nibble_ashr_11_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $11, -16(%rdi, %rdx), D(%xmm0) > + palignr $11, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1320,7 +1290,7 @@ LABEL(nibble_ashr_11_restart_use): > LABEL(nibble_ashr_11_use): > sub 
$0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $11, D(%xmm0) > + psrldq $11, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1338,10 +1308,10 @@ LABEL(nibble_ashr_11_use): > */ > .p2align 4 > LABEL(ashr_12): > - pslldq $4, D(%xmm2) > + pslldq $4, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1370,7 +1340,7 @@ LABEL(loop_ashr_12_use): > > LABEL(nibble_ashr_12_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $12, -16(%rdi, %rdx), D(%xmm0) > + palignr $12, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1389,7 +1359,7 @@ LABEL(nibble_ashr_12_restart_use): > jg LABEL(nibble_ashr_12_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $12, -16(%rdi, %rdx), D(%xmm0) > + palignr $12, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1409,7 +1379,7 @@ LABEL(nibble_ashr_12_restart_use): > LABEL(nibble_ashr_12_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $12, D(%xmm0) > + psrldq $12, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1427,10 +1397,10 @@ LABEL(nibble_ashr_12_use): > */ > .p2align 4 > LABEL(ashr_13): > - pslldq $3, D(%xmm2) > + pslldq $3, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1460,7 +1430,7 @@ LABEL(loop_ashr_13_use): > > LABEL(nibble_ashr_13_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $13, -16(%rdi, %rdx), D(%xmm0) > + palignr $13, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1479,7 +1449,7 @@ LABEL(nibble_ashr_13_restart_use): > jg LABEL(nibble_ashr_13_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $13, -16(%rdi, %rdx), D(%xmm0) > + palignr $13, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1499,7 +1469,7 @@ LABEL(nibble_ashr_13_restart_use): > LABEL(nibble_ashr_13_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $13, D(%xmm0) > + psrldq $13, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1517,10 +1487,10 @@ LABEL(nibble_ashr_13_use): > */ > .p2align 4 > LABEL(ashr_14): > - pslldq $2, D(%xmm2) > + pslldq $2, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1550,7 +1520,7 @@ LABEL(loop_ashr_14_use): > > LABEL(nibble_ashr_14_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $14, -16(%rdi, %rdx), D(%xmm0) > + palignr $14, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1569,7 +1539,7 @@ LABEL(nibble_ashr_14_restart_use): > jg LABEL(nibble_ashr_14_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $14, -16(%rdi, %rdx), D(%xmm0) > + palignr $14, -16(%rdi, %rdx), %xmm0 > #if !defined 
USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1589,7 +1559,7 @@ LABEL(nibble_ashr_14_restart_use): > LABEL(nibble_ashr_14_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $14, D(%xmm0) > + psrldq $14, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > @@ -1607,10 +1577,10 @@ LABEL(nibble_ashr_14_use): > */ > .p2align 4 > LABEL(ashr_15): > - pslldq $1, D(%xmm2) > + pslldq $1, %xmm2 > TOLOWER (%xmm1, %xmm2) > - pcmpeqb %xmm1, D(%xmm2) > - psubb %xmm0, D(%xmm2) > + pcmpeqb %xmm1, %xmm2 > + psubb %xmm0, %xmm2 > pmovmskb %xmm2, %r9d > shr %cl, %edx > shr %cl, %r9d > @@ -1642,7 +1612,7 @@ LABEL(loop_ashr_15_use): > > LABEL(nibble_ashr_15_restart_use): > movdqa (%rdi, %rdx), %xmm0 > - palignr $15, -16(%rdi, %rdx), D(%xmm0) > + palignr $15, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1661,7 +1631,7 @@ LABEL(nibble_ashr_15_restart_use): > jg LABEL(nibble_ashr_15_use) > > movdqa (%rdi, %rdx), %xmm0 > - palignr $15, -16(%rdi, %rdx), D(%xmm0) > + palignr $15, -16(%rdi, %rdx), %xmm0 > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > #else > @@ -1681,7 +1651,7 @@ LABEL(nibble_ashr_15_restart_use): > LABEL(nibble_ashr_15_use): > sub $0x1000, %r10 > movdqa -16(%rdi, %rdx), %xmm0 > - psrldq $15, D(%xmm0) > + psrldq $15, %xmm0 > pcmpistri $0x3a,%xmm0, %xmm0 > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > cmp %r11, %rcx > diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S > deleted file mode 100644 > index b51b86d223..0000000000 > --- a/sysdeps/x86_64/multiarch/strncase_l-avx.S > +++ /dev/null > @@ -1,22 +0,0 @@ > -/* strncasecmp_l optimized with AVX. > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. */ > - > -#define STRCMP_SSE42 __strncasecmp_l_avx > -#define USE_AVX 1 > -#define USE_AS_STRNCASECMP_L > -#include "strcmp-sse42.S" > -- > 2.25.1 > LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks.
On Thu, Mar 24, 2022 at 12:09 PM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Wed, Mar 23, 2022 at 3:03 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > The rational is: > > > > 1. SSE42 has nearly identical logic so any benefit is minimal (3.4% > > regression on Tigerlake using SSE42 versus AVX across the > > benchtest suite). > > 2. AVX2 version covers the majority of targets that previously > > prefered it. > > 3. The targets where AVX would still be best (SnB and IVB) are > > becoming outdated. > > > > All in all the saving the code size is worth it. > > > > All string/memory tests pass. > > --- > > Geomtric Mean N=40 runs; All functions page aligned > > length, align1, align2, max_char, AVX Time / SSE42 Time > > 1, 1, 1, 127, 0.928 > > 2, 2, 2, 127, 0.934 > > 3, 3, 3, 127, 0.975 > > 4, 4, 4, 127, 0.96 > > 5, 5, 5, 127, 0.935 > > 6, 6, 6, 127, 0.929 > > 7, 7, 7, 127, 0.959 > > 8, 0, 0, 127, 0.955 > > 9, 1, 1, 127, 0.944 > > 10, 2, 2, 127, 0.975 > > 11, 3, 3, 127, 0.935 > > 12, 4, 4, 127, 0.931 > > 13, 5, 5, 127, 0.926 > > 14, 6, 6, 127, 0.901 > > 15, 7, 7, 127, 0.951 > > 4, 0, 0, 127, 0.958 > > 4, 0, 0, 254, 0.956 > > 8, 0, 0, 254, 0.977 > > 16, 0, 0, 127, 0.955 > > 16, 0, 0, 254, 0.953 > > 32, 0, 0, 127, 0.943 > > 32, 0, 0, 254, 0.941 > > 64, 0, 0, 127, 0.941 > > 64, 0, 0, 254, 0.955 > > 128, 0, 0, 127, 0.972 > > 128, 0, 0, 254, 0.975 > > 256, 0, 0, 127, 0.996 > > 256, 0, 0, 254, 0.993 > > 512, 0, 0, 127, 0.992 > > 512, 0, 0, 254, 0.986 > > 1024, 0, 0, 127, 0.994 > > 1024, 0, 0, 254, 0.993 > > 16, 1, 2, 127, 0.933 > > 16, 2, 1, 254, 0.953 > > 32, 2, 4, 127, 0.927 > > 32, 4, 2, 254, 0.986 > > 64, 3, 6, 127, 0.991 > > 64, 6, 3, 254, 1.014 > > 128, 4, 0, 127, 1.001 > > 128, 0, 4, 254, 0.991 > > 256, 5, 2, 127, 1.011 > > 256, 2, 5, 254, 1.013 > > 512, 6, 4, 127, 1.056 > > 512, 4, 6, 254, 0.916 > > 1024, 7, 6, 127, 1.059 > > 1024, 6, 7, 254, 1.043 > > > > sysdeps/x86_64/multiarch/Makefile | 2 - > > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 - > > sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 4 - > > sysdeps/x86_64/multiarch/strcasecmp_l-avx.S | 22 -- > > sysdeps/x86_64/multiarch/strcmp-sse42.S | 240 +++++++++----------- > > sysdeps/x86_64/multiarch/strncase_l-avx.S | 22 -- > > 6 files changed, 105 insertions(+), 197 deletions(-) > > delete mode 100644 sysdeps/x86_64/multiarch/strcasecmp_l-avx.S > > delete mode 100644 sysdeps/x86_64/multiarch/strncase_l-avx.S > > > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > > index 35d80dc2ff..6507d1b7fa 100644 > > --- a/sysdeps/x86_64/multiarch/Makefile > > +++ b/sysdeps/x86_64/multiarch/Makefile > > @@ -54,7 +54,6 @@ sysdep_routines += \ > > stpncpy-evex \ > > stpncpy-sse2-unaligned \ > > stpncpy-ssse3 \ > > - strcasecmp_l-avx \ > > strcasecmp_l-avx2 \ > > strcasecmp_l-avx2-rtm \ > > strcasecmp_l-evex \ > > @@ -95,7 +94,6 @@ sysdep_routines += \ > > strlen-avx2-rtm \ > > strlen-evex \ > > strlen-sse2 \ > > - strncase_l-avx \ > > strncase_l-avx2 \ > > strncase_l-avx2-rtm \ > > strncase_l-evex \ > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > index f1a4d3dac2..40cc6cc49e 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > @@ -447,9 +447,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > (CPU_FEATURE_USABLE (AVX2) > > && CPU_FEATURE_USABLE (RTM)), > > __strcasecmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strcasecmp, > > - 
CPU_FEATURE_USABLE (AVX), > > - __strcasecmp_avx) > > IFUNC_IMPL_ADD (array, i, strcasecmp, > > CPU_FEATURE_USABLE (SSE4_2), > > __strcasecmp_sse42) > > @@ -471,9 +468,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > (CPU_FEATURE_USABLE (AVX2) > > && CPU_FEATURE_USABLE (RTM)), > > __strcasecmp_l_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strcasecmp_l, > > - CPU_FEATURE_USABLE (AVX), > > - __strcasecmp_l_avx) > > IFUNC_IMPL_ADD (array, i, strcasecmp_l, > > CPU_FEATURE_USABLE (SSE4_2), > > __strcasecmp_l_sse42) > > @@ -609,9 +603,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > (CPU_FEATURE_USABLE (AVX2) > > && CPU_FEATURE_USABLE (RTM)), > > __strncasecmp_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strncasecmp, > > - CPU_FEATURE_USABLE (AVX), > > - __strncasecmp_avx) > > IFUNC_IMPL_ADD (array, i, strncasecmp, > > CPU_FEATURE_USABLE (SSE4_2), > > __strncasecmp_sse42) > > @@ -634,9 +625,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > (CPU_FEATURE_USABLE (AVX2) > > && CPU_FEATURE_USABLE (RTM)), > > __strncasecmp_l_avx2_rtm) > > - IFUNC_IMPL_ADD (array, i, strncasecmp_l, > > - CPU_FEATURE_USABLE (AVX), > > - __strncasecmp_l_avx) > > IFUNC_IMPL_ADD (array, i, strncasecmp_l, > > CPU_FEATURE_USABLE (SSE4_2), > > __strncasecmp_l_sse42) > > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > > index bf0d146e7f..766539c241 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h > > @@ -22,7 +22,6 @@ > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > > @@ -46,9 +45,6 @@ IFUNC_SELECTOR (void) > > return OPTIMIZE (avx2); > > } > > > > - if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) > > - return OPTIMIZE (avx); > > - > > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2) > > && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) > > return OPTIMIZE (sse42); > > diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S > > deleted file mode 100644 > > index 7ec7c21b5a..0000000000 > > --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S > > +++ /dev/null > > @@ -1,22 +0,0 @@ > > -/* strcasecmp_l optimized with AVX. > > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > > - This file is part of the GNU C Library. > > - > > - The GNU C Library is free software; you can redistribute it and/or > > - modify it under the terms of the GNU Lesser General Public > > - License as published by the Free Software Foundation; either > > - version 2.1 of the License, or (at your option) any later version. > > - > > - The GNU C Library is distributed in the hope that it will be useful, > > - but WITHOUT ANY WARRANTY; without even the implied warranty of > > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - Lesser General Public License for more details. 
> > - > > - You should have received a copy of the GNU Lesser General Public > > - License along with the GNU C Library; if not, see > > - <https://www.gnu.org/licenses/>. */ > > - > > -#define STRCMP_SSE42 __strcasecmp_l_avx > > -#define USE_AVX 1 > > -#define USE_AS_STRCASECMP_L > > -#include "strcmp-sse42.S" > > diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S > > index 7805ae9d41..a9178ad25c 100644 > > --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S > > +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S > > @@ -41,13 +41,8 @@ > > # define UPDATE_STRNCMP_COUNTER > > #endif > > > > -#ifdef USE_AVX > > -# define SECTION avx > > -# define GLABEL(l) l##_avx > > -#else > > -# define SECTION sse4.2 > > -# define GLABEL(l) l##_sse42 > > -#endif > > +#define SECTION sse4.2 > > +#define GLABEL(l) l##_sse42 > > > > #define LABEL(l) .L##l > > > > @@ -105,21 +100,7 @@ END (GLABEL(__strncasecmp)) > > #endif > > > > > > -#ifdef USE_AVX > > -# define movdqa vmovdqa > > -# define movdqu vmovdqu > > -# define pmovmskb vpmovmskb > > -# define pcmpistri vpcmpistri > > -# define psubb vpsubb > > -# define pcmpeqb vpcmpeqb > > -# define psrldq vpsrldq > > -# define pslldq vpslldq > > -# define palignr vpalignr > > -# define pxor vpxor > > -# define D(arg) arg, arg > > -#else > > -# define D(arg) arg > > -#endif > > +#define arg arg > > > > STRCMP_SSE42: > > cfi_startproc > > @@ -191,18 +172,7 @@ LABEL(case_add): > > movdqu (%rdi), %xmm1 > > movdqu (%rsi), %xmm2 > > #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L > > -# ifdef USE_AVX > > -# define TOLOWER(reg1, reg2) \ > > - vpaddb LCASE_MIN_reg, reg1, %xmm7; \ > > - vpaddb LCASE_MIN_reg, reg2, %xmm8; \ > > - vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \ > > - vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \ > > - vpandn CASE_ADD_reg, %xmm7, %xmm7; \ > > - vpandn CASE_ADD_reg, %xmm8, %xmm8; \ > > - vpaddb %xmm7, reg1, reg1; \ > > - vpaddb %xmm8, reg2, reg2 > > -# else > > -# define TOLOWER(reg1, reg2) \ > > +# define TOLOWER(reg1, reg2) \ > > movdqa LCASE_MIN_reg, %xmm7; \ > > movdqa LCASE_MIN_reg, %xmm8; \ > > paddb reg1, %xmm7; \ > > @@ -213,15 +183,15 @@ LABEL(case_add): > > pandn CASE_ADD_reg, %xmm8; \ > > paddb %xmm7, reg1; \ > > paddb %xmm8, reg2 > > -# endif > > + > > TOLOWER (%xmm1, %xmm2) > > #else > > # define TOLOWER(reg1, reg2) > > #endif > > - pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */ > > - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ > > - pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */ > > - psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/ > > + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ > > + pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ > > + pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ > > + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ > > pmovmskb %xmm1, %edx > > sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ > > jnz LABEL(less16bytes)/* If not, find different value or null char */ > > @@ -245,7 +215,7 @@ LABEL(crosscache): > > xor %r8d, %r8d > > and $0xf, %ecx /* offset of rsi */ > > and $0xf, %eax /* offset of rdi */ > > - pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */ > > + pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ > > cmp %eax, %ecx > > je LABEL(ashr_0) /* rsi and rdi relative offset same */ > > ja LABEL(bigger) > > @@ -259,7 +229,7 @@ LABEL(bigger): > > sub %rcx, %r9 > > lea LABEL(unaligned_table)(%rip), %r10 > > movslq (%r10, %r9,4), %r9 > > - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ > > + pcmpeqb %xmm1, %xmm0 /* Any null chars? */ > > lea (%r10, %r9), %r10 > > _CET_NOTRACK jmp *%r10 /* jump to corresponding case */ > > > > @@ -272,15 +242,15 @@ LABEL(bigger): > > LABEL(ashr_0): > > > > movdqa (%rsi), %xmm1 > > - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ > > + pcmpeqb %xmm1, %xmm0 /* Any null chars? */ > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > - pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */ > > + pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ > > #else > > movdqa (%rdi), %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */ > > + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ > > #endif > > - psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/ > > + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ > > pmovmskb %xmm1, %r9d > > shr %cl, %edx /* adjust 0xffff for offset */ > > shr %cl, %r9d /* adjust for 16-byte offset */ > > @@ -360,10 +330,10 @@ LABEL(ashr_0_exit_use): > > */ > > .p2align 4 > > LABEL(ashr_1): > > - pslldq $15, D(%xmm2) /* shift first string to align with second */ > > + pslldq $15, %xmm2 /* shift first string to align with second */ > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */ > > - psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/ > > + pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ > > + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ > > pmovmskb %xmm2, %r9d > > shr %cl, %edx /* adjust 0xffff for offset */ > > shr %cl, %r9d /* adjust for 16-byte offset */ > > @@ -391,7 +361,7 @@ LABEL(loop_ashr_1_use): > > > > LABEL(nibble_ashr_1_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $1, -16(%rdi, %rdx), D(%xmm0) > > + palignr $1, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -410,7 +380,7 @@ LABEL(nibble_ashr_1_restart_use): > > jg LABEL(nibble_ashr_1_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $1, -16(%rdi, %rdx), D(%xmm0) > > + palignr $1, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -430,7 +400,7 @@ LABEL(nibble_ashr_1_restart_use): > > LABEL(nibble_ashr_1_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $1, D(%xmm0) > > + psrldq $1, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -448,10 +418,10 @@ LABEL(nibble_ashr_1_use): > > */ > > .p2align 4 > > LABEL(ashr_2): > > - pslldq $14, 
D(%xmm2) > > + pslldq $14, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -479,7 +449,7 @@ LABEL(loop_ashr_2_use): > > > > LABEL(nibble_ashr_2_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $2, -16(%rdi, %rdx), D(%xmm0) > > + palignr $2, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -498,7 +468,7 @@ LABEL(nibble_ashr_2_restart_use): > > jg LABEL(nibble_ashr_2_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $2, -16(%rdi, %rdx), D(%xmm0) > > + palignr $2, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -518,7 +488,7 @@ LABEL(nibble_ashr_2_restart_use): > > LABEL(nibble_ashr_2_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $2, D(%xmm0) > > + psrldq $2, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -536,10 +506,10 @@ LABEL(nibble_ashr_2_use): > > */ > > .p2align 4 > > LABEL(ashr_3): > > - pslldq $13, D(%xmm2) > > + pslldq $13, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -567,7 +537,7 @@ LABEL(loop_ashr_3_use): > > > > LABEL(nibble_ashr_3_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $3, -16(%rdi, %rdx), D(%xmm0) > > + palignr $3, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -586,7 +556,7 @@ LABEL(nibble_ashr_3_restart_use): > > jg LABEL(nibble_ashr_3_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $3, -16(%rdi, %rdx), D(%xmm0) > > + palignr $3, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -606,7 +576,7 @@ LABEL(nibble_ashr_3_restart_use): > > LABEL(nibble_ashr_3_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $3, D(%xmm0) > > + psrldq $3, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -624,10 +594,10 @@ LABEL(nibble_ashr_3_use): > > */ > > .p2align 4 > > LABEL(ashr_4): > > - pslldq $12, D(%xmm2) > > + pslldq $12, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -656,7 +626,7 @@ LABEL(loop_ashr_4_use): > > > > LABEL(nibble_ashr_4_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $4, -16(%rdi, %rdx), D(%xmm0) > > + palignr $4, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -675,7 +645,7 @@ LABEL(nibble_ashr_4_restart_use): > > jg LABEL(nibble_ashr_4_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $4, -16(%rdi, %rdx), D(%xmm0) > > + palignr $4, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -695,7 +665,7 @@ LABEL(nibble_ashr_4_restart_use): > > 
LABEL(nibble_ashr_4_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $4, D(%xmm0) > > + psrldq $4, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -713,10 +683,10 @@ LABEL(nibble_ashr_4_use): > > */ > > .p2align 4 > > LABEL(ashr_5): > > - pslldq $11, D(%xmm2) > > + pslldq $11, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -745,7 +715,7 @@ LABEL(loop_ashr_5_use): > > > > LABEL(nibble_ashr_5_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $5, -16(%rdi, %rdx), D(%xmm0) > > + palignr $5, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -765,7 +735,7 @@ LABEL(nibble_ashr_5_restart_use): > > > > movdqa (%rdi, %rdx), %xmm0 > > > > - palignr $5, -16(%rdi, %rdx), D(%xmm0) > > + palignr $5, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -785,7 +755,7 @@ LABEL(nibble_ashr_5_restart_use): > > LABEL(nibble_ashr_5_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $5, D(%xmm0) > > + psrldq $5, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -803,10 +773,10 @@ LABEL(nibble_ashr_5_use): > > */ > > .p2align 4 > > LABEL(ashr_6): > > - pslldq $10, D(%xmm2) > > + pslldq $10, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -835,7 +805,7 @@ LABEL(loop_ashr_6_use): > > > > LABEL(nibble_ashr_6_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $6, -16(%rdi, %rdx), D(%xmm0) > > + palignr $6, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -854,7 +824,7 @@ LABEL(nibble_ashr_6_restart_use): > > jg LABEL(nibble_ashr_6_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $6, -16(%rdi, %rdx), D(%xmm0) > > + palignr $6, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -874,7 +844,7 @@ LABEL(nibble_ashr_6_restart_use): > > LABEL(nibble_ashr_6_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $6, D(%xmm0) > > + psrldq $6, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -892,10 +862,10 @@ LABEL(nibble_ashr_6_use): > > */ > > .p2align 4 > > LABEL(ashr_7): > > - pslldq $9, D(%xmm2) > > + pslldq $9, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -924,7 +894,7 @@ LABEL(loop_ashr_7_use): > > > > LABEL(nibble_ashr_7_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $7, -16(%rdi, %rdx), D(%xmm0) > > + palignr $7, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -943,7 +913,7 @@ LABEL(nibble_ashr_7_restart_use): > > 
jg LABEL(nibble_ashr_7_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $7, -16(%rdi, %rdx), D(%xmm0) > > + palignr $7, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a,(%rsi,%rdx), %xmm0 > > #else > > @@ -963,7 +933,7 @@ LABEL(nibble_ashr_7_restart_use): > > LABEL(nibble_ashr_7_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $7, D(%xmm0) > > + psrldq $7, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -981,10 +951,10 @@ LABEL(nibble_ashr_7_use): > > */ > > .p2align 4 > > LABEL(ashr_8): > > - pslldq $8, D(%xmm2) > > + pslldq $8, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1013,7 +983,7 @@ LABEL(loop_ashr_8_use): > > > > LABEL(nibble_ashr_8_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $8, -16(%rdi, %rdx), D(%xmm0) > > + palignr $8, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1032,7 +1002,7 @@ LABEL(nibble_ashr_8_restart_use): > > jg LABEL(nibble_ashr_8_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $8, -16(%rdi, %rdx), D(%xmm0) > > + palignr $8, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1052,7 +1022,7 @@ LABEL(nibble_ashr_8_restart_use): > > LABEL(nibble_ashr_8_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $8, D(%xmm0) > > + psrldq $8, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1070,10 +1040,10 @@ LABEL(nibble_ashr_8_use): > > */ > > .p2align 4 > > LABEL(ashr_9): > > - pslldq $7, D(%xmm2) > > + pslldq $7, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1103,7 +1073,7 @@ LABEL(loop_ashr_9_use): > > LABEL(nibble_ashr_9_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > > > - palignr $9, -16(%rdi, %rdx), D(%xmm0) > > + palignr $9, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1122,7 +1092,7 @@ LABEL(nibble_ashr_9_restart_use): > > jg LABEL(nibble_ashr_9_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $9, -16(%rdi, %rdx), D(%xmm0) > > + palignr $9, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1142,7 +1112,7 @@ LABEL(nibble_ashr_9_restart_use): > > LABEL(nibble_ashr_9_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $9, D(%xmm0) > > + psrldq $9, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1160,10 +1130,10 @@ LABEL(nibble_ashr_9_use): > > */ > > .p2align 4 > > LABEL(ashr_10): > > - pslldq $6, D(%xmm2) > > + pslldq $6, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1192,7 
+1162,7 @@ LABEL(loop_ashr_10_use): > > > > LABEL(nibble_ashr_10_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $10, -16(%rdi, %rdx), D(%xmm0) > > + palignr $10, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1211,7 +1181,7 @@ LABEL(nibble_ashr_10_restart_use): > > jg LABEL(nibble_ashr_10_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $10, -16(%rdi, %rdx), D(%xmm0) > > + palignr $10, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1231,7 +1201,7 @@ LABEL(nibble_ashr_10_restart_use): > > LABEL(nibble_ashr_10_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $10, D(%xmm0) > > + psrldq $10, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1249,10 +1219,10 @@ LABEL(nibble_ashr_10_use): > > */ > > .p2align 4 > > LABEL(ashr_11): > > - pslldq $5, D(%xmm2) > > + pslldq $5, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1281,7 +1251,7 @@ LABEL(loop_ashr_11_use): > > > > LABEL(nibble_ashr_11_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $11, -16(%rdi, %rdx), D(%xmm0) > > + palignr $11, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1300,7 +1270,7 @@ LABEL(nibble_ashr_11_restart_use): > > jg LABEL(nibble_ashr_11_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $11, -16(%rdi, %rdx), D(%xmm0) > > + palignr $11, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1320,7 +1290,7 @@ LABEL(nibble_ashr_11_restart_use): > > LABEL(nibble_ashr_11_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $11, D(%xmm0) > > + psrldq $11, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1338,10 +1308,10 @@ LABEL(nibble_ashr_11_use): > > */ > > .p2align 4 > > LABEL(ashr_12): > > - pslldq $4, D(%xmm2) > > + pslldq $4, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1370,7 +1340,7 @@ LABEL(loop_ashr_12_use): > > > > LABEL(nibble_ashr_12_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $12, -16(%rdi, %rdx), D(%xmm0) > > + palignr $12, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1389,7 +1359,7 @@ LABEL(nibble_ashr_12_restart_use): > > jg LABEL(nibble_ashr_12_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $12, -16(%rdi, %rdx), D(%xmm0) > > + palignr $12, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1409,7 +1379,7 @@ LABEL(nibble_ashr_12_restart_use): > > LABEL(nibble_ashr_12_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $12, D(%xmm0) > > + psrldq $12, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1427,10 +1397,10 @@ LABEL(nibble_ashr_12_use): > > */ > > .p2align 4 > > LABEL(ashr_13): > > - pslldq $3, D(%xmm2) > > + pslldq $3, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1460,7 +1430,7 @@ LABEL(loop_ashr_13_use): > > > > LABEL(nibble_ashr_13_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $13, -16(%rdi, %rdx), D(%xmm0) > > + palignr $13, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1479,7 +1449,7 @@ LABEL(nibble_ashr_13_restart_use): > > jg LABEL(nibble_ashr_13_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $13, -16(%rdi, %rdx), D(%xmm0) > > + palignr $13, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1499,7 +1469,7 @@ LABEL(nibble_ashr_13_restart_use): > > LABEL(nibble_ashr_13_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $13, D(%xmm0) > > + psrldq $13, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1517,10 +1487,10 @@ LABEL(nibble_ashr_13_use): > > */ > > .p2align 4 > > LABEL(ashr_14): > > - pslldq $2, D(%xmm2) > > + pslldq $2, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1550,7 +1520,7 @@ LABEL(loop_ashr_14_use): > > > > LABEL(nibble_ashr_14_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $14, -16(%rdi, %rdx), D(%xmm0) > > + palignr $14, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1569,7 +1539,7 @@ LABEL(nibble_ashr_14_restart_use): > > jg LABEL(nibble_ashr_14_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $14, -16(%rdi, %rdx), D(%xmm0) > > + palignr $14, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1589,7 +1559,7 @@ LABEL(nibble_ashr_14_restart_use): > > LABEL(nibble_ashr_14_use): > > sub $0x1000, %r10 > > movdqa -16(%rdi, %rdx), %xmm0 > > - psrldq $14, D(%xmm0) > > + psrldq $14, %xmm0 > > pcmpistri $0x3a,%xmm0, %xmm0 > > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L > > cmp %r11, %rcx > > @@ -1607,10 +1577,10 @@ LABEL(nibble_ashr_14_use): > > */ > > .p2align 4 > > LABEL(ashr_15): > > - pslldq $1, D(%xmm2) > > + pslldq $1, %xmm2 > > TOLOWER (%xmm1, %xmm2) > > - pcmpeqb %xmm1, D(%xmm2) > > - psubb %xmm0, D(%xmm2) > > + pcmpeqb %xmm1, %xmm2 > > + psubb %xmm0, %xmm2 > > pmovmskb %xmm2, %r9d > > shr %cl, %edx > > shr %cl, %r9d > > @@ -1642,7 +1612,7 @@ LABEL(loop_ashr_15_use): > > > > LABEL(nibble_ashr_15_restart_use): > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $15, -16(%rdi, %rdx), D(%xmm0) > > + palignr $15, -16(%rdi, %rdx), %xmm0 > > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L > > pcmpistri $0x1a, (%rsi,%rdx), %xmm0 > > #else > > @@ -1661,7 +1631,7 @@ LABEL(nibble_ashr_15_restart_use): > > jg LABEL(nibble_ashr_15_use) > > > > movdqa (%rdi, %rdx), %xmm0 > > - palignr $15, -16(%rdi, 
%rdx), D(%xmm0)
> > + palignr $15, -16(%rdi, %rdx), %xmm0
> > #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> > pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> > #else
> > @@ -1681,7 +1651,7 @@ LABEL(nibble_ashr_15_restart_use):
> > LABEL(nibble_ashr_15_use):
> > sub $0x1000, %r10
> > movdqa -16(%rdi, %rdx), %xmm0
> > - psrldq $15, D(%xmm0)
> > + psrldq $15, %xmm0
> > pcmpistri $0x3a,%xmm0, %xmm0
> > #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> > cmp %r11, %rcx
> > diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
> > deleted file mode 100644
> > index b51b86d223..0000000000
> > --- a/sysdeps/x86_64/multiarch/strncase_l-avx.S
> > +++ /dev/null
> > @@ -1,22 +0,0 @@
> > -/* strncasecmp_l optimized with AVX.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > - This file is part of the GNU C Library.
> > -
> > - The GNU C Library is free software; you can redistribute it and/or
> > - modify it under the terms of the GNU Lesser General Public
> > - License as published by the Free Software Foundation; either
> > - version 2.1 of the License, or (at your option) any later version.
> > -
> > - The GNU C Library is distributed in the hope that it will be useful,
> > - but WITHOUT ANY WARRANTY; without even the implied warranty of
> > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > - Lesser General Public License for more details.
> > -
> > - You should have received a copy of the GNU Lesser General Public
> > - License along with the GNU C Library; if not, see
> > - <https://www.gnu.org/licenses/>. */
> > -
> > -#define STRCMP_SSE42 __strncasecmp_l_avx
> > -#define USE_AVX 1
> > -#define USE_AS_STRNCASECMP_L
> > -#include "strcmp-sse42.S"
> > --
> > 2.25.1
> >
>
> LGTM.
>
> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
>
> Thanks.
>
> --
> H.J.

I would like to backport this patch to release branches.

Any comments or objections?

--Sunil