Message ID | 20220609041653.2515397-2-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1,1/3] x86: Align varshift table to 32-bytes | expand |
On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > No change to the actual logic of the functions. The goal is so that the > avx/avx2 machines rely less on sse instructions. These aren't the only SSE2 functions. The rest of glibc may still be compiled with SSE2. A different approach is to compile the whole glibc with x86-64 ISA level 3. > Full xcheck passes on x86_64. > --- > sysdeps/x86_64/multiarch/Makefile | 21 ++++++++++----- > .../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} | 4 +++ > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 +++++ > sysdeps/x86_64/multiarch/strcspn-c-avx.c | 21 +++++++++++++++ > .../{strcspn-c.c => strcspn-c-sse4.c} | 26 ++++++++++++------- > sysdeps/x86_64/multiarch/strcspn.c | 2 +- > sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 23 ++++++++++++++++ > .../{strpbrk-c.c => strpbrk-c-sse4.c} | 6 ++--- > sysdeps/x86_64/multiarch/strpbrk.c | 2 +- > sysdeps/x86_64/multiarch/strspn-c-avx.c | 21 +++++++++++++++ > .../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++--- > sysdeps/x86_64/multiarch/strspn.c | 2 +- > 12 files changed, 122 insertions(+), 27 deletions(-) > rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%) > create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c > rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%) > create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c > rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%) > create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c > rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%) > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 3d153cac35..27f306c7c8 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -76,7 +76,8 @@ sysdep_routines += \ > strcpy-evex \ > strcpy-sse2 \ > strcpy-sse2-unaligned \ > - strcspn-c \ > + strcspn-c-avx \ > + strcspn-c-sse4 \ > strcspn-sse2 \ > 
strlen-avx2 \ > strlen-avx2-rtm \ > @@ -108,22 +109,28 @@ sysdep_routines += \ > strnlen-evex \ > strnlen-evex512 \ > strnlen-sse2 \ > - strpbrk-c \ > + strpbrk-c-avx \ > + strpbrk-c-sse4 \ > strpbrk-sse2 \ > strrchr-avx2 \ > strrchr-avx2-rtm \ > strrchr-evex \ > strrchr-sse2 \ > - strspn-c \ > + strspn-c-avx \ > + strspn-c-sse4 \ > strspn-sse2 \ > strstr-avx512 \ > strstr-sse2-unaligned \ > varshift \ > # sysdep_routines > -CFLAGS-varshift.c += -msse4 > -CFLAGS-strcspn-c.c += -msse4 > -CFLAGS-strpbrk-c.c += -msse4 > -CFLAGS-strspn-c.c += -msse4 > + > +CFLAGS-strcspn-c-avx.c += -mavx > +CFLAGS-strcspn-c-sse4.c += -msse4 > +CFLAGS-strpbrk-c-avx.c += -mavx > +CFLAGS-strpbrk-c-sse4.c += -msse4 > +CFLAGS-strspn-c-avx.c += -mavx > +CFLAGS-strspn-c-sse4.c += -msse4 > + > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 > endif > > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h > similarity index 89% > rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h > rename to sysdeps/x86_64/multiarch/ifunc-avx.h > index b555ff2fac..891f3ddcac 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h > @@ -21,12 +21,16 @@ > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > const struct cpu_features* cpu_features = __get_cpu_features (); > > + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > + return OPTIMIZE (avx); > + > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) > return OPTIMIZE (sse42); > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index 58f3ec8306..507c563669 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > 
@@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strcspn.c. */ > IFUNC_IMPL (i, name, strcspn, > + IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX), > + __strcspn_avx) > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), > __strcspn_sse42) > IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) > @@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ > IFUNC_IMPL (i, name, strpbrk, > + IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX), > + __strpbrk_avx) > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), > __strpbrk_sse42) > IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) > @@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strspn.c. */ > IFUNC_IMPL (i, name, strspn, > + IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX), > + __strspn_avx) > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), > __strspn_sse42) > IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c > new file mode 100644 > index 0000000000..b8d983f79f > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c > @@ -0,0 +1,21 @@ > +/* strcspn with AVX intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define STRCSPN __strcspn_avx > +#define SECTION "avx" > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > similarity index 90% > rename from sysdeps/x86_64/multiarch/strcspn-c.c > rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c > index c312fab8b1..848c3cfb14 100644 > --- a/sysdeps/x86_64/multiarch/strcspn-c.c > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > @@ -52,9 +52,16 @@ > when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > X for case 1. */ > > -#ifndef STRCSPN_SSE2 > -# define STRCSPN_SSE2 __strcspn_sse2 > -# define STRCSPN_SSE42 __strcspn_sse42 > +#ifndef STRCSPN_FALLBACK > +# define STRCSPN_FALLBACK __strcspn_sse2 > +#endif > + > +#ifndef STRCSPN > +# define STRCSPN __strcspn_sse42 > +#endif > + > +#ifndef SECTION > +# define SECTION "sse4.2" > #endif > > #ifdef USE_AS_STRPBRK > @@ -69,16 +76,15 @@ char * > #else > size_t > #endif > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; > - > +STRCSPN_FALLBACK (const char *, const char *) attribute_hidden; > > #ifdef USE_AS_STRPBRK > char * > #else > size_t > #endif > -__attribute__ ((section (".text.sse4.2"))) > -STRCSPN_SSE42 (const char *s, const char *a) > +__attribute__ ((section (".text." SECTION))) > +STRCSPN (const char *s, const char *a) > { > if (*a == 0) > RETURN (NULL, strlen (s)); > @@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a) > maskz_bits = _mm_movemask_epi8 (maskz); > if (maskz_bits == 0) > { > - /* There is no NULL terminator. 
Don't use SSE4.2 if the length > - of A > 16. */ > + /* There is no NULL terminator. Don't use pcmpstri based approach if the > + length of A > 16. */ > if (a[16] != 0) > - return STRCSPN_SSE2 (s, a); > + return STRCSPN_FALLBACK (s, a); > } > > aligned = s; > diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c > index 4848fa8677..63e1cf052e 100644 > --- a/sysdeps/x86_64/multiarch/strcspn.c > +++ b/sysdeps/x86_64/multiarch/strcspn.c > @@ -24,7 +24,7 @@ > # undef strcspn > > # define SYMBOL_NAME strcspn > -# include "ifunc-sse4_2.h" > +# include "ifunc-avx.h" > > libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c > new file mode 100644 > index 0000000000..2918013994 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c > @@ -0,0 +1,23 @@ > +/* strpbrk with AVX intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define USE_AS_STRPBRK > +#define STRCSPN_FALLBACK __strpbrk_sse2 > +#define STRCSPN __strpbrk_avx > +#define SECTION "avx" > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > similarity index 89% > rename from sysdeps/x86_64/multiarch/strpbrk-c.c > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > index abf4ff7f1a..2efd38d809 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > @@ -17,6 +17,6 @@ > <https://www.gnu.org/licenses/>. */ > > #define USE_AS_STRPBRK > -#define STRCSPN_SSE2 __strpbrk_sse2 > -#define STRCSPN_SSE42 __strpbrk_sse42 > -#include "strcspn-c.c" > +#define STRCSPN_FALLBACK __strpbrk_sse2 > +#define STRCSPN __strpbrk_sse42 > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c > index 04e300ea71..ab5b04a482 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk.c > +++ b/sysdeps/x86_64/multiarch/strpbrk.c > @@ -24,7 +24,7 @@ > # undef strpbrk > > # define SYMBOL_NAME strpbrk > -# include "ifunc-sse4_2.h" > +# include "ifunc-avx.h" > > libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c > new file mode 100644 > index 0000000000..9d5fdb9550 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c > @@ -0,0 +1,21 @@ > +/* strspn with AVX intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define STRSPN __strspn_avx > +#define SECTION "avx" > +#include "strspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > similarity index 92% > rename from sysdeps/x86_64/multiarch/strspn-c.c > rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c > index 6124033ceb..6a91def2e0 100644 > --- a/sysdeps/x86_64/multiarch/strspn-c.c > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > @@ -53,10 +53,17 @@ > > extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; > > +#ifndef STRSPN > +# define STRSPN __strspn_sse42 > +#endif > + > +#ifndef SECTION > +# define SECTION "sse4.2" > +#endif > > size_t > -__attribute__ ((section (".text.sse4.2"))) > -__strspn_sse42 (const char *s, const char *a) > +__attribute__ ((section (".text." SECTION))) > +STRSPN (const char *s, const char *a) > { > if (*a == 0) > return 0; > @@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a) > maskz_bits = _mm_movemask_epi8 (maskz); > if (maskz_bits == 0) > { > - /* There is no NULL terminator. Don't use SSE4.2 if the length > - of A > 16. */ > + /* There is no NULL terminator. Don't use pcmpstri based approach if the > + length of A > 16. 
*/ > if (a[16] != 0) > return __strspn_sse2 (s, a); > } > diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c > index 07f5def155..c3c5e7a3cc 100644 > --- a/sysdeps/x86_64/multiarch/strspn.c > +++ b/sysdeps/x86_64/multiarch/strspn.c > @@ -24,7 +24,7 @@ > # undef strspn > > # define SYMBOL_NAME strspn > -# include "ifunc-sse4_2.h" > +# include "ifunc-avx.h" > > libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ()); > > -- > 2.34.1 >
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 3d153cac35..27f306c7c8 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -76,7 +76,8 @@ sysdep_routines += \ strcpy-evex \ strcpy-sse2 \ strcpy-sse2-unaligned \ - strcspn-c \ + strcspn-c-avx \ + strcspn-c-sse4 \ strcspn-sse2 \ strlen-avx2 \ strlen-avx2-rtm \ @@ -108,22 +109,28 @@ sysdep_routines += \ strnlen-evex \ strnlen-evex512 \ strnlen-sse2 \ - strpbrk-c \ + strpbrk-c-avx \ + strpbrk-c-sse4 \ strpbrk-sse2 \ strrchr-avx2 \ strrchr-avx2-rtm \ strrchr-evex \ strrchr-sse2 \ - strspn-c \ + strspn-c-avx \ + strspn-c-sse4 \ strspn-sse2 \ strstr-avx512 \ strstr-sse2-unaligned \ varshift \ # sysdep_routines -CFLAGS-varshift.c += -msse4 -CFLAGS-strcspn-c.c += -msse4 -CFLAGS-strpbrk-c.c += -msse4 -CFLAGS-strspn-c.c += -msse4 + +CFLAGS-strcspn-c-avx.c += -mavx +CFLAGS-strcspn-c-sse4.c += -msse4 +CFLAGS-strpbrk-c-avx.c += -mavx +CFLAGS-strpbrk-c-sse4.c += -msse4 +CFLAGS-strspn-c-avx.c += -mavx +CFLAGS-strspn-c-sse4.c += -msse4 + CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 endif diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h similarity index 89% rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h rename to sysdeps/x86_64/multiarch/ifunc-avx.h index b555ff2fac..891f3ddcac 100644 --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h @@ -21,12 +21,16 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + return OPTIMIZE (avx); + if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) return OPTIMIZE (sse42); diff 
--git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 58f3ec8306..507c563669 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strcspn.c. */ IFUNC_IMPL (i, name, strcspn, + IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX), + __strcspn_avx) IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), __strcspn_sse42) IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) @@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ IFUNC_IMPL (i, name, strpbrk, + IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX), + __strpbrk_avx) IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), __strpbrk_sse42) IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) @@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strspn.c. */ IFUNC_IMPL (i, name, strspn, + IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX), + __strspn_avx) IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c new file mode 100644 index 0000000000..b8d983f79f --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c @@ -0,0 +1,21 @@ +/* strcspn with AVX intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define STRCSPN __strcspn_avx +#define SECTION "avx" +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c similarity index 90% rename from sysdeps/x86_64/multiarch/strcspn-c.c rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c index c312fab8b1..848c3cfb14 100644 --- a/sysdeps/x86_64/multiarch/strcspn-c.c +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c @@ -52,9 +52,16 @@ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */ -#ifndef STRCSPN_SSE2 -# define STRCSPN_SSE2 __strcspn_sse2 -# define STRCSPN_SSE42 __strcspn_sse42 +#ifndef STRCSPN_FALLBACK +# define STRCSPN_FALLBACK __strcspn_sse2 +#endif + +#ifndef STRCSPN +# define STRCSPN __strcspn_sse42 +#endif + +#ifndef SECTION +# define SECTION "sse4.2" #endif #ifdef USE_AS_STRPBRK @@ -69,16 +76,15 @@ char * #else size_t #endif -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; - +STRCSPN_FALLBACK (const char *, const char *) attribute_hidden; #ifdef USE_AS_STRPBRK char * #else size_t #endif -__attribute__ ((section (".text.sse4.2"))) -STRCSPN_SSE42 (const char *s, const char *a) +__attribute__ ((section (".text." 
SECTION))) +STRCSPN (const char *s, const char *a) { if (*a == 0) RETURN (NULL, strlen (s)); @@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a) maskz_bits = _mm_movemask_epi8 (maskz); if (maskz_bits == 0) { - /* There is no NULL terminator. Don't use SSE4.2 if the length - of A > 16. */ + /* There is no NULL terminator. Don't use pcmpstri based approach if the + length of A > 16. */ if (a[16] != 0) - return STRCSPN_SSE2 (s, a); + return STRCSPN_FALLBACK (s, a); } aligned = s; diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c index 4848fa8677..63e1cf052e 100644 --- a/sysdeps/x86_64/multiarch/strcspn.c +++ b/sysdeps/x86_64/multiarch/strcspn.c @@ -24,7 +24,7 @@ # undef strcspn # define SYMBOL_NAME strcspn -# include "ifunc-sse4_2.h" +# include "ifunc-avx.h" libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c new file mode 100644 index 0000000000..2918013994 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c @@ -0,0 +1,23 @@ +/* strpbrk with AVX intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STRPBRK +#define STRCSPN_FALLBACK __strpbrk_sse2 +#define STRCSPN __strpbrk_avx +#define SECTION "avx" +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c similarity index 89% rename from sysdeps/x86_64/multiarch/strpbrk-c.c rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c index abf4ff7f1a..2efd38d809 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-c.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c @@ -17,6 +17,6 @@ <https://www.gnu.org/licenses/>. */ #define USE_AS_STRPBRK -#define STRCSPN_SSE2 __strpbrk_sse2 -#define STRCSPN_SSE42 __strpbrk_sse42 -#include "strcspn-c.c" +#define STRCSPN_FALLBACK __strpbrk_sse2 +#define STRCSPN __strpbrk_sse42 +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c index 04e300ea71..ab5b04a482 100644 --- a/sysdeps/x86_64/multiarch/strpbrk.c +++ b/sysdeps/x86_64/multiarch/strpbrk.c @@ -24,7 +24,7 @@ # undef strpbrk # define SYMBOL_NAME strpbrk -# include "ifunc-sse4_2.h" +# include "ifunc-avx.h" libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c new file mode 100644 index 0000000000..9d5fdb9550 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c @@ -0,0 +1,21 @@ +/* strspn with AVX intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define STRSPN __strspn_avx +#define SECTION "avx" +#include "strspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c similarity index 92% rename from sysdeps/x86_64/multiarch/strspn-c.c rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c index 6124033ceb..6a91def2e0 100644 --- a/sysdeps/x86_64/multiarch/strspn-c.c +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c @@ -53,10 +53,17 @@ extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; +#ifndef STRSPN +# define STRSPN __strspn_sse42 +#endif + +#ifndef SECTION +# define SECTION "sse4.2" +#endif size_t -__attribute__ ((section (".text.sse4.2"))) -__strspn_sse42 (const char *s, const char *a) +__attribute__ ((section (".text." SECTION))) +STRSPN (const char *s, const char *a) { if (*a == 0) return 0; @@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a) maskz_bits = _mm_movemask_epi8 (maskz); if (maskz_bits == 0) { - /* There is no NULL terminator. Don't use SSE4.2 if the length - of A > 16. */ + /* There is no NULL terminator. Don't use pcmpstri based approach if the + length of A > 16. */ if (a[16] != 0) return __strspn_sse2 (s, a); } diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c index 07f5def155..c3c5e7a3cc 100644 --- a/sysdeps/x86_64/multiarch/strspn.c +++ b/sysdeps/x86_64/multiarch/strspn.c @@ -24,7 +24,7 @@ # undef strspn # define SYMBOL_NAME strspn -# include "ifunc-sse4_2.h" +# include "ifunc-avx.h" libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());