Message ID | 20220617035050.1252784-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1,1/2] x86: Add defines / utilities for making ISA specific x86 builds | expand |
On Thu, Jun 16, 2022 at 8:50 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > 1. Factor out some of the ISA level defines in isa-level.c to > standalone header isa-level.h > > 2. Add new headers with ISA level dependent macros for handling > ifuncs. > > Note, this file does not change any code. > > Tested with and without multiarch on x86_64 for ISA levels: > {generic, x86-64-v2, x86-64-v3, x86-64-v4} > --- > sysdeps/x86/isa-level.c | 17 +--- > sysdeps/x86/isa-level.h | 70 +++++++++++++ > sysdeps/x86_64/isa-default-include.h | 49 +++++++++ > sysdeps/x86_64/multiarch/isa-ifunc-macros.h | 106 ++++++++++++++++++++ > 4 files changed, 230 insertions(+), 12 deletions(-) > create mode 100644 sysdeps/x86/isa-level.h > create mode 100644 sysdeps/x86_64/isa-default-include.h > create mode 100644 sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c > index 09cd72ab20..8e899bf64b 100644 > --- a/sysdeps/x86/isa-level.c > +++ b/sysdeps/x86/isa-level.c > @@ -26,38 +26,31 @@ > <https://www.gnu.org/licenses/>. */ > > #include <elf.h> > - > +#include <sysdeps/x86/isa-level.h> > /* ELF program property for x86 ISA level. */ > #ifdef INCLUDE_X86_ISA_LEVEL > -# if defined __SSE__ && defined __SSE2__ > +# if __X86_ISA_V1 #if MINIMUM_X86_ISA_LEVEL >= 1 > /* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ > # define ISA_BASELINE GNU_PROPERTY_X86_ISA_1_BASELINE > # else > # define ISA_BASELINE 0 > # endif > > -# if ISA_BASELINE && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ > - && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ > - && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ > - && defined __SSE4_2__ > +# if __X86_ISA_V2 #if MINIMUM_X86_ISA_LEVEL >= 2 > /* NB: ISAs in x86-64 ISA level v2 are used. 
*/ > # define ISA_V2 GNU_PROPERTY_X86_ISA_1_V2 > # else > # define ISA_V2 0 > # endif > > -# if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ > - && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ > - && defined __BMI__ && defined __BMI2__ > +# if __X86_ISA_V3 #if MINIMUM_X86_ISA_LEVEL >= 3 > /* NB: ISAs in x86-64 ISA level v3 are used. */ > # define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3 > # else > # define ISA_V3 0 > # endif > > -# if ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ > - && defined __AVX512CD__ && defined __AVX512DQ__ \ > - && defined __AVX512VL__ > +# if __X86_ISA_V4 #if MINIMUM_X86_ISA_LEVEL >= 4 > /* NB: ISAs in x86-64 ISA level v4 are used. */ > # define ISA_V4 GNU_PROPERTY_X86_ISA_1_V4 > # else > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h > new file mode 100644 > index 0000000000..ed696ae8eb > --- /dev/null > +++ b/sysdeps/x86/isa-level.h > @@ -0,0 +1,70 @@ > +/* Header defining the minimum x86 ISA level > + Copyright (C) 2020-2022 Free Software Foundation, Inc. Just 2022. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + In addition to the permissions in the GNU Lesser General Public > + License, the Free Software Foundation gives you unlimited > + permission to link the compiled version of this file with other > + programs, and to distribute those programs without any restriction > + coming from the use of this file. (The Lesser General Public > + License restrictions do apply in other respects; for example, they > + cover modification of the file, and distribution when not linked > + into another program.) 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _ISA_LEVEL_H > +#define _ISA_LEVEL_H > + > +# if defined __SSE__ && defined __SSE2__ > +/* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ > +# define __X86_ISA_V1 1 > +# else > +# define __X86_ISA_V1 0 > +# endif > + > +# if __X86_ISA_V1 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ > + && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ > + && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ > + && defined __SSE4_2__ > +/* NB: ISAs in x86-64 ISA level v2 are used. */ > +# define __X86_ISA_V2 1 > +# else > +# define __X86_ISA_V2 0 > +# endif > + > +# if __X86_ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ > + && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ > + && defined __BMI__ && defined __BMI2__ > +/* NB: ISAs in x86-64 ISA level v3 are used. */ > +# define __X86_ISA_V3 1 > +# else > +# define __X86_ISA_V3 0 > +# endif > + > +# if __X86_ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ > + && defined __AVX512CD__ && defined __AVX512DQ__ \ > + && defined __AVX512VL__ > +/* NB: ISAs in x86-64 ISA level v4 are used. */ > +# define __X86_ISA_V4 1 > +# else > +# define __X86_ISA_V4 0 > +# endif > + > +#define __X86_ISA_LEVEL \ > + (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4) > + We have isa.h to define MINIMUM_ISA. I think this file should define MINIMUM_X86_ISA_LEVEL. 
> +#endif > diff --git a/sysdeps/x86_64/isa-default-include.h b/sysdeps/x86_64/isa-default-include.h > new file mode 100644 > index 0000000000..d3091340af > --- /dev/null > +++ b/sysdeps/x86_64/isa-default-include.h isa-default-impl.h? since this header file includes the default implementation. > @@ -0,0 +1,49 @@ > +/* Utility for including proper default function based on ISA level Include the default implementation based on the minimum ISA level. > + Copyright (C) 2021-2022 Free Software Foundation, Inc. Just 2022. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <isa-level.h> > + > +#ifndef DEFAULT_V1 ISA_DEFAULT_IMPL_V1 > +# error "Must have at least ISA V1 Version" Must have at least ISA V1 implementation > +#endif > + > +#ifndef DEFAULT_V2 > +# define DEFAULT_V2 DEFAULT_V1 > +#endif > + > +#ifndef DEFAULT_V3 > +# define DEFAULT_V3 DEFAULT_V2 > +#endif > + > +#ifndef DEFAULT_V4 > +# define DEFAULT_V4 DEFAULT_V3 > +#endif > + > +#define IS_DEFAULT_INCLUDE We can remove IS_DEFAULT_INCLUDE and check ISA_DEFAULT_IMPL instead. 
> + > +#if __X86_ISA_LEVEL == 1 #if MINIMUM_X86_ISA_LEVEL == 1 > +# include DEFAULT_V1 # define ISA_DEFAULT_IMPL ISA_DEFAULT_IMPL_V1 > +#elif __X86_ISA_LEVEL == 2 > +# include DEFAULT_V2 > +#elif __X86_ISA_LEVEL == 3 > +# include DEFAULT_V3 > +#elif __X86_ISA_LEVEL == 4 > +# include DEFAULT_V4 # define ISA_DEFAULT_IMPL ISA_DEFAULT_IMPL_V4 > +#else > +# error "Unsupport ISA Level!" > +#endif #include ISA_DEFAULT_IMPL > diff --git a/sysdeps/x86_64/multiarch/isa-ifunc-macros.h b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h > new file mode 100644 > index 0000000000..c24f2ab655 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h > @@ -0,0 +1,106 @@ > +/* Common ifunc selection utils > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <init-arch.h> > +#include <isa-level.h> > + > +#define OPTIMIZE_DECL(...) \ > + extern __typeof (REDIRECT_NAME) OPTIMIZE (__VA_ARGS__) attribute_hidden; > + > +#define OPTIMIZE_DECL1(...) \ > + extern __typeof (REDIRECT_NAME) OPTIMIZE1 (__VA_ARGS__) attribute_hidden; > + > +/* Only include at the level of the build ISA or better. 
I.e if built minimum build ISA level > + with ISA=V1, then include all implementations. On the other hand if > + built with ISA=V3 only include V3/V4 implementations. If there is > + not implementation at or above the build ISA level, then include no implementation at or above the minimum build ISA level. > + the highest ISA level implementation. */ > +#if __X86_ISA_LEVEL <= 4 > +# define X86_IFUNC_IMPL_ADD_V4(...) IFUNC_IMPL_ADD (__VA_ARGS__) > +# define return_X86_OPTIMIZE_V4(...) return OPTIMIZE (__VA_ARGS__) > +# define return_X86_OPTIMIZE1_V4(...) return OPTIMIZE1 (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE_V4(...) OPTIMIZE_DECL (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE1_V4(...) OPTIMIZE_DECL1 (__VA_ARGS__) These macros should be added to <init-arch.h> instead. I don't think DECLARE_X86_OPTIMIZE_VN is necessary since unused declarations are OK. > +#endif > +#if __X86_ISA_LEVEL <= 3 > +# define X86_IFUNC_IMPL_ADD_V3(...) IFUNC_IMPL_ADD (__VA_ARGS__) > +# define return_X86_OPTIMIZE_V3(...) return OPTIMIZE (__VA_ARGS__) > +# define return_X86_OPTIMIZE1_V3(...) return OPTIMIZE1 (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE_V3(...) OPTIMIZE_DECL (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE1_V3(...) OPTIMIZE_DECL1 (__VA_ARGS__) > +#endif > +#if __X86_ISA_LEVEL <= 2 > +# define X86_IFUNC_IMPL_ADD_V2(...) IFUNC_IMPL_ADD (__VA_ARGS__) > +# define return_X86_OPTIMIZE_V2(...) return OPTIMIZE (__VA_ARGS__) > +# define return_X86_OPTIMIZE1_V2(...) return OPTIMIZE1 (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE_V2(...) OPTIMIZE_DECL (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE1_V2(...) OPTIMIZE_DECL1 (__VA_ARGS__) > +#endif > +#if __X86_ISA_LEVEL <= 1 > +# define X86_IFUNC_IMPL_ADD_V1(...) IFUNC_IMPL_ADD (__VA_ARGS__) > +# define return_X86_OPTIMIZE_V1(...) return OPTIMIZE (__VA_ARGS__) > +# define return_X86_OPTIMIZE1_V1(...) return OPTIMIZE1 (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE_V1(...) 
OPTIMIZE_DECL (__VA_ARGS__) > +# define DECLARE_X86_OPTIMIZE1_V1(...) OPTIMIZE_DECL1 (__VA_ARGS__) > +#endif > + > +#ifndef return_X86_OPTIMIZE_V4 > +# define X86_IFUNC_IMPL_ADD_V4(...) > +# define return_X86_OPTIMIZE_V4(...) (void) (0) > +# define return_X86_OPTIMIZE1_V4(...) (void) (0) > +# define DECLARE_X86_OPTIMIZE_V4(...) > +# define DECLARE_X86_OPTIMIZE1_V4(...) > +#endif > +#ifndef return_X86_OPTIMIZE_V3 > +# define X86_IFUNC_IMPL_ADD_V3(...) > +# define return_X86_OPTIMIZE_V3(...) (void) (0) > +# define return_X86_OPTIMIZE1_V3(...) (void) (0) > +# define DECLARE_X86_OPTIMIZE_V3(...) > +# define DECLARE_X86_OPTIMIZE1_V3(...) > +#endif > +#ifndef return_X86_OPTIMIZE_V2 > +# define X86_IFUNC_IMPL_ADD_V2(...) > +# define return_X86_OPTIMIZE_V2(...) (void) (0) > +# define return_X86_OPTIMIZE1_V2(...) (void) (0) > +# define DECLARE_X86_OPTIMIZE_V2(...) > +# define DECLARE_X86_OPTIMIZE1_V2(...) > +#endif > +#ifndef return_X86_OPTIMIZE_V1 > +# define X86_IFUNC_IMPL_ADD_V1(...) > +# define return_X86_OPTIMIZE_V1(...) (void) (0) > +# define return_X86_OPTIMIZE1_V1(...) (void) (0) > +# define DECLARE_X86_OPTIMIZE_V1(...) > +# define DECLARE_X86_OPTIMIZE1_V1(...) > +#endif > + > +#define DECLARE_X86_OPTIMIZE_FORCE(...) OPTIMIZE_DECL (__VA_ARGS__) > +#define return_X86_OPTIMIZE_FORCE(...) return OPTIMIZE (__VA_ARGS__) > +#define X86_IFUNC_IMPL_ADD_FORCE(...) IFUNC_IMPL_ADD (__VA_ARGS__) These are unused. > +#if __X86_ISA_LEVEL == 1 > +# define X86_OPTIMIZE_FALLBACK(v1, ...) OPTIMIZE (v1) > +#elif __X86_ISA_LEVEL == 2 > +# define X86_OPTIMIZE_FALLBACK(v1, v2, ...) OPTIMIZE (v2) > +#elif __X86_ISA_LEVEL == 3 > +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, ...) OPTIMIZE (v3) > +#elif __X86_ISA_LEVEL == 4 > +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, v4) OPTIMIZE (v4) > +#else > +# error "Unsupported ISA Level" > +#endif > -- > 2.34.1 We can avoid X86_OPTIMIZE_FALLBACK by 1. Check the minimum ISA level IFUNC selector. 2. 
Turn return_X86_OPTIMIZE_VN into __builtin_unreachable () when unused. Another issue: with AVX available, the IFUNC selector may prefer the SSE version when AVX_Fast_Unaligned_Load or Prefer_No_VZEROUPPER isn't set. This happens for memmove. We have the default implementation without IFUNC and the best implementation with IFUNC. They may not be the same.
On Fri, Jun 17, 2022 at 12:13 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Thu, Jun 16, 2022 at 8:50 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > 1. Factor out some of the ISA level defines in isa-level.c to > > standalone header isa-level.h > > > > 2. Add new headers with ISA level dependent macros for handling > > ifuncs. > > > > Note, this file does not change any code. > > > > Tested with and without multiarch on x86_64 for ISA levels: > > {generic, x86-64-v2, x86-64-v3, x86-64-v4} > > --- > > sysdeps/x86/isa-level.c | 17 +--- > > sysdeps/x86/isa-level.h | 70 +++++++++++++ > > sysdeps/x86_64/isa-default-include.h | 49 +++++++++ > > sysdeps/x86_64/multiarch/isa-ifunc-macros.h | 106 ++++++++++++++++++++ > > 4 files changed, 230 insertions(+), 12 deletions(-) > > create mode 100644 sysdeps/x86/isa-level.h > > create mode 100644 sysdeps/x86_64/isa-default-include.h > > create mode 100644 sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > > > diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c > > index 09cd72ab20..8e899bf64b 100644 > > --- a/sysdeps/x86/isa-level.c > > +++ b/sysdeps/x86/isa-level.c > > @@ -26,38 +26,31 @@ > > <https://www.gnu.org/licenses/>. */ > > > > #include <elf.h> > > - > > +#include <sysdeps/x86/isa-level.h> > > /* ELF program property for x86 ISA level. */ > > #ifdef INCLUDE_X86_ISA_LEVEL > > -# if defined __SSE__ && defined __SSE2__ > > +# if __X86_ISA_V1 > > #if MINIMUM_X86_ISA_LEVEL >= 1 > > > /* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ > > # define ISA_BASELINE GNU_PROPERTY_X86_ISA_1_BASELINE > > # else > > # define ISA_BASELINE 0 > > # endif > > > > -# if ISA_BASELINE && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ > > - && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ > > - && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ > > - && defined __SSE4_2__ > > +# if __X86_ISA_V2 > > #if MINIMUM_X86_ISA_LEVEL >= 2 > > > /* NB: ISAs in x86-64 ISA level v2 are used. 
*/ > > # define ISA_V2 GNU_PROPERTY_X86_ISA_1_V2 > > # else > > # define ISA_V2 0 > > # endif > > > > -# if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ > > - && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ > > - && defined __BMI__ && defined __BMI2__ > > +# if __X86_ISA_V3 > > #if MINIMUM_X86_ISA_LEVEL >= 3 > > > /* NB: ISAs in x86-64 ISA level v3 are used. */ > > # define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3 > > # else > > # define ISA_V3 0 > > # endif > > > > -# if ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ > > - && defined __AVX512CD__ && defined __AVX512DQ__ \ > > - && defined __AVX512VL__ > > +# if __X86_ISA_V4 > > #if MINIMUM_X86_ISA_LEVEL >= 4 > > > /* NB: ISAs in x86-64 ISA level v4 are used. */ > > # define ISA_V4 GNU_PROPERTY_X86_ISA_1_V4 > > # else > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h > > new file mode 100644 > > index 0000000000..ed696ae8eb > > --- /dev/null > > +++ b/sysdeps/x86/isa-level.h > > @@ -0,0 +1,70 @@ > > +/* Header defining the minimum x86 ISA level > > + Copyright (C) 2020-2022 Free Software Foundation, Inc. > > Just 2022. > > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + In addition to the permissions in the GNU Lesser General Public > > + License, the Free Software Foundation gives you unlimited > > + permission to link the compiled version of this file with other > > + programs, and to distribute those programs without any restriction > > + coming from the use of this file. 
(The Lesser General Public > > + License restrictions do apply in other respects; for example, they > > + cover modification of the file, and distribution when not linked > > + into another program.) > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _ISA_LEVEL_H > > +#define _ISA_LEVEL_H > > + > > +# if defined __SSE__ && defined __SSE2__ > > +/* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ > > +# define __X86_ISA_V1 1 > > +# else > > +# define __X86_ISA_V1 0 > > +# endif > > + > > +# if __X86_ISA_V1 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ > > + && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ > > + && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ > > + && defined __SSE4_2__ > > +/* NB: ISAs in x86-64 ISA level v2 are used. */ > > +# define __X86_ISA_V2 1 > > +# else > > +# define __X86_ISA_V2 0 > > +# endif > > + > > +# if __X86_ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ > > + && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ > > + && defined __BMI__ && defined __BMI2__ > > +/* NB: ISAs in x86-64 ISA level v3 are used. */ > > +# define __X86_ISA_V3 1 > > +# else > > +# define __X86_ISA_V3 0 > > +# endif > > + > > +# if __X86_ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ > > + && defined __AVX512CD__ && defined __AVX512DQ__ \ > > + && defined __AVX512VL__ > > +/* NB: ISAs in x86-64 ISA level v4 are used. 
*/ > > +# define __X86_ISA_V4 1 > > +# else > > +# define __X86_ISA_V4 0 > > +# endif > > + > > +#define __X86_ISA_LEVEL \ > > + (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4) > > + > > We have isa.h to define MINIMUM_ISA. I think this file should define > MINIMUM_X86_ISA_LEVEL. > > > +#endif > > diff --git a/sysdeps/x86_64/isa-default-include.h b/sysdeps/x86_64/isa-default-include.h > > new file mode 100644 > > index 0000000000..d3091340af > > --- /dev/null > > +++ b/sysdeps/x86_64/isa-default-include.h > > isa-default-impl.h? since this header file includes the default implementation. > > > @@ -0,0 +1,49 @@ > > +/* Utility for including proper default function based on ISA level > > Include the default implementation based on the minimum ISA level. > > > + Copyright (C) 2021-2022 Free Software Foundation, Inc. > > Just 2022. > > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#include <isa-level.h> > > + > > +#ifndef DEFAULT_V1 > > ISA_DEFAULT_IMPL_V1 > > > +# error "Must have at least ISA V1 Version" > > Must have at least ISA V1 implementation > > > +#endif > > + > > +#ifndef DEFAULT_V2 > > +# define DEFAULT_V2 DEFAULT_V1 > > +#endif > > + > > +#ifndef DEFAULT_V3 > > +# define DEFAULT_V3 DEFAULT_V2 > > +#endif > > + > > +#ifndef DEFAULT_V4 > > +# define DEFAULT_V4 DEFAULT_V3 > > +#endif > > + > > +#define IS_DEFAULT_INCLUDE > > We can remove IS_DEFAULT_INCLUDE and > check ISA_DEFAULT_IMPL instead. > > > + > > +#if __X86_ISA_LEVEL == 1 > > #if MINIMUM_X86_ISA_LEVEL == 1 > > > +# include DEFAULT_V1 > > # define ISA_DEFAULT_IMPL ISA_DEFAULT_IMPL_V1 > > > +#elif __X86_ISA_LEVEL == 2 > > +# include DEFAULT_V2 > > +#elif __X86_ISA_LEVEL == 3 > > +# include DEFAULT_V3 > > +#elif __X86_ISA_LEVEL == 4 > > +# include DEFAULT_V4 > > # define ISA_DEFAULT_IMPL ISA_DEFAULT_IMPL_V4 > > > +#else > > +# error "Unsupport ISA Level!" > > +#endif > > #include ISA_DEFAULT_IMPL > > > diff --git a/sysdeps/x86_64/multiarch/isa-ifunc-macros.h b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > new file mode 100644 > > index 0000000000..c24f2ab655 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > @@ -0,0 +1,106 @@ > > +/* Common ifunc selection utils > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <init-arch.h> > > +#include <isa-level.h> > > + > > +#define OPTIMIZE_DECL(...) \ > > + extern __typeof (REDIRECT_NAME) OPTIMIZE (__VA_ARGS__) attribute_hidden; > > + > > +#define OPTIMIZE_DECL1(...) \ > > + extern __typeof (REDIRECT_NAME) OPTIMIZE1 (__VA_ARGS__) attribute_hidden; > > + > > +/* Only include at the level of the build ISA or better. I.e if built > > minimum build ISA level > > > + with ISA=V1, then include all implementations. On the other hand if > > + built with ISA=V3 only include V3/V4 implementations. If there is > > + not implementation at or above the build ISA level, then include > > no implementation at or above the minimum build ISA level. > > > + the highest ISA level implementation. */ > > +#if __X86_ISA_LEVEL <= 4 > > +# define X86_IFUNC_IMPL_ADD_V4(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > +# define return_X86_OPTIMIZE_V4(...) return OPTIMIZE (__VA_ARGS__) > > +# define return_X86_OPTIMIZE1_V4(...) return OPTIMIZE1 (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE_V4(...) OPTIMIZE_DECL (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE1_V4(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > These macros should be added to <init-arch.h> instead. I don't > think DECLARE_X86_OPTIMIZE_VN is necessary since unused > declarations are OK. It's not an unused declaration; it's a declaration of a function that doesn't exist. But if it can be removed, I will do so for V2. > > > +#endif > > +#if __X86_ISA_LEVEL <= 3 > > +# define X86_IFUNC_IMPL_ADD_V3(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > +# define return_X86_OPTIMIZE_V3(...) return OPTIMIZE (__VA_ARGS__) > > +# define return_X86_OPTIMIZE1_V3(...) return OPTIMIZE1 (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE_V3(...) 
OPTIMIZE_DECL (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE1_V3(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > +#endif > > +#if __X86_ISA_LEVEL <= 2 > > +# define X86_IFUNC_IMPL_ADD_V2(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > +# define return_X86_OPTIMIZE_V2(...) return OPTIMIZE (__VA_ARGS__) > > +# define return_X86_OPTIMIZE1_V2(...) return OPTIMIZE1 (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE_V2(...) OPTIMIZE_DECL (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE1_V2(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > +#endif > > +#if __X86_ISA_LEVEL <= 1 > > +# define X86_IFUNC_IMPL_ADD_V1(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > +# define return_X86_OPTIMIZE_V1(...) return OPTIMIZE (__VA_ARGS__) > > +# define return_X86_OPTIMIZE1_V1(...) return OPTIMIZE1 (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE_V1(...) OPTIMIZE_DECL (__VA_ARGS__) > > +# define DECLARE_X86_OPTIMIZE1_V1(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > +#endif > > + > > +#ifndef return_X86_OPTIMIZE_V4 > > +# define X86_IFUNC_IMPL_ADD_V4(...) > > +# define return_X86_OPTIMIZE_V4(...) (void) (0) > > +# define return_X86_OPTIMIZE1_V4(...) (void) (0) > > +# define DECLARE_X86_OPTIMIZE_V4(...) > > +# define DECLARE_X86_OPTIMIZE1_V4(...) > > +#endif > > +#ifndef return_X86_OPTIMIZE_V3 > > +# define X86_IFUNC_IMPL_ADD_V3(...) > > +# define return_X86_OPTIMIZE_V3(...) (void) (0) > > +# define return_X86_OPTIMIZE1_V3(...) (void) (0) > > +# define DECLARE_X86_OPTIMIZE_V3(...) > > +# define DECLARE_X86_OPTIMIZE1_V3(...) > > +#endif > > +#ifndef return_X86_OPTIMIZE_V2 > > +# define X86_IFUNC_IMPL_ADD_V2(...) > > +# define return_X86_OPTIMIZE_V2(...) (void) (0) > > +# define return_X86_OPTIMIZE1_V2(...) (void) (0) > > +# define DECLARE_X86_OPTIMIZE_V2(...) > > +# define DECLARE_X86_OPTIMIZE1_V2(...) > > +#endif > > +#ifndef return_X86_OPTIMIZE_V1 > > +# define X86_IFUNC_IMPL_ADD_V1(...) > > +# define return_X86_OPTIMIZE_V1(...) (void) (0) > > +# define return_X86_OPTIMIZE1_V1(...) (void) (0) > > +# define DECLARE_X86_OPTIMIZE_V1(...) 
> > +# define DECLARE_X86_OPTIMIZE1_V1(...) > > +#endif > > + > > +#define DECLARE_X86_OPTIMIZE_FORCE(...) OPTIMIZE_DECL (__VA_ARGS__) > > +#define return_X86_OPTIMIZE_FORCE(...) return OPTIMIZE (__VA_ARGS__) > > +#define X86_IFUNC_IMPL_ADD_FORCE(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > These are unused. I think these will be used for strcspn/strspn/strpbrk, but I can drop them for now. > > > +#if __X86_ISA_LEVEL == 1 > > +# define X86_OPTIMIZE_FALLBACK(v1, ...) OPTIMIZE (v1) > > +#elif __X86_ISA_LEVEL == 2 > > +# define X86_OPTIMIZE_FALLBACK(v1, v2, ...) OPTIMIZE (v2) > > +#elif __X86_ISA_LEVEL == 3 > > +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, ...) OPTIMIZE (v3) > > +#elif __X86_ISA_LEVEL == 4 > > +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, v4) OPTIMIZE (v4) > > +#else > > +# error "Unsupported ISA Level" > > +#endif > > -- > > 2.34.1 > > We can avoid X86_OPTIMIZE_FALLBACK by > > 1. Check the minimum ISA level IFUNC selector. > 2. Turn return_X86_OPTIMIZE_VN into __builtin_unreachable () > when unused. I think there are some edge cases we may miss, e.g. an ISA v3 implementation that also has prefer_novzeroupper. It is still correct to use the avx2 impl with vzeroupper, but we will fail in the ifunc preference. I will fix all the other suggestions in v2. > > Another issue. With AVX available, IFUNC selector may > prefer SSE version when AVX_Fast_Unaligned_Load or > Prefer_No_VZEROUPPER isn't set. This happens for memmove. > We have the default implementation without IFUNC and the > best implementation with IFUNC. They may not be the > same. > > -- > H.J.
On Fri, Jun 17, 2022 at 12:30 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Fri, Jun 17, 2022 at 12:13 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Thu, Jun 16, 2022 at 8:50 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > 1. Factor out some of the ISA level defines in isa-level.c to > > > standalone header isa-level.h > > > > > > 2. Add new headers with ISA level dependent macros for handling > > > ifuncs. > > > > > > Note, this file does not change any code. > > > > > > Tested with and without multiarch on x86_64 for ISA levels: > > > {generic, x86-64-v2, x86-64-v3, x86-64-v4} > > > --- > > > sysdeps/x86/isa-level.c | 17 +--- > > > sysdeps/x86/isa-level.h | 70 +++++++++++++ > > > sysdeps/x86_64/isa-default-include.h | 49 +++++++++ > > > sysdeps/x86_64/multiarch/isa-ifunc-macros.h | 106 ++++++++++++++++++++ > > > 4 files changed, 230 insertions(+), 12 deletions(-) > > > create mode 100644 sysdeps/x86/isa-level.h > > > create mode 100644 sysdeps/x86_64/isa-default-include.h > > > create mode 100644 sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > > > > > diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c > > > index 09cd72ab20..8e899bf64b 100644 > > > --- a/sysdeps/x86/isa-level.c > > > +++ b/sysdeps/x86/isa-level.c > > > @@ -26,38 +26,31 @@ > > > <https://www.gnu.org/licenses/>. */ > > > > > > #include <elf.h> > > > - > > > +#include <sysdeps/x86/isa-level.h> > > > /* ELF program property for x86 ISA level. */ > > > #ifdef INCLUDE_X86_ISA_LEVEL > > > -# if defined __SSE__ && defined __SSE2__ > > > +# if __X86_ISA_V1 > > > > #if MINIMUM_X86_ISA_LEVEL >= 1 > > > > > /* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. 
*/ > > > # define ISA_BASELINE GNU_PROPERTY_X86_ISA_1_BASELINE > > > # else > > > # define ISA_BASELINE 0 > > > # endif > > > > > > -# if ISA_BASELINE && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ > > > - && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ > > > - && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ > > > - && defined __SSE4_2__ > > > +# if __X86_ISA_V2 > > > > #if MINIMUM_X86_ISA_LEVEL >= 2 > > > > > /* NB: ISAs in x86-64 ISA level v2 are used. */ > > > # define ISA_V2 GNU_PROPERTY_X86_ISA_1_V2 > > > # else > > > # define ISA_V2 0 > > > # endif > > > > > > -# if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ > > > - && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ > > > - && defined __BMI__ && defined __BMI2__ > > > +# if __X86_ISA_V3 > > > > #if MINIMUM_X86_ISA_LEVEL >= 3 > > > > > /* NB: ISAs in x86-64 ISA level v3 are used. */ > > > # define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3 > > > # else > > > # define ISA_V3 0 > > > # endif > > > > > > -# if ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ > > > - && defined __AVX512CD__ && defined __AVX512DQ__ \ > > > - && defined __AVX512VL__ > > > +# if __X86_ISA_V4 > > > > #if MINIMUM_X86_ISA_LEVEL >= 4 > > > > > /* NB: ISAs in x86-64 ISA level v4 are used. */ > > > # define ISA_V4 GNU_PROPERTY_X86_ISA_1_V4 > > > # else > > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h > > > new file mode 100644 > > > index 0000000000..ed696ae8eb > > > --- /dev/null > > > +++ b/sysdeps/x86/isa-level.h > > > @@ -0,0 +1,70 @@ > > > +/* Header defining the minimum x86 ISA level > > > + Copyright (C) 2020-2022 Free Software Foundation, Inc. > > > > Just 2022. > > > > > + This file is part of the GNU C Library. 
> > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + In addition to the permissions in the GNU Lesser General Public > > > + License, the Free Software Foundation gives you unlimited > > > + permission to link the compiled version of this file with other > > > + programs, and to distribute those programs without any restriction > > > + coming from the use of this file. (The Lesser General Public > > > + License restrictions do apply in other respects; for example, they > > > + cover modification of the file, and distribution when not linked > > > + into another program.) > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#ifndef _ISA_LEVEL_H > > > +#define _ISA_LEVEL_H > > > + > > > +# if defined __SSE__ && defined __SSE2__ > > > +/* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ > > > +# define __X86_ISA_V1 1 > > > +# else > > > +# define __X86_ISA_V1 0 > > > +# endif > > > + > > > +# if __X86_ISA_V1 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ > > > + && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ > > > + && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ > > > + && defined __SSE4_2__ > > > +/* NB: ISAs in x86-64 ISA level v2 are used. 
*/ > > > +# define __X86_ISA_V2 1 > > > +# else > > > +# define __X86_ISA_V2 0 > > > +# endif > > > + > > > +# if __X86_ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ > > > + && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ > > > + && defined __BMI__ && defined __BMI2__ > > > +/* NB: ISAs in x86-64 ISA level v3 are used. */ > > > +# define __X86_ISA_V3 1 > > > +# else > > > +# define __X86_ISA_V3 0 > > > +# endif > > > + > > > +# if __X86_ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ > > > + && defined __AVX512CD__ && defined __AVX512DQ__ \ > > > + && defined __AVX512VL__ > > > +/* NB: ISAs in x86-64 ISA level v4 are used. */ > > > +# define __X86_ISA_V4 1 > > > +# else > > > +# define __X86_ISA_V4 0 > > > +# endif > > > + > > > +#define __X86_ISA_LEVEL \ > > > + (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4) > > > + > > > > We have isa.h to define MINIMUM_ISA. I think this file should define > > MINIMUM_X86_ISA_LEVEL. > > > > > +#endif > > > diff --git a/sysdeps/x86_64/isa-default-include.h b/sysdeps/x86_64/isa-default-include.h > > > new file mode 100644 > > > index 0000000000..d3091340af > > > --- /dev/null > > > +++ b/sysdeps/x86_64/isa-default-include.h > > > > isa-default-impl.h? since this header file includes the default implementation. > > > > > @@ -0,0 +1,49 @@ > > > +/* Utility for including proper default function based on ISA level > > > > Include the default implementation based on the minimum ISA level. > > > > > + Copyright (C) 2021-2022 Free Software Foundation, Inc. > > > > Just 2022. > > > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. 
> > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#include <isa-level.h> > > > + > > > +#ifndef DEFAULT_V1 > > > > ISA_DEFAULT_IMPL_V1 > > > > > +# error "Must have at least ISA V1 Version" > > > > Must have at least ISA V1 implementation > > > > > +#endif > > > + > > > +#ifndef DEFAULT_V2 > > > +# define DEFAULT_V2 DEFAULT_V1 > > > +#endif > > > + > > > +#ifndef DEFAULT_V3 > > > +# define DEFAULT_V3 DEFAULT_V2 > > > +#endif > > > + > > > +#ifndef DEFAULT_V4 > > > +# define DEFAULT_V4 DEFAULT_V3 > > > +#endif > > > + > > > +#define IS_DEFAULT_INCLUDE > > > > We can remove IS_DEFAULT_INCLUDE and > > check ISA_DEFAULT_IMPL instead. > > > > > + > > > +#if __X86_ISA_LEVEL == 1 > > > > #if MINIMUM_X86_ISA_LEVEL == 1 > > > > > +# include DEFAULT_V1 > > > > # define ISA_DEFAULT_IMPL ISA_DEFAULT_IMPL_V1 > > > > > +#elif __X86_ISA_LEVEL == 2 > > > +# include DEFAULT_V2 > > > +#elif __X86_ISA_LEVEL == 3 > > > +# include DEFAULT_V3 > > > +#elif __X86_ISA_LEVEL == 4 > > > +# include DEFAULT_V4 > > > > # define ISA_DEFAULT_IMPL ISA_DEFAULT_IMPL_V4 > > > > > +#else > > > +# error "Unsupport ISA Level!" > > > +#endif > > > > #include ISA_DEFAULT_IMPL > > > > > diff --git a/sysdeps/x86_64/multiarch/isa-ifunc-macros.h b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > > new file mode 100644 > > > index 0000000000..c24f2ab655 > > > --- /dev/null > > > +++ b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h > > > @@ -0,0 +1,106 @@ > > > +/* Common ifunc selection utils > > > + All versions must be listed in ifunc-impl-list.c. 
> > > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > > > + This file is part of the GNU C Library. > > > + > > > + The GNU C Library is free software; you can redistribute it and/or > > > + modify it under the terms of the GNU Lesser General Public > > > + License as published by the Free Software Foundation; either > > > + version 2.1 of the License, or (at your option) any later version. > > > + > > > + The GNU C Library is distributed in the hope that it will be useful, > > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > > + Lesser General Public License for more details. > > > + > > > + You should have received a copy of the GNU Lesser General Public > > > + License along with the GNU C Library; if not, see > > > + <https://www.gnu.org/licenses/>. */ > > > + > > > +#include <init-arch.h> > > > +#include <isa-level.h> > > > + > > > +#define OPTIMIZE_DECL(...) \ > > > + extern __typeof (REDIRECT_NAME) OPTIMIZE (__VA_ARGS__) attribute_hidden; > > > + > > > +#define OPTIMIZE_DECL1(...) \ > > > + extern __typeof (REDIRECT_NAME) OPTIMIZE1 (__VA_ARGS__) attribute_hidden; > > > + > > > +/* Only include at the level of the build ISA or better. I.e if built > > > > minimum build ISA level > > > > > + with ISA=V1, then include all implementations. On the other hand if > > > + built with ISA=V3 only include V3/V4 implementations. If there is > > > + not implementation at or above the build ISA level, then include > > > > no implementation at or above the minimum build ISA level. > > > > > + the highest ISA level implementation. */ > > > +#if __X86_ISA_LEVEL <= 4 > > > +# define X86_IFUNC_IMPL_ADD_V4(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE_V4(...) return OPTIMIZE (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE1_V4(...) return OPTIMIZE1 (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE_V4(...) 
OPTIMIZE_DECL (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE1_V4(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > > > These macros should be added to <init-arch.h> instead. I don't > > think DECLARE_X86_OPTIMIZE_VN is necessary since unused > > declarations are OK. > > It's not an unused declaration; it's a declaration of a function that doesn't > exist. > > But if I can remove it, I will do so for V2. > > > > > +#endif > > > +#if __X86_ISA_LEVEL <= 3 > > > +# define X86_IFUNC_IMPL_ADD_V3(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE_V3(...) return OPTIMIZE (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE1_V3(...) return OPTIMIZE1 (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE_V3(...) OPTIMIZE_DECL (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE1_V3(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > > +#endif > > > +#if __X86_ISA_LEVEL <= 2 > > > +# define X86_IFUNC_IMPL_ADD_V2(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE_V2(...) return OPTIMIZE (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE1_V2(...) return OPTIMIZE1 (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE_V2(...) OPTIMIZE_DECL (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE1_V2(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > > +#endif > > > +#if __X86_ISA_LEVEL <= 1 > > > +# define X86_IFUNC_IMPL_ADD_V1(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE_V1(...) return OPTIMIZE (__VA_ARGS__) > > > +# define return_X86_OPTIMIZE1_V1(...) return OPTIMIZE1 (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE_V1(...) OPTIMIZE_DECL (__VA_ARGS__) > > > +# define DECLARE_X86_OPTIMIZE1_V1(...) OPTIMIZE_DECL1 (__VA_ARGS__) > > > +#endif > > > + > > > +#ifndef return_X86_OPTIMIZE_V4 > > > +# define X86_IFUNC_IMPL_ADD_V4(...) > > > +# define return_X86_OPTIMIZE_V4(...) (void) (0) > > > +# define return_X86_OPTIMIZE1_V4(...) (void) (0) > > > +# define DECLARE_X86_OPTIMIZE_V4(...) > > > +# define DECLARE_X86_OPTIMIZE1_V4(...)
> > > +#endif > > > +#ifndef return_X86_OPTIMIZE_V3 > > > +# define X86_IFUNC_IMPL_ADD_V3(...) > > > +# define return_X86_OPTIMIZE_V3(...) (void) (0) > > > +# define return_X86_OPTIMIZE1_V3(...) (void) (0) > > > +# define DECLARE_X86_OPTIMIZE_V3(...) > > > +# define DECLARE_X86_OPTIMIZE1_V3(...) > > > +#endif > > > +#ifndef return_X86_OPTIMIZE_V2 > > > +# define X86_IFUNC_IMPL_ADD_V2(...) > > > +# define return_X86_OPTIMIZE_V2(...) (void) (0) > > > +# define return_X86_OPTIMIZE1_V2(...) (void) (0) > > > +# define DECLARE_X86_OPTIMIZE_V2(...) > > > +# define DECLARE_X86_OPTIMIZE1_V2(...) > > > +#endif > > > +#ifndef return_X86_OPTIMIZE_V1 > > > +# define X86_IFUNC_IMPL_ADD_V1(...) > > > +# define return_X86_OPTIMIZE_V1(...) (void) (0) > > > +# define return_X86_OPTIMIZE1_V1(...) (void) (0) > > > +# define DECLARE_X86_OPTIMIZE_V1(...) > > > +# define DECLARE_X86_OPTIMIZE1_V1(...) > > > +#endif > > > + > > > +#define DECLARE_X86_OPTIMIZE_FORCE(...) OPTIMIZE_DECL (__VA_ARGS__) > > > +#define return_X86_OPTIMIZE_FORCE(...) return OPTIMIZE (__VA_ARGS__) > > > +#define X86_IFUNC_IMPL_ADD_FORCE(...) IFUNC_IMPL_ADD (__VA_ARGS__) > > > > These are unused. > > I think these will be used for strcspn/strspn/strpbrk, but I can drop them for now. > > > > > +#if __X86_ISA_LEVEL == 1 > > > +# define X86_OPTIMIZE_FALLBACK(v1, ...) OPTIMIZE (v1) > > > +#elif __X86_ISA_LEVEL == 2 > > > +# define X86_OPTIMIZE_FALLBACK(v1, v2, ...) OPTIMIZE (v2) > > > +#elif __X86_ISA_LEVEL == 3 > > > +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, ...) OPTIMIZE (v3) > > > +#elif __X86_ISA_LEVEL == 4 > > > +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, v4) OPTIMIZE (v4) > > > +#else > > > +# error "Unsupported ISA Level" > > > +#endif > > > -- > > > 2.34.1 > > > > We can avoid X86_OPTIMIZE_FALLBACK by > > > > 1. Check the minimum ISA level IFUNC selector. > > 2. Turn return_X86_OPTIMIZE_VN into __builtin_unreachable () > > when unused. > > I think there are some edge cases we may miss.
I.e. an ISA v3 implementation > that also has prefer_novzeroupper. It is still correct to use the avx2 impl with > vzeroupper, but we will fail in the ifunc preference. > > I will fix all other suggestions for v2. > > > > > > Another issue. With AVX available, IFUNC selector may > > prefer SSE version when AVX_Fast_Unaligned_Load or > > Prefer_No_VZEROUPPER isn't set. This happens for memmove. > > We have the default implementation without IFUNC and the > > best implementation with IFUNC. They may not be the > > same. We won't have SSE built in that case. I think we are going to be forced to just eat the AVX implementation in that case. > > > > -- > > H.J.
diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c index 09cd72ab20..8e899bf64b 100644 --- a/sysdeps/x86/isa-level.c +++ b/sysdeps/x86/isa-level.c @@ -26,38 +26,31 @@ <https://www.gnu.org/licenses/>. */ #include <elf.h> - +#include <sysdeps/x86/isa-level.h> /* ELF program property for x86 ISA level. */ #ifdef INCLUDE_X86_ISA_LEVEL -# if defined __SSE__ && defined __SSE2__ +# if __X86_ISA_V1 /* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ # define ISA_BASELINE GNU_PROPERTY_X86_ISA_1_BASELINE # else # define ISA_BASELINE 0 # endif -# if ISA_BASELINE && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ - && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ - && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ - && defined __SSE4_2__ +# if __X86_ISA_V2 /* NB: ISAs in x86-64 ISA level v2 are used. */ # define ISA_V2 GNU_PROPERTY_X86_ISA_1_V2 # else # define ISA_V2 0 # endif -# if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ - && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ - && defined __BMI__ && defined __BMI2__ +# if __X86_ISA_V3 /* NB: ISAs in x86-64 ISA level v3 are used. */ # define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3 # else # define ISA_V3 0 # endif -# if ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ - && defined __AVX512CD__ && defined __AVX512DQ__ \ - && defined __AVX512VL__ +# if __X86_ISA_V4 /* NB: ISAs in x86-64 ISA level v4 are used. */ # define ISA_V4 GNU_PROPERTY_X86_ISA_1_V4 # else diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h new file mode 100644 index 0000000000..ed696ae8eb --- /dev/null +++ b/sysdeps/x86/isa-level.h @@ -0,0 +1,70 @@ +/* Header defining the minimum x86 ISA level + Copyright (C) 2020-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _ISA_LEVEL_H +#define _ISA_LEVEL_H + +# if defined __SSE__ && defined __SSE2__ +/* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ +# define __X86_ISA_V1 1 +# else +# define __X86_ISA_V1 0 +# endif + +# if __X86_ISA_V1 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ + && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ + && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ + && defined __SSE4_2__ +/* NB: ISAs in x86-64 ISA level v2 are used. */ +# define __X86_ISA_V2 1 +# else +# define __X86_ISA_V2 0 +# endif + +# if __X86_ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ + && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ + && defined __BMI__ && defined __BMI2__ +/* NB: ISAs in x86-64 ISA level v3 are used. 
*/ +# define __X86_ISA_V3 1 +# else +# define __X86_ISA_V3 0 +# endif + +# if __X86_ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ + && defined __AVX512CD__ && defined __AVX512DQ__ \ + && defined __AVX512VL__ +/* NB: ISAs in x86-64 ISA level v4 are used. */ +# define __X86_ISA_V4 1 +# else +# define __X86_ISA_V4 0 +# endif + +#define __X86_ISA_LEVEL \ + (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4) + + +#endif diff --git a/sysdeps/x86_64/isa-default-include.h b/sysdeps/x86_64/isa-default-include.h new file mode 100644 index 0000000000..d3091340af --- /dev/null +++ b/sysdeps/x86_64/isa-default-include.h @@ -0,0 +1,49 @@ +/* Utility for including proper default function based on ISA level + Copyright (C) 2021-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <isa-level.h> + +#ifndef DEFAULT_V1 +# error "Must have at least ISA V1 Version" +#endif + +#ifndef DEFAULT_V2 +# define DEFAULT_V2 DEFAULT_V1 +#endif + +#ifndef DEFAULT_V3 +# define DEFAULT_V3 DEFAULT_V2 +#endif + +#ifndef DEFAULT_V4 +# define DEFAULT_V4 DEFAULT_V3 +#endif + +#define IS_DEFAULT_INCLUDE + +#if __X86_ISA_LEVEL == 1 +# include DEFAULT_V1 +#elif __X86_ISA_LEVEL == 2 +# include DEFAULT_V2 +#elif __X86_ISA_LEVEL == 3 +# include DEFAULT_V3 +#elif __X86_ISA_LEVEL == 4 +# include DEFAULT_V4 +#else +# error "Unsupport ISA Level!" +#endif diff --git a/sysdeps/x86_64/multiarch/isa-ifunc-macros.h b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h new file mode 100644 index 0000000000..c24f2ab655 --- /dev/null +++ b/sysdeps/x86_64/multiarch/isa-ifunc-macros.h @@ -0,0 +1,106 @@ +/* Common ifunc selection utils + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <init-arch.h> +#include <isa-level.h> + +#define OPTIMIZE_DECL(...) \ + extern __typeof (REDIRECT_NAME) OPTIMIZE (__VA_ARGS__) attribute_hidden; + +#define OPTIMIZE_DECL1(...) \ + extern __typeof (REDIRECT_NAME) OPTIMIZE1 (__VA_ARGS__) attribute_hidden; + +/* Only include at the level of the build ISA or better. 
I.e if built + with ISA=V1, then include all implementations. On the other hand if + built with ISA=V3 only include V3/V4 implementations. If there is + not implementation at or above the build ISA level, then include + the highest ISA level implementation. */ +#if __X86_ISA_LEVEL <= 4 +# define X86_IFUNC_IMPL_ADD_V4(...) IFUNC_IMPL_ADD (__VA_ARGS__) +# define return_X86_OPTIMIZE_V4(...) return OPTIMIZE (__VA_ARGS__) +# define return_X86_OPTIMIZE1_V4(...) return OPTIMIZE1 (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE_V4(...) OPTIMIZE_DECL (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE1_V4(...) OPTIMIZE_DECL1 (__VA_ARGS__) +#endif +#if __X86_ISA_LEVEL <= 3 +# define X86_IFUNC_IMPL_ADD_V3(...) IFUNC_IMPL_ADD (__VA_ARGS__) +# define return_X86_OPTIMIZE_V3(...) return OPTIMIZE (__VA_ARGS__) +# define return_X86_OPTIMIZE1_V3(...) return OPTIMIZE1 (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE_V3(...) OPTIMIZE_DECL (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE1_V3(...) OPTIMIZE_DECL1 (__VA_ARGS__) +#endif +#if __X86_ISA_LEVEL <= 2 +# define X86_IFUNC_IMPL_ADD_V2(...) IFUNC_IMPL_ADD (__VA_ARGS__) +# define return_X86_OPTIMIZE_V2(...) return OPTIMIZE (__VA_ARGS__) +# define return_X86_OPTIMIZE1_V2(...) return OPTIMIZE1 (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE_V2(...) OPTIMIZE_DECL (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE1_V2(...) OPTIMIZE_DECL1 (__VA_ARGS__) +#endif +#if __X86_ISA_LEVEL <= 1 +# define X86_IFUNC_IMPL_ADD_V1(...) IFUNC_IMPL_ADD (__VA_ARGS__) +# define return_X86_OPTIMIZE_V1(...) return OPTIMIZE (__VA_ARGS__) +# define return_X86_OPTIMIZE1_V1(...) return OPTIMIZE1 (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE_V1(...) OPTIMIZE_DECL (__VA_ARGS__) +# define DECLARE_X86_OPTIMIZE1_V1(...) OPTIMIZE_DECL1 (__VA_ARGS__) +#endif + +#ifndef return_X86_OPTIMIZE_V4 +# define X86_IFUNC_IMPL_ADD_V4(...) +# define return_X86_OPTIMIZE_V4(...) (void) (0) +# define return_X86_OPTIMIZE1_V4(...) (void) (0) +# define DECLARE_X86_OPTIMIZE_V4(...) 
+# define DECLARE_X86_OPTIMIZE1_V4(...) +#endif +#ifndef return_X86_OPTIMIZE_V3 +# define X86_IFUNC_IMPL_ADD_V3(...) +# define return_X86_OPTIMIZE_V3(...) (void) (0) +# define return_X86_OPTIMIZE1_V3(...) (void) (0) +# define DECLARE_X86_OPTIMIZE_V3(...) +# define DECLARE_X86_OPTIMIZE1_V3(...) +#endif +#ifndef return_X86_OPTIMIZE_V2 +# define X86_IFUNC_IMPL_ADD_V2(...) +# define return_X86_OPTIMIZE_V2(...) (void) (0) +# define return_X86_OPTIMIZE1_V2(...) (void) (0) +# define DECLARE_X86_OPTIMIZE_V2(...) +# define DECLARE_X86_OPTIMIZE1_V2(...) +#endif +#ifndef return_X86_OPTIMIZE_V1 +# define X86_IFUNC_IMPL_ADD_V1(...) +# define return_X86_OPTIMIZE_V1(...) (void) (0) +# define return_X86_OPTIMIZE1_V1(...) (void) (0) +# define DECLARE_X86_OPTIMIZE_V1(...) +# define DECLARE_X86_OPTIMIZE1_V1(...) +#endif + +#define DECLARE_X86_OPTIMIZE_FORCE(...) OPTIMIZE_DECL (__VA_ARGS__) +#define return_X86_OPTIMIZE_FORCE(...) return OPTIMIZE (__VA_ARGS__) +#define X86_IFUNC_IMPL_ADD_FORCE(...) IFUNC_IMPL_ADD (__VA_ARGS__) + +#if __X86_ISA_LEVEL == 1 +# define X86_OPTIMIZE_FALLBACK(v1, ...) OPTIMIZE (v1) +#elif __X86_ISA_LEVEL == 2 +# define X86_OPTIMIZE_FALLBACK(v1, v2, ...) OPTIMIZE (v2) +#elif __X86_ISA_LEVEL == 3 +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, ...) OPTIMIZE (v3) +#elif __X86_ISA_LEVEL == 4 +# define X86_OPTIMIZE_FALLBACK(v1, v2, v3, v4) OPTIMIZE (v4) +#else +# error "Unsupported ISA Level" +#endif